From b2ffde5afecd1420c018415bbfbe32b0ea0ff7ea Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Thu, 10 Oct 2024 11:59:59 +0200 Subject: [PATCH 01/18] doc / refactor --- R/runTemporalCohortCharacterization.R | 78 ++++++++----------- man/getCdmDataSourceInformation.Rd | 4 +- man/runBreakdownIndexEvents.Rd | 15 ++-- man/runIncidenceRate.Rd | 54 ++++++++----- man/runInclusionStatistics.Rd | 31 +------- man/runTimeSeries.Rd | 12 ++- .../test-runTemporalCohortCharacterization.R | 48 +++++++----- 7 files changed, 111 insertions(+), 131 deletions(-) diff --git a/R/runTemporalCohortCharacterization.R b/R/runTemporalCohortCharacterization.R index 1177436ee..c451886e3 100644 --- a/R/runTemporalCohortCharacterization.R +++ b/R/runTemporalCohortCharacterization.R @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +# export characteristics to csv files exportCharacterization <- function(characteristics, databaseId, incremental, @@ -54,11 +54,13 @@ exportCharacterization <- function(characteristics, ) %>% dplyr::select(-"cohortEntries", -"cohortSubjects") %>% dplyr::distinct() %>% - makeDataExportable( + exportDataToCsv( + data = characteristics$filteredCovariates, tableName = "temporal_covariate_value", + fileName = covariateValueFileName, minCellCount = minCellCount, - databaseId = databaseId - ) + databaseId = databaseId, + incremental = TRUE) if (dplyr::pull(dplyr::count(characteristics$filteredCovariates)) > 0) { @@ -88,32 +90,20 @@ exportCharacterization <- function(characteristics, incremental = TRUE, analysisId = timeRef$timeId ) - - writeToCsv( - data = characteristics$filteredCovariates, - fileName = covariateValueFileName, - incremental = TRUE - ) } } if (!"covariatesContinuous" %in% names(characteristics)) { ParallelLogger::logInfo("No continuous characterization output for submitted cohorts") } else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) { - 
characteristics$filteredCovariatesContinous <- makeDataExportable( - x = characteristics$covariatesContinuous, + exportDataToCsv( + data = characteristics$covariatesContinuous, tableName = "temporal_covariate_value_dist", + fileName = covariateValueContFileName, minCellCount = minCellCount, - databaseId = databaseId + databaseId = databaseId, + incremental = TRUE ) - - if (dplyr::pull(dplyr::count(characteristics$filteredCovariatesContinous)) > 0) { - writeToCsv( - data = characteristics$filteredCovariatesContinous, - fileName = covariateValueContFileName, - incremental = TRUE - ) - } } } @@ -345,29 +335,29 @@ getCohortCharacteristics <- function(connectionDetails = NULL, #' #' @examples runTemporalCohortCharacterization <- function(connection, - databaseId, - exportFolder, - cdmDatabaseSchema, - cohortDatabaseSchema, - cohortTable, - covariateSettings, - tempEmulationSchema, - cdmVersion, - cohorts, - cohortCounts, - minCellCount, - instantiatedCohorts, - incremental, - recordKeepingFile, - task = "runTemporalCohortCharacterization", - jobName = "Temporal Cohort characterization", - covariateValueFileName = file.path(exportFolder, "temporal_covariate_value.csv"), - covariateValueContFileName = file.path(exportFolder, "temporal_covariate_value_dist.csv"), - covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), - analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), - timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), - minCharacterizationMean = 0.001, - batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20)) { + databaseId, + exportFolder, + cdmDatabaseSchema, + cohortDatabaseSchema, + cohortTable, + covariateSettings, + tempEmulationSchema, + cdmVersion, + cohorts, + cohortCounts, + minCellCount, + instantiatedCohorts, + incremental, + recordKeepingFile, + task = "runTemporalCohortCharacterization", + jobName = "Temporal Cohort characterization", + covariateValueFileName = 
file.path(exportFolder, "temporal_covariate_value.csv"), + covariateValueContFileName = file.path(exportFolder, "temporal_covariate_value_dist.csv"), + covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), + analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), + timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), + minCharacterizationMean = 0.001, + batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20)) { ParallelLogger::logInfo("Running ", jobName) startCohortCharacterization <- Sys.time() subset <- subsetToRequiredCohorts( diff --git a/man/getCdmDataSourceInformation.Rd b/man/getCdmDataSourceInformation.Rd index 572b88288..a4e614edb 100644 --- a/man/getCdmDataSourceInformation.Rd +++ b/man/getCdmDataSourceInformation.Rd @@ -13,9 +13,7 @@ getCdmDataSourceInformation( \arguments{ \item{connection}{An object of type \code{connection} as created using the \code{\link[DatabaseConnector]{connect}} function in the -DatabaseConnector package. Can be left NULL if \code{connectionDetails} -is provided, in which case a new connection will be opened at the start -of the function, and closed when the function finishes.} +DatabaseConnector package.} \item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. 
Note that for SQL Server, this should include both the database and diff --git a/man/runBreakdownIndexEvents.Rd b/man/runBreakdownIndexEvents.Rd index 760a7479d..35c077a25 100644 --- a/man/runBreakdownIndexEvents.Rd +++ b/man/runBreakdownIndexEvents.Rd @@ -11,9 +11,6 @@ runBreakdownIndexEvents( vocabularyDatabaseSchema = cdmDatabaseSchema, databaseId, cohorts, - runIncludedSourceConcepts, - runOrphanConcepts, - runBreakdownIndexEvents, exportFolder, minCellCount, conceptCountsDatabaseSchema = NULL, @@ -42,12 +39,6 @@ runBreakdownIndexEvents( \item{cohorts}{} -\item{runIncludedSourceConcepts}{} - -\item{runOrphanConcepts}{} - -\item{runBreakdownIndexEvents}{} - \item{exportFolder}{} \item{minCellCount}{} @@ -73,6 +64,12 @@ runBreakdownIndexEvents( \item{useAchilles}{} \item{resultsDatabaseSchema}{} + +\item{runIncludedSourceConcepts}{} + +\item{runOrphanConcepts}{} + +\item{runBreakdownIndexEvents}{} } \description{ Title diff --git a/man/runIncidenceRate.Rd b/man/runIncidenceRate.Rd index 4f7e20b01..b457b8c88 100644 --- a/man/runIncidenceRate.Rd +++ b/man/runIncidenceRate.Rd @@ -2,10 +2,11 @@ % Please edit documentation in R/runIncidenceRate.R \name{runIncidenceRate} \alias{runIncidenceRate} -\title{Title} +\title{Run the incidence rate cohort diagnostic} \usage{ runIncidenceRate( connection, + cohortDefinitionSet, tempEmulationSchema, cdmDatabaseSchema, cohortDatabaseSchema, @@ -13,40 +14,51 @@ runIncidenceRate( databaseId, exportFolder, minCellCount, - cohorts, - instantiatedCohorts, - recordKeepingFile, - washoutPeriod, - incremental + washoutPeriod = 0, + incremental, + recordKeepingFile ) } \arguments{ -\item{connection}{} +\item{connection}{An object of type \code{connection} as created using the +\code{\link[DatabaseConnector]{connect}} function in the +DatabaseConnector package.} -\item{tempEmulationSchema}{} +\item{cohortDefinitionSet}{A data.frame with cohort definitions created by +`CohortGenerator::getCohortDefinitionSet` that must include +the columns 
cohortId, cohortName, json, sql.} -\item{cdmDatabaseSchema}{} +\item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.} -\item{cohortDatabaseSchema}{} +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} -\item{cohortTable}{} +\item{cohortDatabaseSchema}{Schema name where your cohort table resides. Note that for SQL Server, +this should include both the database and schema name, for example +'scratch.dbo'.} -\item{databaseId}{} +\item{cohortTable}{Name of the cohort table.} -\item{exportFolder}{} +\item{databaseId}{A short string for identifying the database (e.g. 'Synpuf').} -\item{minCellCount}{} +\item{exportFolder}{The folder where the results will be exported to} -\item{cohorts}{} +\item{minCellCount}{The minimum cell count for fields contains person counts or fractions} -\item{instantiatedCohorts}{} +\item{washoutPeriod}{Then minimum number of required observation days prior to +cohort index to be included in the numerator of the incidence rate} -\item{recordKeepingFile}{} +\item{incremental}{`TRUE` or `FALSE` (default). If TRUE diagnostics for cohorts in the +cohort definition set that have not changed will be skipped and existing results +csv files will be updated. If FALSE then diagnostics for all cohorts in the cohort +definition set will be executed and pre-existing results files will be deleted.} -\item{washoutPeriod}{} - -\item{incremental}{} +\item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where records are kept +of which cohort diagnostics has been executed.} } \description{ -Title +runIncidenceRate computes incidence rates for cohorts in the CDM population stratified +by age, sex, and calendar year. 
} diff --git a/man/runInclusionStatistics.Rd b/man/runInclusionStatistics.Rd index 42394f7ec..d7e74094e 100644 --- a/man/runInclusionStatistics.Rd +++ b/man/runInclusionStatistics.Rd @@ -2,11 +2,7 @@ % Please edit documentation in R/runInclusionStatistics.R \name{runInclusionStatistics} \alias{runInclusionStatistics} -<<<<<<< HEAD \title{Runs inclusion statistics on given cohort definitions and exports these.} -======= -\title{Title} ->>>>>>> 4f31d45 (fix unmerged paths. update docs.) \usage{ runInclusionStatistics( connection, @@ -21,12 +17,9 @@ runInclusionStatistics( ) } \arguments{ -<<<<<<< HEAD \item{connection}{An object of type \code{connection} as created using the \code{\link[DatabaseConnector]{connect}} function in the -DatabaseConnector package. Can be left NULL if \code{connectionDetails} -is provided, in which case a new connection will be opened at the start -of the function, and closed when the function finishes.} +DatabaseConnector package.} \item{exportFolder}{The folder where the output will be exported to.} @@ -67,26 +60,4 @@ These are the files written to disk, if available: * cohort_inc_stats.csv * cohort_inclusion.csv * cohort_summary_stats.csv -======= -\item{connection}{} - -\item{exportFolder}{} - -\item{databaseId}{} - -\item{cohortDefinitionSet}{} - -\item{cohortDatabaseSchema}{} - -\item{cohortTableNames}{} - -\item{incremental}{} - -\item{minCellCount}{} - -\item{recordKeepingFile}{} -} -\description{ -Title ->>>>>>> 4f31d45 (fix unmerged paths. update docs.) } diff --git a/man/runTimeSeries.Rd b/man/runTimeSeries.Rd index 25874ed82..4a06cdcb5 100644 --- a/man/runTimeSeries.Rd +++ b/man/runTimeSeries.Rd @@ -26,9 +26,7 @@ runTimeSeries( \arguments{ \item{connection}{An object of type \code{connection} as created using the \code{\link[DatabaseConnector]{connect}} function in the -DatabaseConnector package. 
Can be left NULL if \code{connectionDetails} -is provided, in which case a new connection will be opened at the start -of the function, and closed when the function finishes.} +DatabaseConnector package.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} @@ -62,7 +60,10 @@ of the results data model.} \item{instantiatedCohorts}{cohortIds of the cohorts that have been already been instantiated} -\item{incremental}{Create only cohort diagnostics that haven't been created before?} +\item{incremental}{`TRUE` or `FALSE` (default). If TRUE diagnostics for cohorts in the +cohort definition set that have not changed will be skipped and existing results +csv files will be updated. If FALSE then diagnostics for all cohorts in the cohort +definition set will be executed and pre-existing results files will be deleted.} \item{observationPeriodDateRange}{} @@ -71,6 +72,9 @@ of the results data model.} \item{databaseIds}{A vector one or more databaseIds to retrieve the results for. 
This is a character field values from the `databaseId` field of the `database` table of the results data model.} + +\item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where records are kept +of which cohort diagnostics has been executed.} } \description{ This function first generates a calendar period table, that has diff --git a/tests/testthat/test-runTemporalCohortCharacterization.R b/tests/testthat/test-runTemporalCohortCharacterization.R index 6511457d1..75a79d27d 100644 --- a/tests/testthat/test-runTemporalCohortCharacterization.R +++ b/tests/testthat/test-runTemporalCohortCharacterization.R @@ -1,7 +1,9 @@ test_that("Execute and export characterization", { skip_if(skipCdmTests, "cdm settings not configured") - tConnection <- - DatabaseConnector::connect(connectionDetails) + skip_if_not("sqlite" %in% names(testServers)) + server <- testServers[["sqlite"]] + + tConnection <- DatabaseConnector::connect(server$connectionDetails) with_dbc_connection(tConnection, { exportFolder <- tempfile() @@ -10,31 +12,36 @@ test_that("Execute and export characterization", { on.exit(unlink(exportFolder), add = TRUE) # Required for function use - cohortCounts <- computeCohortCounts( + cohortCounts <- CohortGenerator::getCohortCounts( connection = tConnection, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, - cohorts = cohortDefinitionSet, - exportFolder = exportFolder, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + cohortDefinitionSet = server$cohortDefinitionSet, + databaseId = "Testdb" + ) + exportDataToCsv( + data = cohortCounts, + tableName = "cohort_count", + fileName = file.path(exportFolder, "cohort_count.csv"), minCellCount = 5, databaseId = "Testdb" ) checkmate::expect_file_exists(file.path(exportFolder, "cohort_count.csv")) - executeCohortCharacterization( + runTemporalCohortCharacterization( connection = tConnection, databaseId = "Testdb", exportFolder = exportFolder, - 
cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, covariateSettings = temporalCovariateSettings, - tempEmulationSchema = tempEmulationSchema, + tempEmulationSchema = server$tempEmulationSchema, cdmVersion = 5, cohorts = cohortDefinitionSet[1:3, ], cohortCounts = cohortCounts, minCellCount = 5, - instantiatedCohorts = cohortDefinitionSet$cohortId, + instantiatedCohorts = server$cohortDefinitionSet$cohortId, incremental = TRUE, recordKeepingFile = recordKeepingFile, task = "runTemporalCohortCharacterization", @@ -70,20 +77,20 @@ test_that("Execute and export characterization", { ) # finish the rest of characterization - executeCohortCharacterization( + runTemporalCohortCharacterization( connection = tConnection, databaseId = "Testdb", exportFolder = exportFolder, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, covariateSettings = temporalCovariateSettings, - tempEmulationSchema = tempEmulationSchema, + tempEmulationSchema = server$tempEmulationSchema, cdmVersion = 5, - cohorts = cohortDefinitionSet, + cohorts = server$cohortDefinitionSet, cohortCounts = cohortCounts, minCellCount = 5, - instantiatedCohorts = cohortDefinitionSet$cohortId, + instantiatedCohorts = server$cohortDefinitionSet$cohortId, incremental = TRUE, recordKeepingFile = recordKeepingFile, task = "runTemporalCohortCharacterization", @@ -92,6 +99,7 @@ test_that("Execute and export characterization", { ) # Check no time ids are NA/NULL + readr::local_edition(1) tdata <- readr::read_csv(file.path(exportFolder, "temporal_covariate_value_dist.csv")) expect_false(any(is.na(tdata$time_id) | 
is.null(tdata$time_id))) From 74efb71b1c1138dc2336c58fd36a199d4b16c9ae Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Thu, 10 Oct 2024 13:38:35 +0200 Subject: [PATCH 02/18] disable other tests for now --- tests/testthat/test-Incremental.R | 688 +++++------ tests/testthat/test-ResultsDataModel.R | 466 ++++---- tests/testthat/test-externalConceptCounts.R | 140 +-- tests/testthat/test-runCohortRelationship.R | 854 +++++++------- tests/testthat/test-runIncidenceRate.R | 126 +- tests/testthat/test-runInclusionStatistics.R | 152 +-- tests/testthat/test-runResolvedConceptSets.R | 90 +- tests/testthat/test-runTimeSeries.R | 674 +++++------ tests/testthat/test-runVisitContext.R | 1102 +++++++++--------- tests/testthat/test-utils.R | 432 +++---- 10 files changed, 2362 insertions(+), 2362 deletions(-) diff --git a/tests/testthat/test-Incremental.R b/tests/testthat/test-Incremental.R index 7397b7494..db6b0ad9e 100644 --- a/tests/testthat/test-Incremental.R +++ b/tests/testthat/test-Incremental.R @@ -1,344 +1,344 @@ -library(testthat) - -test_that("Record keeping of single type tasks", { - rkf <- tempfile() - - sql1 <- "SELECT * FROM my_table WHERE x = 1;" - checksum1 <- CohortDiagnostics:::computeChecksum(sql1) - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - runSql = TRUE, - checksum = checksum1, - recordKeepingFile = rkf - ) - ) - - CohortDiagnostics:::recordTasksDone( - cohortId = 1, - runSql = TRUE, - checksum = checksum1, - recordKeepingFile = rkf - ) - - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - runSql = TRUE, - checksum = checksum1, - recordKeepingFile = rkf - ) - ) - - sql2 <- "SELECT * FROM my_table WHERE x = 2;" - checksum2 <- CohortDiagnostics:::computeChecksum(sql2) - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId = 2, - runSql = TRUE, - checksum = checksum2, - recordKeepingFile = rkf - ) - ) - - CohortDiagnostics:::recordTasksDone( - cohortId = 2, - runSql = TRUE, - checksum = checksum2, - 
recordKeepingFile = rkf - ) - - sql1a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2;" - checksum1a <- CohortDiagnostics:::computeChecksum(sql1a) - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - runSql = TRUE, - checksum = checksum1a, - recordKeepingFile = rkf - ) - ) - - CohortDiagnostics:::recordTasksDone( - cohortId = 1, - runSql = TRUE, - checksum = checksum1a, - recordKeepingFile = rkf - ) - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - runSql = TRUE, - checksum = checksum1a, - recordKeepingFile = rkf - ) - ) - - unlink(rkf) -}) - -test_that("Record keeping of multiple type tasks", { - rkf <- tempfile() - - sql1 <- "SELECT * FROM my_table WHERE x = 1;" - checksum1 <- CohortDiagnostics:::computeChecksum(sql1) - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - task = "Run SQL", - checksum = checksum1, - recordKeepingFile = rkf - ) - ) - - CohortDiagnostics:::recordTasksDone( - cohortId = 1, - task = "Run SQL", - checksum = checksum1, - recordKeepingFile = rkf - ) - - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - task = "Run SQL", - checksum = checksum1, - recordKeepingFile = rkf - ) - ) - - sql2 <- "SELECT * FROM my_table WHERE x = 1 AND y = 1;" - checksum2 <- CohortDiagnostics:::computeChecksum(sql2) - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - comparatorId = 2, - task = "Compare cohorts", - checksum = checksum2, - recordKeepingFile = rkf - ) - ) - - CohortDiagnostics:::recordTasksDone( - cohortId = 1, - comparatorId = 2, - task = "Compare cohorts", - checksum = checksum2, - recordKeepingFile = rkf - ) - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - task = "Run SQL", - checksum = checksum1, - recordKeepingFile = rkf - ) - ) - - - sql2a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2 AND z = 3;" - checksum2a <- CohortDiagnostics:::computeChecksum(sql2a) - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId 
= 1, - comparatorId = 2, - task = "Compare cohorts", - checksum = checksum2a, - recordKeepingFile = rkf - ) - ) - - CohortDiagnostics:::recordTasksDone( - cohortId = 1, - comparatorId = 2, - task = "Compare cohorts", - checksum = checksum2a, - recordKeepingFile = rkf - ) - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = 1, - comparatorId = 2, - task = "Compare cohorts", - checksum = checksum2a, - recordKeepingFile = rkf - ) - ) - - unlink(rkf) -}) - -test_that("Record keeping of multiple tasks at once", { - rkf <- tempfile() - - task <- dplyr::tibble( - cohortId = c(1, 2), - sql = c( - "SELECT * FROM my_table WHERE x = 1;", - "SELECT * FROM my_table WHERE x = 2;" - ) - ) - task$checksum <- CohortDiagnostics:::computeChecksum(task$sql) - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId = task$cohortId[1], - checksum = task$checksum[1], - recordKeepingFile = rkf - ) - ) - - CohortDiagnostics:::recordTasksDone( - cohortId = task$cohortId, - checksum = task$checksum, - recordKeepingFile = rkf - ) - - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = task$cohortId[1], - checksum = task$checksum[1], - recordKeepingFile = rkf - ) - ) - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = task$cohortId[2], - checksum = task$checksum[2], - recordKeepingFile = rkf - ) - ) - - - task <- dplyr::tibble( - cohortId = c(1, 2, 3), - sql = c( - "SELECT * FROM my_table WHERE x = 3;", - "SELECT * FROM my_table WHERE x = 4;", - "SELECT * FROM my_table WHERE x = 5;" - ) - ) - task$checksum <- CohortDiagnostics:::computeChecksum(task$sql) - - expect_true( - CohortDiagnostics:::isTaskRequired( - cohortId = task$cohortId[1], - checksum = task$checksum[1], - recordKeepingFile = rkf - ) - ) - - tasks <- - CohortDiagnostics:::getRequiredTasks( - cohortId = task$cohortId, - checksum = task$checksum, - recordKeepingFile = rkf - ) - expect_equal(nrow(tasks), 3) - - CohortDiagnostics:::recordTasksDone( - cohortId = task$cohortId, - 
checksum = task$checksum, - recordKeepingFile = rkf - ) - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = task$cohortId[1], - checksum = task$checksum[1], - recordKeepingFile = rkf - ) - ) - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = task$cohortId[2], - checksum = task$checksum[2], - recordKeepingFile = rkf - ) - ) - - expect_false( - CohortDiagnostics:::isTaskRequired( - cohortId = task$cohortId[3], - checksum = task$checksum[3], - recordKeepingFile = rkf - ) - ) - - tasks <- - CohortDiagnostics:::getRequiredTasks( - cohortId = task$cohortId[2], - checksum = task$checksum[2], - recordKeepingFile = rkf - ) - expect_equal(nrow(tasks), 0) - - unlink(rkf) -}) - - -test_that("Incremental save", { - tmpFile <- tempfile() - data <- dplyr::tibble( - cohortId = c(1, 1, 2, 2, 3), - count = c(100, 200, 300, 400, 500) - ) - CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) - - newData <- dplyr::tibble( - cohortId = c(1, 2, 2), - count = c(600, 700, 800) - ) - - CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c(1, 2)) - - - - goldStandard <- dplyr::tibble( - cohortId = c(3, 1, 2, 2), - count = c(500, 600, 700, 800) - ) - - - expect_equal( - readr::read_csv( - tmpFile, - col_types = readr::cols(), - guess_max = min(1e7) - ), - goldStandard, - ignore_attr = TRUE - ) - unlink(tmpFile) -}) - -test_that("Incremental save with empty key", { - tmpFile <- tempfile() - data <- dplyr::tibble( - cohortId = c(1, 1, 2, 2, 3), - count = c(100, 200, 300, 400, 500) - ) - CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) - - newData <- dplyr::tibble() - - CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c()) - - expect_equal( - readr::read_csv( - tmpFile, - col_types = readr::cols(), - guess_max = min(1e7) - ), - data, - ignore_attr = TRUE - ) - unlink(tmpFile) -}) +# library(testthat) +# +# test_that("Record keeping of single type tasks", { +# rkf <- tempfile() +# +# 
sql1 <- "SELECT * FROM my_table WHERE x = 1;" +# checksum1 <- CohortDiagnostics:::computeChecksum(sql1) +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# runSql = TRUE, +# checksum = checksum1, +# recordKeepingFile = rkf +# ) +# ) +# +# CohortDiagnostics:::recordTasksDone( +# cohortId = 1, +# runSql = TRUE, +# checksum = checksum1, +# recordKeepingFile = rkf +# ) +# +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# runSql = TRUE, +# checksum = checksum1, +# recordKeepingFile = rkf +# ) +# ) +# +# sql2 <- "SELECT * FROM my_table WHERE x = 2;" +# checksum2 <- CohortDiagnostics:::computeChecksum(sql2) +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 2, +# runSql = TRUE, +# checksum = checksum2, +# recordKeepingFile = rkf +# ) +# ) +# +# CohortDiagnostics:::recordTasksDone( +# cohortId = 2, +# runSql = TRUE, +# checksum = checksum2, +# recordKeepingFile = rkf +# ) +# +# sql1a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2;" +# checksum1a <- CohortDiagnostics:::computeChecksum(sql1a) +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# runSql = TRUE, +# checksum = checksum1a, +# recordKeepingFile = rkf +# ) +# ) +# +# CohortDiagnostics:::recordTasksDone( +# cohortId = 1, +# runSql = TRUE, +# checksum = checksum1a, +# recordKeepingFile = rkf +# ) +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# runSql = TRUE, +# checksum = checksum1a, +# recordKeepingFile = rkf +# ) +# ) +# +# unlink(rkf) +# }) +# +# test_that("Record keeping of multiple type tasks", { +# rkf <- tempfile() +# +# sql1 <- "SELECT * FROM my_table WHERE x = 1;" +# checksum1 <- CohortDiagnostics:::computeChecksum(sql1) +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# task = "Run SQL", +# checksum = checksum1, +# recordKeepingFile = rkf +# ) +# ) +# +# CohortDiagnostics:::recordTasksDone( +# cohortId = 1, +# task = "Run SQL", +# checksum = checksum1, +# 
recordKeepingFile = rkf +# ) +# +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# task = "Run SQL", +# checksum = checksum1, +# recordKeepingFile = rkf +# ) +# ) +# +# sql2 <- "SELECT * FROM my_table WHERE x = 1 AND y = 1;" +# checksum2 <- CohortDiagnostics:::computeChecksum(sql2) +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# comparatorId = 2, +# task = "Compare cohorts", +# checksum = checksum2, +# recordKeepingFile = rkf +# ) +# ) +# +# CohortDiagnostics:::recordTasksDone( +# cohortId = 1, +# comparatorId = 2, +# task = "Compare cohorts", +# checksum = checksum2, +# recordKeepingFile = rkf +# ) +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# task = "Run SQL", +# checksum = checksum1, +# recordKeepingFile = rkf +# ) +# ) +# +# +# sql2a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2 AND z = 3;" +# checksum2a <- CohortDiagnostics:::computeChecksum(sql2a) +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# comparatorId = 2, +# task = "Compare cohorts", +# checksum = checksum2a, +# recordKeepingFile = rkf +# ) +# ) +# +# CohortDiagnostics:::recordTasksDone( +# cohortId = 1, +# comparatorId = 2, +# task = "Compare cohorts", +# checksum = checksum2a, +# recordKeepingFile = rkf +# ) +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = 1, +# comparatorId = 2, +# task = "Compare cohorts", +# checksum = checksum2a, +# recordKeepingFile = rkf +# ) +# ) +# +# unlink(rkf) +# }) +# +# test_that("Record keeping of multiple tasks at once", { +# rkf <- tempfile() +# +# task <- dplyr::tibble( +# cohortId = c(1, 2), +# sql = c( +# "SELECT * FROM my_table WHERE x = 1;", +# "SELECT * FROM my_table WHERE x = 2;" +# ) +# ) +# task$checksum <- CohortDiagnostics:::computeChecksum(task$sql) +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = task$cohortId[1], +# checksum = task$checksum[1], +# recordKeepingFile = rkf +# ) +# ) +# +# 
CohortDiagnostics:::recordTasksDone( +# cohortId = task$cohortId, +# checksum = task$checksum, +# recordKeepingFile = rkf +# ) +# +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = task$cohortId[1], +# checksum = task$checksum[1], +# recordKeepingFile = rkf +# ) +# ) +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = task$cohortId[2], +# checksum = task$checksum[2], +# recordKeepingFile = rkf +# ) +# ) +# +# +# task <- dplyr::tibble( +# cohortId = c(1, 2, 3), +# sql = c( +# "SELECT * FROM my_table WHERE x = 3;", +# "SELECT * FROM my_table WHERE x = 4;", +# "SELECT * FROM my_table WHERE x = 5;" +# ) +# ) +# task$checksum <- CohortDiagnostics:::computeChecksum(task$sql) +# +# expect_true( +# CohortDiagnostics:::isTaskRequired( +# cohortId = task$cohortId[1], +# checksum = task$checksum[1], +# recordKeepingFile = rkf +# ) +# ) +# +# tasks <- +# CohortDiagnostics:::getRequiredTasks( +# cohortId = task$cohortId, +# checksum = task$checksum, +# recordKeepingFile = rkf +# ) +# expect_equal(nrow(tasks), 3) +# +# CohortDiagnostics:::recordTasksDone( +# cohortId = task$cohortId, +# checksum = task$checksum, +# recordKeepingFile = rkf +# ) +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = task$cohortId[1], +# checksum = task$checksum[1], +# recordKeepingFile = rkf +# ) +# ) +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = task$cohortId[2], +# checksum = task$checksum[2], +# recordKeepingFile = rkf +# ) +# ) +# +# expect_false( +# CohortDiagnostics:::isTaskRequired( +# cohortId = task$cohortId[3], +# checksum = task$checksum[3], +# recordKeepingFile = rkf +# ) +# ) +# +# tasks <- +# CohortDiagnostics:::getRequiredTasks( +# cohortId = task$cohortId[2], +# checksum = task$checksum[2], +# recordKeepingFile = rkf +# ) +# expect_equal(nrow(tasks), 0) +# +# unlink(rkf) +# }) +# +# +# test_that("Incremental save", { +# tmpFile <- tempfile() +# data <- dplyr::tibble( +# cohortId = c(1, 1, 2, 2, 
3), +# count = c(100, 200, 300, 400, 500) +# ) +# CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) +# +# newData <- dplyr::tibble( +# cohortId = c(1, 2, 2), +# count = c(600, 700, 800) +# ) +# +# CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c(1, 2)) +# +# +# +# goldStandard <- dplyr::tibble( +# cohortId = c(3, 1, 2, 2), +# count = c(500, 600, 700, 800) +# ) +# +# +# expect_equal( +# readr::read_csv( +# tmpFile, +# col_types = readr::cols(), +# guess_max = min(1e7) +# ), +# goldStandard, +# ignore_attr = TRUE +# ) +# unlink(tmpFile) +# }) +# +# test_that("Incremental save with empty key", { +# tmpFile <- tempfile() +# data <- dplyr::tibble( +# cohortId = c(1, 1, 2, 2, 3), +# count = c(100, 200, 300, 400, 500) +# ) +# CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) +# +# newData <- dplyr::tibble() +# +# CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c()) +# +# expect_equal( +# readr::read_csv( +# tmpFile, +# col_types = readr::cols(), +# guess_max = min(1e7) +# ), +# data, +# ignore_attr = TRUE +# ) +# unlink(tmpFile) +# }) diff --git a/tests/testthat/test-ResultsDataModel.R b/tests/testthat/test-ResultsDataModel.R index c827a70f3..76777d803 100644 --- a/tests/testthat/test-ResultsDataModel.R +++ b/tests/testthat/test-ResultsDataModel.R @@ -1,233 +1,233 @@ -skipResultsDm <- FALSE -if (Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" || Sys.getenv("SKIP_DB_TESTS") == "TRUE") { - skipResultsDm <- TRUE -} else { - postgresConnectionDetails <- DatabaseConnector::createConnectionDetails( - dbms = "postgresql", - user = Sys.getenv("CDM5_POSTGRESQL_USER"), - password = URLdecode(Sys.getenv("CDM5_POSTGRESQL_PASSWORD")), - server = Sys.getenv("CDM5_POSTGRESQL_SERVER"), - pathToDriver = jdbcDriverFolder - ) - - resultsDatabaseSchema <- paste0("r", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) - - # Always clean up - withr::defer( - { - pgConnection <- 
DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) - sql <- "DROP SCHEMA IF EXISTS @resultsDatabaseSchema CASCADE;" - DatabaseConnector::renderTranslateExecuteSql( - sql = sql, - resultsDatabaseSchema = resultsDatabaseSchema, - connection = pgConnection - ) - - DatabaseConnector::disconnect(pgConnection) - unlink(folder, recursive = TRUE, force = TRUE) - }, - testthat::teardown_env() - ) -} - -test_that("Create schema", { - skip_if(skipResultsDm | skipCdmTests, "results data model test server not set") - pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) - with_dbc_connection(pgConnection, { - sql <- "CREATE SCHEMA @resultsDatabaseSchema;" - DatabaseConnector::renderTranslateExecuteSql( - sql = sql, - resultsDatabaseSchema = resultsDatabaseSchema, - connection = pgConnection - ) - createResultsDataModel( - connectionDetails = postgresConnectionDetails, - databaseSchema = resultsDatabaseSchema, - tablePrefix = "cd_" - ) - - specifications <- getResultsDataModelSpecifications() - - for (tableName in unique(specifications$tableName)) { - expect_true(.pgTableExists(pgConnection, resultsDatabaseSchema, paste0("cd_", tableName))) - } - # Bad schema name - expect_error(createResultsDataModel( - connectionDetails = postgresConnectionDetails, - databaseSchema = "non_existant_schema" - )) - }) -}) - -test_that("Results upload", { - skip_if(skipResultsDm | skipCdmTests, "results data model test server not set") - if (dbms == "sqlite") { - # Checks to see if adding extra OMOP vocab, unexpectedly breaks things - connection <- DatabaseConnector::connect(connectionDetails) - with_dbc_connection(connection, { - DatabaseConnector::renderTranslateExecuteSql(connection, " -INSERT INTO main.vocabulary -(VOCABULARY_ID, VOCABULARY_NAME, VOCABULARY_REFERENCE, VOCABULARY_VERSION, VOCABULARY_CONCEPT_ID) VALUES -('None','OMOP Standardized Vocabularies','OMOP generated','v5.5 17-FEB-22',44819096); - -INSERT INTO CDM_SOURCE 
-(CDM_SOURCE_NAME,CDM_SOURCE_ABBREVIATION,CDM_HOLDER,SOURCE_DESCRIPTION,SOURCE_DOCUMENTATION_REFERENCE,CDM_ETL_REFERENCE,SOURCE_RELEASE_DATE,CDM_RELEASE_DATE,CDM_VERSION,VOCABULARY_VERSION) -VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient Population Simulator.','https://synthetichealth.github.io/synthea/','https://github.com/OHDSI/ETL-Synthea',1558742400,1558742400,'v5.4','v5.0 22-JAN-22');") - - # Check to see if non-standard extra columns are handled - DatabaseConnector::renderTranslateExecuteSql( - connection, - "ALTER TABLE VOCABULARY ADD TEST_COLUMN varchar(255) DEFAULT 'foo';" - ) - }) - } - - if (dbms == "sqlite") { - expect_warning( - { - executeDiagnostics( - connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - vocabularyDatabaseSchema = vocabularyDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTableNames = cohortTableNames, - cohortIds = cohortIds, - cohortDefinitionSet = cohortDefinitionSet, - exportFolder = file.path(folder, "export"), - databaseId = dbms, - runInclusionStatistics = TRUE, - runBreakdownIndexEvents = TRUE, - runTemporalCohortCharacterization = TRUE, - runIncidenceRate = TRUE, - runIncludedSourceConcepts = TRUE, - runOrphanConcepts = TRUE, - incremental = TRUE, - incrementalFolder = file.path(folder, "incremental"), - temporalCovariateSettings = temporalCovariateSettings, - runFeatureExtractionOnSample = TRUE - ) - }, - "CDM Source table has more than one record while only one is expected." 
- ) - } else { - executeDiagnostics( - connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - vocabularyDatabaseSchema = vocabularyDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTableNames = cohortTableNames, - cohortIds = cohortIds, - cohortDefinitionSet = cohortDefinitionSet, - exportFolder = file.path(folder, "export"), - databaseId = dbms, - runInclusionStatistics = TRUE, - runBreakdownIndexEvents = TRUE, - runTemporalCohortCharacterization = TRUE, - runIncidenceRate = TRUE, - runIncludedSourceConcepts = TRUE, - runOrphanConcepts = TRUE, - incremental = TRUE, - incrementalFolder = file.path(folder, "incremental"), - temporalCovariateSettings = temporalCovariateSettings, - runFeatureExtractionOnSample = TRUE - ) - } - - listOfZipFilesToUpload <- - list.files( - path = file.path(folder, "export"), - pattern = ".zip", - full.names = TRUE, - recursive = TRUE - ) - - for (i in (1:length(listOfZipFilesToUpload))) { - uploadResults( - connectionDetails = postgresConnectionDetails, - schema = resultsDatabaseSchema, - zipFileName = listOfZipFilesToUpload[[i]], - tablePrefix = "cd_" - ) - } - - specifications <- getResultsDataModelSpecifications() - pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) - with_dbc_connection(pgConnection, { - for (tableName in unique(specifications$tableName)) { - primaryKey <- specifications %>% - dplyr::filter(tableName == !!tableName & - primaryKey == "Yes") %>% - dplyr::select("columnName") %>% - dplyr::pull() - - if ("database_id" %in% primaryKey) { - sql <- - "SELECT COUNT(*) FROM @schema.@table_name WHERE database_id = '@database_id';" - sql <- SqlRender::render( - sql = sql, - schema = resultsDatabaseSchema, - table_name = paste0("cd_", tableName), - database_id = "cdmv5" - ) - databaseIdCount <- DatabaseConnector::querySql(pgConnection, sql)[, 1] - expect_true(databaseIdCount >= 0) - } - } - }) -}) - 
-test_that("Sqlite results data model", { - skip_if(skipResultsDm) - dbFile <- tempfile(fileext = ".sqlite") - createMergedResultsFile(dataFolder = file.path(folder, "export"), sqliteDbPath = dbFile, overwrite = TRUE, tablePrefix = "cd_") - connectionDetailsSqlite <- DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = dbFile) - connectionSqlite <- DatabaseConnector::connect(connectionDetails = connectionDetailsSqlite) - with_dbc_connection(connectionSqlite, { - # Bad schema name - expect_error(createResultsDataModel( - connectionDetails = connectionDetailsSqlite, - databaseSchema = "non_existant_schema" - )) - - specifications <- getResultsDataModelSpecifications() - for (tableName in unique(specifications$tableName)) { - primaryKey <- specifications %>% - dplyr::filter(tableName == !!tableName & - primaryKey == "Yes") %>% - dplyr::select("columnName") %>% - dplyr::pull() - - if ("database_id" %in% primaryKey) { - sql <- - "SELECT COUNT(*) FROM @schema.@table_name WHERE database_id = '@database_id';" - sql <- SqlRender::render( - sql = sql, - schema = "main", - table_name = paste0("cd_", tableName), - database_id = "cdmv5" - ) - databaseIdCount <- DatabaseConnector::querySql(connectionSqlite, sql)[, 1] - expect_true(databaseIdCount >= 0) - } - } - }) -}) - - -test_that("getResultsDataModelSpecifications works", { - spec <- getResultsDataModelSpecifications() - expectedColumnNames <- c("tableName", "columnName", "dataType", "isRequired", "primaryKey", - "optional", "emptyIsNa", "minCellCount", "isVocabularyTable", - "neverIncremental", "description") - expect_true(is.data.frame(spec)) - expect_named(spec, expectedColumnNames) - - expect_equal(length(unique(spec$tableName)), 30) - spec <- getResultsDataModelSpecifications("cohort") - expect_equal(length(unique(spec$tableName)), 1) - - expect_error(getResultsDataModelSpecifications(c("cohort", "time_series"))) - expect_error(getResultsDataModelSpecifications(1)) - }) +# skipResultsDm <- FALSE +# if 
(Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" || Sys.getenv("SKIP_DB_TESTS") == "TRUE") { +# skipResultsDm <- TRUE +# } else { +# postgresConnectionDetails <- DatabaseConnector::createConnectionDetails( +# dbms = "postgresql", +# user = Sys.getenv("CDM5_POSTGRESQL_USER"), +# password = URLdecode(Sys.getenv("CDM5_POSTGRESQL_PASSWORD")), +# server = Sys.getenv("CDM5_POSTGRESQL_SERVER"), +# pathToDriver = jdbcDriverFolder +# ) +# +# resultsDatabaseSchema <- paste0("r", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) +# +# # Always clean up +# withr::defer( +# { +# pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) +# sql <- "DROP SCHEMA IF EXISTS @resultsDatabaseSchema CASCADE;" +# DatabaseConnector::renderTranslateExecuteSql( +# sql = sql, +# resultsDatabaseSchema = resultsDatabaseSchema, +# connection = pgConnection +# ) +# +# DatabaseConnector::disconnect(pgConnection) +# unlink(folder, recursive = TRUE, force = TRUE) +# }, +# testthat::teardown_env() +# ) +# } +# +# test_that("Create schema", { +# skip_if(skipResultsDm | skipCdmTests, "results data model test server not set") +# pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) +# with_dbc_connection(pgConnection, { +# sql <- "CREATE SCHEMA @resultsDatabaseSchema;" +# DatabaseConnector::renderTranslateExecuteSql( +# sql = sql, +# resultsDatabaseSchema = resultsDatabaseSchema, +# connection = pgConnection +# ) +# createResultsDataModel( +# connectionDetails = postgresConnectionDetails, +# databaseSchema = resultsDatabaseSchema, +# tablePrefix = "cd_" +# ) +# +# specifications <- getResultsDataModelSpecifications() +# +# for (tableName in unique(specifications$tableName)) { +# expect_true(.pgTableExists(pgConnection, resultsDatabaseSchema, paste0("cd_", tableName))) +# } +# # Bad schema name +# expect_error(createResultsDataModel( +# connectionDetails = postgresConnectionDetails, +# databaseSchema = "non_existant_schema" +# )) +# 
}) +# }) +# +# test_that("Results upload", { +# skip_if(skipResultsDm | skipCdmTests, "results data model test server not set") +# if (dbms == "sqlite") { +# # Checks to see if adding extra OMOP vocab, unexpectedly breaks things +# connection <- DatabaseConnector::connect(connectionDetails) +# with_dbc_connection(connection, { +# DatabaseConnector::renderTranslateExecuteSql(connection, " +# INSERT INTO main.vocabulary +# (VOCABULARY_ID, VOCABULARY_NAME, VOCABULARY_REFERENCE, VOCABULARY_VERSION, VOCABULARY_CONCEPT_ID) VALUES +# ('None','OMOP Standardized Vocabularies','OMOP generated','v5.5 17-FEB-22',44819096); +# +# INSERT INTO CDM_SOURCE +# (CDM_SOURCE_NAME,CDM_SOURCE_ABBREVIATION,CDM_HOLDER,SOURCE_DESCRIPTION,SOURCE_DOCUMENTATION_REFERENCE,CDM_ETL_REFERENCE,SOURCE_RELEASE_DATE,CDM_RELEASE_DATE,CDM_VERSION,VOCABULARY_VERSION) +# VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient Population Simulator.','https://synthetichealth.github.io/synthea/','https://github.com/OHDSI/ETL-Synthea',1558742400,1558742400,'v5.4','v5.0 22-JAN-22');") +# +# # Check to see if non-standard extra columns are handled +# DatabaseConnector::renderTranslateExecuteSql( +# connection, +# "ALTER TABLE VOCABULARY ADD TEST_COLUMN varchar(255) DEFAULT 'foo';" +# ) +# }) +# } +# +# if (dbms == "sqlite") { +# expect_warning( +# { +# executeDiagnostics( +# connectionDetails = connectionDetails, +# cdmDatabaseSchema = cdmDatabaseSchema, +# vocabularyDatabaseSchema = vocabularyDatabaseSchema, +# tempEmulationSchema = tempEmulationSchema, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cohortTableNames = cohortTableNames, +# cohortIds = cohortIds, +# cohortDefinitionSet = cohortDefinitionSet, +# exportFolder = file.path(folder, "export"), +# databaseId = dbms, +# runInclusionStatistics = TRUE, +# runBreakdownIndexEvents = TRUE, +# runTemporalCohortCharacterization = TRUE, +# runIncidenceRate = TRUE, +# runIncludedSourceConcepts = TRUE, +# runOrphanConcepts = TRUE, +# 
incremental = TRUE, +# incrementalFolder = file.path(folder, "incremental"), +# temporalCovariateSettings = temporalCovariateSettings, +# runFeatureExtractionOnSample = TRUE +# ) +# }, +# "CDM Source table has more than one record while only one is expected." +# ) +# } else { +# executeDiagnostics( +# connectionDetails = connectionDetails, +# cdmDatabaseSchema = cdmDatabaseSchema, +# vocabularyDatabaseSchema = vocabularyDatabaseSchema, +# tempEmulationSchema = tempEmulationSchema, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cohortTableNames = cohortTableNames, +# cohortIds = cohortIds, +# cohortDefinitionSet = cohortDefinitionSet, +# exportFolder = file.path(folder, "export"), +# databaseId = dbms, +# runInclusionStatistics = TRUE, +# runBreakdownIndexEvents = TRUE, +# runTemporalCohortCharacterization = TRUE, +# runIncidenceRate = TRUE, +# runIncludedSourceConcepts = TRUE, +# runOrphanConcepts = TRUE, +# incremental = TRUE, +# incrementalFolder = file.path(folder, "incremental"), +# temporalCovariateSettings = temporalCovariateSettings, +# runFeatureExtractionOnSample = TRUE +# ) +# } +# +# listOfZipFilesToUpload <- +# list.files( +# path = file.path(folder, "export"), +# pattern = ".zip", +# full.names = TRUE, +# recursive = TRUE +# ) +# +# for (i in (1:length(listOfZipFilesToUpload))) { +# uploadResults( +# connectionDetails = postgresConnectionDetails, +# schema = resultsDatabaseSchema, +# zipFileName = listOfZipFilesToUpload[[i]], +# tablePrefix = "cd_" +# ) +# } +# +# specifications <- getResultsDataModelSpecifications() +# pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) +# with_dbc_connection(pgConnection, { +# for (tableName in unique(specifications$tableName)) { +# primaryKey <- specifications %>% +# dplyr::filter(tableName == !!tableName & +# primaryKey == "Yes") %>% +# dplyr::select("columnName") %>% +# dplyr::pull() +# +# if ("database_id" %in% primaryKey) { +# sql <- +# "SELECT COUNT(*) FROM 
@schema.@table_name WHERE database_id = '@database_id';" +# sql <- SqlRender::render( +# sql = sql, +# schema = resultsDatabaseSchema, +# table_name = paste0("cd_", tableName), +# database_id = "cdmv5" +# ) +# databaseIdCount <- DatabaseConnector::querySql(pgConnection, sql)[, 1] +# expect_true(databaseIdCount >= 0) +# } +# } +# }) +# }) +# +# test_that("Sqlite results data model", { +# skip_if(skipResultsDm) +# dbFile <- tempfile(fileext = ".sqlite") +# createMergedResultsFile(dataFolder = file.path(folder, "export"), sqliteDbPath = dbFile, overwrite = TRUE, tablePrefix = "cd_") +# connectionDetailsSqlite <- DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = dbFile) +# connectionSqlite <- DatabaseConnector::connect(connectionDetails = connectionDetailsSqlite) +# with_dbc_connection(connectionSqlite, { +# # Bad schema name +# expect_error(createResultsDataModel( +# connectionDetails = connectionDetailsSqlite, +# databaseSchema = "non_existant_schema" +# )) +# +# specifications <- getResultsDataModelSpecifications() +# for (tableName in unique(specifications$tableName)) { +# primaryKey <- specifications %>% +# dplyr::filter(tableName == !!tableName & +# primaryKey == "Yes") %>% +# dplyr::select("columnName") %>% +# dplyr::pull() +# +# if ("database_id" %in% primaryKey) { +# sql <- +# "SELECT COUNT(*) FROM @schema.@table_name WHERE database_id = '@database_id';" +# sql <- SqlRender::render( +# sql = sql, +# schema = "main", +# table_name = paste0("cd_", tableName), +# database_id = "cdmv5" +# ) +# databaseIdCount <- DatabaseConnector::querySql(connectionSqlite, sql)[, 1] +# expect_true(databaseIdCount >= 0) +# } +# } +# }) +# }) +# +# +# test_that("getResultsDataModelSpecifications works", { +# spec <- getResultsDataModelSpecifications() +# expectedColumnNames <- c("tableName", "columnName", "dataType", "isRequired", "primaryKey", +# "optional", "emptyIsNa", "minCellCount", "isVocabularyTable", +# "neverIncremental", "description") +# 
expect_true(is.data.frame(spec)) +# expect_named(spec, expectedColumnNames) +# +# expect_equal(length(unique(spec$tableName)), 30) +# spec <- getResultsDataModelSpecifications("cohort") +# expect_equal(length(unique(spec$tableName)), 1) +# +# expect_error(getResultsDataModelSpecifications(c("cohort", "time_series"))) +# expect_error(getResultsDataModelSpecifications(1)) +# }) diff --git a/tests/testthat/test-externalConceptCounts.R b/tests/testthat/test-externalConceptCounts.R index d9e22a51a..4fb9ce216 100644 --- a/tests/testthat/test-externalConceptCounts.R +++ b/tests/testthat/test-externalConceptCounts.R @@ -1,70 +1,70 @@ -test_that("Creating and checking externalConceptCounts table", { - if (dbmsToTest == "sqlite") { - connectionDetails <- testServers[["sqlite"]]$connectionDetails - connection <- connect(connectionDetails) - cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema - conceptCountsTable <- "concept_counts" - CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = NULL, - conceptCountsTable = "concept_counts", - conceptCountsDatabaseSchema = cdmDatabaseSchema, - conceptCountsTableIsTemp = FALSE, - removeCurrentTable = TRUE) - - concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") - expect_equal(concept_counts_info$NAME, c("concept_id", - "concept_count", - "concept_subjects", - "vocabulary_version")) - checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, - name = conceptCountsTable, - databaseSchema = cdmDatabaseSchema) - expect_true(checkConceptCountsTableExists) - - # Checking vocab version matches - useExternalConceptCountsTable <- TRUE - conceptCountsTable <- "concept_counts" - conceptCountsTable <- conceptCountsTable - dataSourceInfo <- getCdmDataSourceInformation(connection = connection, cdmDatabaseSchema = cdmDatabaseSchema) - vocabVersion <- dataSourceInfo$vocabularyVersion - 
vocabVersionExternalConceptCountsTable <- renderTranslateQuerySql( - connection = connection, - sql = "SELECT DISTINCT vocabulary_version FROM @work_database_schema.@concept_counts_table;", - work_database_schema = cdmDatabaseSchema, - concept_counts_table = conceptCountsTable, - snakeCaseToCamelCase = TRUE, - tempEmulationSchema = getOption("sqlRenderTempEmulationSchena") - ) - - expect_equal(vocabVersion, vocabVersionExternalConceptCountsTable[1,1]) - } - -}) - -test_that("Creating and checking externalConceptCounts temp table", { - if (dbmsToTest == "sqlite") { - # Creating externalConceptCounts - # sql_lite_path <- file.path(test_path(), databaseFile) - connectionDetails <- testServers[["sqlite"]]$connectionDetails - connection <- connect(connectionDetails) - cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema - conceptCountsTable <- "concept_counts" - CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = NULL, - conceptCountsTable = conceptCountsTable, - conceptCountsDatabaseSchema = cdmDatabaseSchema, - conceptCountsTableIsTemp = TRUE, - removeCurrentTable = TRUE) - - concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") - expect_equal(concept_counts_info$NAME, c("concept_id", - "concept_count", - "concept_subjects")) - checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, - name = conceptCountsTable, - databaseSchema = cdmDatabaseSchema) - expect_true(checkConceptCountsTableExists) - } -}) +# test_that("Creating and checking externalConceptCounts table", { +# if (dbmsToTest == "sqlite") { +# connectionDetails <- testServers[["sqlite"]]$connectionDetails +# connection <- connect(connectionDetails) +# cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema +# conceptCountsTable <- "concept_counts" +# CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, +# 
cdmDatabaseSchema = cdmDatabaseSchema, +# tempEmulationSchema = NULL, +# conceptCountsTable = "concept_counts", +# conceptCountsDatabaseSchema = cdmDatabaseSchema, +# conceptCountsTableIsTemp = FALSE, +# removeCurrentTable = TRUE) +# +# concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") +# expect_equal(concept_counts_info$NAME, c("concept_id", +# "concept_count", +# "concept_subjects", +# "vocabulary_version")) +# checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, +# name = conceptCountsTable, +# databaseSchema = cdmDatabaseSchema) +# expect_true(checkConceptCountsTableExists) +# +# # Checking vocab version matches +# useExternalConceptCountsTable <- TRUE +# conceptCountsTable <- "concept_counts" +# conceptCountsTable <- conceptCountsTable +# dataSourceInfo <- getCdmDataSourceInformation(connection = connection, cdmDatabaseSchema = cdmDatabaseSchema) +# vocabVersion <- dataSourceInfo$vocabularyVersion +# vocabVersionExternalConceptCountsTable <- renderTranslateQuerySql( +# connection = connection, +# sql = "SELECT DISTINCT vocabulary_version FROM @work_database_schema.@concept_counts_table;", +# work_database_schema = cdmDatabaseSchema, +# concept_counts_table = conceptCountsTable, +# snakeCaseToCamelCase = TRUE, +# tempEmulationSchema = getOption("sqlRenderTempEmulationSchena") +# ) +# +# expect_equal(vocabVersion, vocabVersionExternalConceptCountsTable[1,1]) +# } +# +# }) +# +# test_that("Creating and checking externalConceptCounts temp table", { +# if (dbmsToTest == "sqlite") { +# # Creating externalConceptCounts +# # sql_lite_path <- file.path(test_path(), databaseFile) +# connectionDetails <- testServers[["sqlite"]]$connectionDetails +# connection <- connect(connectionDetails) +# cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema +# conceptCountsTable <- "concept_counts" +# CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, +# cdmDatabaseSchema = 
cdmDatabaseSchema, +# tempEmulationSchema = NULL, +# conceptCountsTable = conceptCountsTable, +# conceptCountsDatabaseSchema = cdmDatabaseSchema, +# conceptCountsTableIsTemp = TRUE, +# removeCurrentTable = TRUE) +# +# concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") +# expect_equal(concept_counts_info$NAME, c("concept_id", +# "concept_count", +# "concept_subjects")) +# checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, +# name = conceptCountsTable, +# databaseSchema = cdmDatabaseSchema) +# expect_true(checkConceptCountsTableExists) +# } +# }) diff --git a/tests/testthat/test-runCohortRelationship.R b/tests/testthat/test-runCohortRelationship.R index bc442b0c2..5acd7475d 100644 --- a/tests/testthat/test-runCohortRelationship.R +++ b/tests/testthat/test-runCohortRelationship.R @@ -1,427 +1,427 @@ -test_that("Testing executeCohortRelationshipDiagnostics", { - skip_if(skipCdmTests, "cdm settings not configured") - - # manually create cohort table and load to table - # for the logic to work - there has to be some overlap of the comparator cohort over target cohort - # note - we will not be testing offset in this test. 
it is expected to work as it is a simple substraction - - temporalStartDays <- c(0) - temporalEndDays <- c(0) - - targetCohort <- dplyr::tibble( - cohortDefinitionId = c(1), - subjectId = c(1), - cohortStartDate = c(as.Date("1900-01-15")), - cohortEndDate = c(as.Date("1900-01-31")) - ) # target cohort always one row - - comparatorCohort <- # all records here overlap with targetCohort - dplyr::tibble( - cohortDefinitionId = c(10, 10, 10), - subjectId = c(1, 1, 1), - cohortStartDate = c( - as.Date("1900-01-01"), - # starts before target cohort start - as.Date("1900-01-22"), - # starts during target cohort period and ends during target cohort period - as.Date("1900-01-31") - ), - cohortEndDate = c( - as.Date("1900-01-20"), - as.Date("1900-01-29"), - as.Date("1900-01-31") - ) - ) - - cohort <- dplyr::bind_rows( - targetCohort, - comparatorCohort, - targetCohort %>% - dplyr::mutate(cohortDefinitionId = 2), - comparatorCohort %>% - dplyr::mutate(cohortDefinitionId = 20) - ) - - connectionCohortRelationship <- - DatabaseConnector::connect(connectionDetails) - - # to do - with incremental = FALSE - with_dbc_connection(connectionCohortRelationship, { - sysTime <- as.numeric(Sys.time()) * 100000 - tableName <- paste0("cr", sysTime) - observationTableName <- paste0("op", sysTime) - - DatabaseConnector::insertTable( - connection = connectionCohortRelationship, - databaseSchema = cohortDatabaseSchema, - tableName = tableName, - data = cohort, - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = FALSE, - camelCaseToSnakeCase = TRUE, - progressBar = FALSE - ) - - cohortDefinitionSet <- - cohort %>% - dplyr::select(cohortDefinitionId) %>% - dplyr::distinct() %>% - dplyr::rename("cohortId" = "cohortDefinitionId") %>% - dplyr::rowwise() %>% - dplyr::mutate(json = RJSONIO::toJSON(list( - cohortId = cohortId, - randomString = c( - sample(x = LETTERS, 5, replace = TRUE), - sample(x = LETTERS, 4, replace = TRUE), - sample(LETTERS, 1, replace = TRUE) - ) - ))) %>% - 
dplyr::ungroup() %>% - dplyr::mutate( - sql = json, - checksum = CohortDiagnostics:::computeChecksum(json) - ) - - - exportFolder <- tempdir() - exportFile <- tempfile() - - unlink( - x = exportFolder, - recursive = TRUE, - force = TRUE - ) - dir.create( - path = exportFolder, - showWarnings = FALSE, - recursive = TRUE - ) - - CohortDiagnostics:::executeCohortRelationshipDiagnostics( - connection = connectionCohortRelationship, - databaseId = "testDataSourceName", - exportFolder = exportFolder, - cohortDatabaseSchema = cohortDatabaseSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortTable = tableName, - tempEmulationSchema = NULL, - cohortDefinitionSet = cohortDefinitionSet %>% - dplyr::filter(cohortId %in% c(1, 10)), - temporalCovariateSettings = list( - temporalStartDays = c(-365, -30), - temporalEndDays = c(-31, -1) - ), - minCellCount = 0, - recordKeepingFile = paste0(exportFile, "recordKeeping"), - incremental = TRUE, - batchSize = 2 - ) - - recordKeepingFileData <- - readr::read_csv( - file = paste0(exportFile, "recordKeeping"), - col_types = readr::cols() - ) - - # testing if check sum if written to field called targetChecksum - testthat::expect_true("targetChecksum" %in% colnames(recordKeepingFileData)) - testthat::expect_true("comparatorChecksum" %in% colnames(recordKeepingFileData)) - testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) - - testthat::expect_equal( - object = recordKeepingFileData %>% - dplyr::filter(cohortId == 1) %>% - dplyr::filter(comparatorId == 10) %>% - dplyr::select(checksum) %>% - dplyr::pull(checksum), - expected = recordKeepingFileData %>% - dplyr::filter(cohortId == 1) %>% - dplyr::filter(comparatorId == 10) %>% - dplyr::mutate( - checksum2 = paste0( - targetChecksum, - comparatorChecksum - ) - ) %>% - dplyr::pull(checksum2) - ) - - - - ## testing if subset works - allCohortIds <- cohortDefinitionSet %>% - dplyr::filter(cohortId %in% c(1, 10, 2)) %>% - dplyr::select(cohortId, checksum) %>% - 
dplyr::rename( - targetCohortId = cohortId, - targetChecksum = checksum - ) %>% - dplyr::distinct() - - combinationsOfPossibleCohortRelationships <- allCohortIds %>% - tidyr::crossing( - allCohortIds %>% - dplyr::rename( - comparatorCohortId = targetCohortId, - comparatorChecksum = targetChecksum - ) - ) %>% - dplyr::filter(targetCohortId != comparatorCohortId) %>% - dplyr::arrange(targetCohortId, comparatorCohortId) %>% - dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum)) - - subset <- CohortDiagnostics:::subsetToRequiredCombis( - combis = combinationsOfPossibleCohortRelationships, - task = "runCohortRelationship", - incremental = TRUE, - recordKeepingFile = paste0(exportFile, "recordKeeping") - ) %>% dplyr::tibble() - - ### subset should not have the combinations in record keeping file - shouldBeDfOfZeroRows <- subset %>% - dplyr::inner_join( - recordKeepingFileData %>% - dplyr::select( - "cohortId", - "comparatorId" - ) %>% - dplyr::distinct() %>% - dplyr::rename( - targetCohortId = "cohortId", - comparatorCohortId = "comparatorId" - ), - by = c("targetCohortId", "comparatorCohortId") - ) - - testthat::expect_equal( - object = nrow(shouldBeDfOfZeroRows), - expected = 0, - info = "Looks like subset and record keeping file did not match." 
- ) - - - ## running again by adding cohort 2, to previously run 1 and 10 - CohortDiagnostics:::executeCohortRelationshipDiagnostics( - connection = connectionCohortRelationship, - databaseId = "testDataSourceName", - exportFolder = exportFolder, - cohortDatabaseSchema = cohortDatabaseSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortTable = tableName, - tempEmulationSchema = NULL, - cohortDefinitionSet = cohortDefinitionSet %>% - dplyr::filter(cohortId %in% c(1, 10, 2)), - temporalCovariateSettings = list( - temporalStartDays = c(-365, -30), - temporalEndDays = c(-31, -1) - ), - minCellCount = 0, - recordKeepingFile = paste0(exportFile, "recordKeeping"), - incremental = TRUE, - batchSize = 2 - ) - - recordKeepingFileData2 <- - readr::read_csv( - file = paste0(exportFile, "recordKeeping"), - col_types = readr::cols() - ) - # record keeping file should have 6 combinations - for 3 cohorts - testthat::expect_equal( - object = nrow(recordKeepingFileData2), - expected = 3 * 2 * 1 - ) - - # record keeping file should have 4 additional combinations - testthat::expect_equal( - object = recordKeepingFileData2 %>% - dplyr::anti_join( - recordKeepingFileData %>% - dplyr::select( - cohortId, - comparatorId - ), - by = c("cohortId", "comparatorId") - ) %>% - nrow(), - expected = 4 - ) - - - # check what happens for an unrelated cohort combination - allCohortIds <- cohortDefinitionSet %>% - dplyr::filter(cohortId %in% c(2, 20)) %>% - dplyr::select(cohortId, checksum) %>% - dplyr::rename( - targetCohortId = cohortId, - targetChecksum = checksum - ) %>% - dplyr::distinct() - - combinationsOfPossibleCohortRelationships <- allCohortIds %>% - tidyr::crossing( - allCohortIds %>% - dplyr::rename( - comparatorCohortId = targetCohortId, - comparatorChecksum = targetChecksum - ) - ) %>% - dplyr::filter(targetCohortId != comparatorCohortId) %>% - dplyr::arrange(targetCohortId, comparatorCohortId) %>% - dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum)) - - subset 
<- CohortDiagnostics:::subsetToRequiredCombis( - combis = combinationsOfPossibleCohortRelationships, - task = "runCohortRelationship", - incremental = TRUE, - recordKeepingFile = paste0(exportFile, "recordKeeping") - ) %>% dplyr::tibble() - - ### subset should be two rows in subsets that are not in record keeping file - shouldBeTwoRows <- subset %>% - dplyr::anti_join( - recordKeepingFileData2 %>% - dplyr::select( - "cohortId", - "comparatorId" - ) %>% - dplyr::rename( - targetCohortId = cohortId, - comparatorCohortId = comparatorId - ), - by = c("targetCohortId", "comparatorCohortId") - ) - - testthat::expect_equal( - object = nrow(shouldBeTwoRows), - expected = 2, - info = "Looks like subset and record keeping file did not match, Two new cohorts should have run." - ) - }) -}) - - - - - -test_that("Testing cohort relationship logic - incremental FALSE", { - skip_if(skipCdmTests, "cdm settings not configured") - - # manually create cohort table and load to table - # for the logic to work - there has to be some overlap of the comparator cohort over target cohort - # note - we will not be testing offset in this test. 
it is expected to work as it is a simple substraction - - temporalStartDays <- c(0) - temporalEndDays <- c(0) - - targetCohort <- dplyr::tibble( - cohortDefinitionId = c(1), - subjectId = c(1), - cohortStartDate = c(as.Date("1900-01-15")), - cohortEndDate = c(as.Date("1900-01-31")) - ) # target cohort always one row - - comparatorCohort <- # all records here overlap with targetCohort - dplyr::tibble( - cohortDefinitionId = c(10, 10, 10), - subjectId = c(1, 1, 1), - cohortStartDate = c( - as.Date("1900-01-01"), # starts before target cohort start - as.Date("1900-01-22"), # starts during target cohort period and ends during target cohort period - as.Date("1900-01-31") - ), - cohortEndDate = c( - as.Date("1900-01-20"), - as.Date("1900-01-29"), - as.Date("1900-01-31") - ) - ) - - cohort <- dplyr::bind_rows(targetCohort, comparatorCohort) - - connectionCohortRelationship <- - DatabaseConnector::connect(connectionDetails) - - # to do - with incremental = FALSE - with_dbc_connection(connectionCohortRelationship, { - sysTime <- as.numeric(Sys.time()) * 100000 - tableName <- paste0("cr", sysTime) - observationTableName <- paste0("op", sysTime) - - DatabaseConnector::insertTable( - connection = connectionCohortRelationship, - databaseSchema = cohortDatabaseSchema, - tableName = tableName, - data = cohort, - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = FALSE, - camelCaseToSnakeCase = TRUE, - progressBar = FALSE - ) - - cohortRelationship <- runCohortRelationshipDiagnostics( - connection = connectionCohortRelationship, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = tableName, - targetCohortIds = c(1), - comparatorCohortIds = c(10), - relationshipDays = dplyr::tibble( - startDay = temporalStartDays, - endDay = temporalEndDays - ) - ) - - sqlDrop <- - "IF OBJECT_ID('@cohort_database_schema.@cohort_relationship_cohort_table', 'U') IS NOT NULL - DROP TABLE @cohort_database_schema.@cohort_relationship_cohort_table;" - 
DatabaseConnector::renderTranslateExecuteSql( - connection = connectionCohortRelationship, - sql = sqlDrop, - cohort_database_schema = cohortDatabaseSchema, - cohort_relationship_cohort_table = tableName, - profile = FALSE, - progressBar = FALSE - ) - - cohortRelationshipT1C10 <- cohortRelationship %>% - dplyr::filter(cohortId == 1) %>% - dplyr::filter(comparatorCohortId == 10) - - testthat::expect_equal( - object = cohortRelationshipT1C10$subCsBeforeTs, - expected = 1 - ) # there is one subject in comparator that starts before target - - testthat::expect_equal( - object = cohortRelationshipT1C10$subCsBeforeTe, - expected = 1 - ) # there is one subject in comparator that starts before target end - - testthat::expect_equal( - object = cohortRelationshipT1C10$subCsAfterTs, - expected = 1 - ) # there is one subject in comparator that starts after target start - - testthat::expect_equal( - object = cohortRelationshipT1C10$subCsAfterTs, - expected = 1 - ) # there is one subject in comparator that starts after target start - - testthat::expect_equal( - object = cohortRelationshipT1C10$subCsOnTe, - expected = 1 - ) # there is one subject in comparator that starts on target end - - testthat::expect_equal( - object = cohortRelationshipT1C10$subCsWindowT, - expected = 1 - ) # there is one subject in comparator that started within the window of Target cohort - - testthat::expect_equal( - object = cohortRelationshipT1C10$subCeWindowT, - expected = 1 - ) # there is one subject in comparator that ended within the window of Target cohort - }) -}) +# test_that("Testing executeCohortRelationshipDiagnostics", { +# skip_if(skipCdmTests, "cdm settings not configured") +# +# # manually create cohort table and load to table +# # for the logic to work - there has to be some overlap of the comparator cohort over target cohort +# # note - we will not be testing offset in this test. 
it is expected to work as it is a simple substraction +# +# temporalStartDays <- c(0) +# temporalEndDays <- c(0) +# +# targetCohort <- dplyr::tibble( +# cohortDefinitionId = c(1), +# subjectId = c(1), +# cohortStartDate = c(as.Date("1900-01-15")), +# cohortEndDate = c(as.Date("1900-01-31")) +# ) # target cohort always one row +# +# comparatorCohort <- # all records here overlap with targetCohort +# dplyr::tibble( +# cohortDefinitionId = c(10, 10, 10), +# subjectId = c(1, 1, 1), +# cohortStartDate = c( +# as.Date("1900-01-01"), +# # starts before target cohort start +# as.Date("1900-01-22"), +# # starts during target cohort period and ends during target cohort period +# as.Date("1900-01-31") +# ), +# cohortEndDate = c( +# as.Date("1900-01-20"), +# as.Date("1900-01-29"), +# as.Date("1900-01-31") +# ) +# ) +# +# cohort <- dplyr::bind_rows( +# targetCohort, +# comparatorCohort, +# targetCohort %>% +# dplyr::mutate(cohortDefinitionId = 2), +# comparatorCohort %>% +# dplyr::mutate(cohortDefinitionId = 20) +# ) +# +# connectionCohortRelationship <- +# DatabaseConnector::connect(connectionDetails) +# +# # to do - with incremental = FALSE +# with_dbc_connection(connectionCohortRelationship, { +# sysTime <- as.numeric(Sys.time()) * 100000 +# tableName <- paste0("cr", sysTime) +# observationTableName <- paste0("op", sysTime) +# +# DatabaseConnector::insertTable( +# connection = connectionCohortRelationship, +# databaseSchema = cohortDatabaseSchema, +# tableName = tableName, +# data = cohort, +# dropTableIfExists = TRUE, +# createTable = TRUE, +# tempTable = FALSE, +# camelCaseToSnakeCase = TRUE, +# progressBar = FALSE +# ) +# +# cohortDefinitionSet <- +# cohort %>% +# dplyr::select(cohortDefinitionId) %>% +# dplyr::distinct() %>% +# dplyr::rename("cohortId" = "cohortDefinitionId") %>% +# dplyr::rowwise() %>% +# dplyr::mutate(json = RJSONIO::toJSON(list( +# cohortId = cohortId, +# randomString = c( +# sample(x = LETTERS, 5, replace = TRUE), +# sample(x = LETTERS, 4, replace = 
TRUE), +# sample(LETTERS, 1, replace = TRUE) +# ) +# ))) %>% +# dplyr::ungroup() %>% +# dplyr::mutate( +# sql = json, +# checksum = CohortDiagnostics:::computeChecksum(json) +# ) +# +# +# exportFolder <- tempdir() +# exportFile <- tempfile() +# +# unlink( +# x = exportFolder, +# recursive = TRUE, +# force = TRUE +# ) +# dir.create( +# path = exportFolder, +# showWarnings = FALSE, +# recursive = TRUE +# ) +# +# CohortDiagnostics:::executeCohortRelationshipDiagnostics( +# connection = connectionCohortRelationship, +# databaseId = "testDataSourceName", +# exportFolder = exportFolder, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cdmDatabaseSchema = cdmDatabaseSchema, +# cohortTable = tableName, +# tempEmulationSchema = NULL, +# cohortDefinitionSet = cohortDefinitionSet %>% +# dplyr::filter(cohortId %in% c(1, 10)), +# temporalCovariateSettings = list( +# temporalStartDays = c(-365, -30), +# temporalEndDays = c(-31, -1) +# ), +# minCellCount = 0, +# recordKeepingFile = paste0(exportFile, "recordKeeping"), +# incremental = TRUE, +# batchSize = 2 +# ) +# +# recordKeepingFileData <- +# readr::read_csv( +# file = paste0(exportFile, "recordKeeping"), +# col_types = readr::cols() +# ) +# +# # testing if check sum if written to field called targetChecksum +# testthat::expect_true("targetChecksum" %in% colnames(recordKeepingFileData)) +# testthat::expect_true("comparatorChecksum" %in% colnames(recordKeepingFileData)) +# testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) +# +# testthat::expect_equal( +# object = recordKeepingFileData %>% +# dplyr::filter(cohortId == 1) %>% +# dplyr::filter(comparatorId == 10) %>% +# dplyr::select(checksum) %>% +# dplyr::pull(checksum), +# expected = recordKeepingFileData %>% +# dplyr::filter(cohortId == 1) %>% +# dplyr::filter(comparatorId == 10) %>% +# dplyr::mutate( +# checksum2 = paste0( +# targetChecksum, +# comparatorChecksum +# ) +# ) %>% +# dplyr::pull(checksum2) +# ) +# +# +# +# ## testing if subset works +# 
allCohortIds <- cohortDefinitionSet %>% +# dplyr::filter(cohortId %in% c(1, 10, 2)) %>% +# dplyr::select(cohortId, checksum) %>% +# dplyr::rename( +# targetCohortId = cohortId, +# targetChecksum = checksum +# ) %>% +# dplyr::distinct() +# +# combinationsOfPossibleCohortRelationships <- allCohortIds %>% +# tidyr::crossing( +# allCohortIds %>% +# dplyr::rename( +# comparatorCohortId = targetCohortId, +# comparatorChecksum = targetChecksum +# ) +# ) %>% +# dplyr::filter(targetCohortId != comparatorCohortId) %>% +# dplyr::arrange(targetCohortId, comparatorCohortId) %>% +# dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum)) +# +# subset <- CohortDiagnostics:::subsetToRequiredCombis( +# combis = combinationsOfPossibleCohortRelationships, +# task = "runCohortRelationship", +# incremental = TRUE, +# recordKeepingFile = paste0(exportFile, "recordKeeping") +# ) %>% dplyr::tibble() +# +# ### subset should not have the combinations in record keeping file +# shouldBeDfOfZeroRows <- subset %>% +# dplyr::inner_join( +# recordKeepingFileData %>% +# dplyr::select( +# "cohortId", +# "comparatorId" +# ) %>% +# dplyr::distinct() %>% +# dplyr::rename( +# targetCohortId = "cohortId", +# comparatorCohortId = "comparatorId" +# ), +# by = c("targetCohortId", "comparatorCohortId") +# ) +# +# testthat::expect_equal( +# object = nrow(shouldBeDfOfZeroRows), +# expected = 0, +# info = "Looks like subset and record keeping file did not match." 
+# ) +# +# +# ## running again by adding cohort 2, to previously run 1 and 10 +# CohortDiagnostics:::executeCohortRelationshipDiagnostics( +# connection = connectionCohortRelationship, +# databaseId = "testDataSourceName", +# exportFolder = exportFolder, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cdmDatabaseSchema = cdmDatabaseSchema, +# cohortTable = tableName, +# tempEmulationSchema = NULL, +# cohortDefinitionSet = cohortDefinitionSet %>% +# dplyr::filter(cohortId %in% c(1, 10, 2)), +# temporalCovariateSettings = list( +# temporalStartDays = c(-365, -30), +# temporalEndDays = c(-31, -1) +# ), +# minCellCount = 0, +# recordKeepingFile = paste0(exportFile, "recordKeeping"), +# incremental = TRUE, +# batchSize = 2 +# ) +# +# recordKeepingFileData2 <- +# readr::read_csv( +# file = paste0(exportFile, "recordKeeping"), +# col_types = readr::cols() +# ) +# # record keeping file should have 6 combinations - for 3 cohorts +# testthat::expect_equal( +# object = nrow(recordKeepingFileData2), +# expected = 3 * 2 * 1 +# ) +# +# # record keeping file should have 4 additional combinations +# testthat::expect_equal( +# object = recordKeepingFileData2 %>% +# dplyr::anti_join( +# recordKeepingFileData %>% +# dplyr::select( +# cohortId, +# comparatorId +# ), +# by = c("cohortId", "comparatorId") +# ) %>% +# nrow(), +# expected = 4 +# ) +# +# +# # check what happens for an unrelated cohort combination +# allCohortIds <- cohortDefinitionSet %>% +# dplyr::filter(cohortId %in% c(2, 20)) %>% +# dplyr::select(cohortId, checksum) %>% +# dplyr::rename( +# targetCohortId = cohortId, +# targetChecksum = checksum +# ) %>% +# dplyr::distinct() +# +# combinationsOfPossibleCohortRelationships <- allCohortIds %>% +# tidyr::crossing( +# allCohortIds %>% +# dplyr::rename( +# comparatorCohortId = targetCohortId, +# comparatorChecksum = targetChecksum +# ) +# ) %>% +# dplyr::filter(targetCohortId != comparatorCohortId) %>% +# dplyr::arrange(targetCohortId, comparatorCohortId) %>% +# 
dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum)) +# +# subset <- CohortDiagnostics:::subsetToRequiredCombis( +# combis = combinationsOfPossibleCohortRelationships, +# task = "runCohortRelationship", +# incremental = TRUE, +# recordKeepingFile = paste0(exportFile, "recordKeeping") +# ) %>% dplyr::tibble() +# +# ### subset should be two rows in subsets that are not in record keeping file +# shouldBeTwoRows <- subset %>% +# dplyr::anti_join( +# recordKeepingFileData2 %>% +# dplyr::select( +# "cohortId", +# "comparatorId" +# ) %>% +# dplyr::rename( +# targetCohortId = cohortId, +# comparatorCohortId = comparatorId +# ), +# by = c("targetCohortId", "comparatorCohortId") +# ) +# +# testthat::expect_equal( +# object = nrow(shouldBeTwoRows), +# expected = 2, +# info = "Looks like subset and record keeping file did not match, Two new cohorts should have run." +# ) +# }) +# }) +# +# +# +# +# +# test_that("Testing cohort relationship logic - incremental FALSE", { +# skip_if(skipCdmTests, "cdm settings not configured") +# +# # manually create cohort table and load to table +# # for the logic to work - there has to be some overlap of the comparator cohort over target cohort +# # note - we will not be testing offset in this test. 
it is expected to work as it is a simple substraction +# +# temporalStartDays <- c(0) +# temporalEndDays <- c(0) +# +# targetCohort <- dplyr::tibble( +# cohortDefinitionId = c(1), +# subjectId = c(1), +# cohortStartDate = c(as.Date("1900-01-15")), +# cohortEndDate = c(as.Date("1900-01-31")) +# ) # target cohort always one row +# +# comparatorCohort <- # all records here overlap with targetCohort +# dplyr::tibble( +# cohortDefinitionId = c(10, 10, 10), +# subjectId = c(1, 1, 1), +# cohortStartDate = c( +# as.Date("1900-01-01"), # starts before target cohort start +# as.Date("1900-01-22"), # starts during target cohort period and ends during target cohort period +# as.Date("1900-01-31") +# ), +# cohortEndDate = c( +# as.Date("1900-01-20"), +# as.Date("1900-01-29"), +# as.Date("1900-01-31") +# ) +# ) +# +# cohort <- dplyr::bind_rows(targetCohort, comparatorCohort) +# +# connectionCohortRelationship <- +# DatabaseConnector::connect(connectionDetails) +# +# # to do - with incremental = FALSE +# with_dbc_connection(connectionCohortRelationship, { +# sysTime <- as.numeric(Sys.time()) * 100000 +# tableName <- paste0("cr", sysTime) +# observationTableName <- paste0("op", sysTime) +# +# DatabaseConnector::insertTable( +# connection = connectionCohortRelationship, +# databaseSchema = cohortDatabaseSchema, +# tableName = tableName, +# data = cohort, +# dropTableIfExists = TRUE, +# createTable = TRUE, +# tempTable = FALSE, +# camelCaseToSnakeCase = TRUE, +# progressBar = FALSE +# ) +# +# cohortRelationship <- runCohortRelationshipDiagnostics( +# connection = connectionCohortRelationship, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cohortTable = tableName, +# targetCohortIds = c(1), +# comparatorCohortIds = c(10), +# relationshipDays = dplyr::tibble( +# startDay = temporalStartDays, +# endDay = temporalEndDays +# ) +# ) +# +# sqlDrop <- +# "IF OBJECT_ID('@cohort_database_schema.@cohort_relationship_cohort_table', 'U') IS NOT NULL +# DROP TABLE 
@cohort_database_schema.@cohort_relationship_cohort_table;" +# DatabaseConnector::renderTranslateExecuteSql( +# connection = connectionCohortRelationship, +# sql = sqlDrop, +# cohort_database_schema = cohortDatabaseSchema, +# cohort_relationship_cohort_table = tableName, +# profile = FALSE, +# progressBar = FALSE +# ) +# +# cohortRelationshipT1C10 <- cohortRelationship %>% +# dplyr::filter(cohortId == 1) %>% +# dplyr::filter(comparatorCohortId == 10) +# +# testthat::expect_equal( +# object = cohortRelationshipT1C10$subCsBeforeTs, +# expected = 1 +# ) # there is one subject in comparator that starts before target +# +# testthat::expect_equal( +# object = cohortRelationshipT1C10$subCsBeforeTe, +# expected = 1 +# ) # there is one subject in comparator that starts before target end +# +# testthat::expect_equal( +# object = cohortRelationshipT1C10$subCsAfterTs, +# expected = 1 +# ) # there is one subject in comparator that starts after target start +# +# testthat::expect_equal( +# object = cohortRelationshipT1C10$subCsAfterTs, +# expected = 1 +# ) # there is one subject in comparator that starts after target start +# +# testthat::expect_equal( +# object = cohortRelationshipT1C10$subCsOnTe, +# expected = 1 +# ) # there is one subject in comparator that starts on target end +# +# testthat::expect_equal( +# object = cohortRelationshipT1C10$subCsWindowT, +# expected = 1 +# ) # there is one subject in comparator that started within the window of Target cohort +# +# testthat::expect_equal( +# object = cohortRelationshipT1C10$subCeWindowT, +# expected = 1 +# ) # there is one subject in comparator that ended within the window of Target cohort +# }) +# }) diff --git a/tests/testthat/test-runIncidenceRate.R b/tests/testthat/test-runIncidenceRate.R index 835e21391..5877f2df8 100644 --- a/tests/testthat/test-runIncidenceRate.R +++ b/tests/testthat/test-runIncidenceRate.R @@ -1,63 +1,63 @@ - -# test getIncidenceRate on all dbms -for (nm in names(testServers)) { - server <- 
testServers[[nm]] - - test_that(paste("getIncidenceRate works on", nm), { - - connection <- DatabaseConnector::connect(server$connectionDetails) - result <- getIncidenceRate( - connection = connection, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cdmDatabaseSchema = server$cdmDatabaseSchema, - vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - firstOccurrenceOnly = TRUE, - washoutPeriod = 365, - cohortId = server$cohortDefinitionSet$cohortId[1]) - - expect_true(is.data.frame(result)) - - # getResultsDataModelSpecifications("incidence_rate")$columnName - expect_equal( - names(result), - c("cohortCount", "personYears", "gender", "ageGroup", "calendarYear", "incidenceRate") - ) - - DatabaseConnector::disconnect(connection) - }) -} - - -# only test runIncidenceRate on sqlite (or duckdb) -test_that("runIncidenceRate", { - skip_if_not("sqlite" %in% names(testServers)) - - server <- testServers[["sqlite"]] - exportFolder <- tempfile() - dir.create(exportFolder) - - incrementalFolder <- tempfile() - dir.create(incrementalFolder) - - connection <- DatabaseConnector::connect(server$connectionDetails) - - runIncidenceRate( - connection, - cohortDefinitionSet = server$cohortDefinitionSet[1:2,], - tempEmulationSchema = server$tempEmulationSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - databaseId = "GiBleed", - exportFolder = exportFolder, - minCellCount = 1, - washoutPeriod = 0, - incremental = F) - DatabaseConnector::disconnect(connection) - expect_true(file.exists(file.path(exportFolder, "incidence_rate.csv"))) -}) - - - +# +# # test getIncidenceRate on all dbms +# for (nm in names(testServers)) { +# server <- testServers[[nm]] +# +# test_that(paste("getIncidenceRate works on", nm), { +# +# connection <- DatabaseConnector::connect(server$connectionDetails) +# result <- 
getIncidenceRate( +# connection = connection, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# firstOccurrenceOnly = TRUE, +# washoutPeriod = 365, +# cohortId = server$cohortDefinitionSet$cohortId[1]) +# +# expect_true(is.data.frame(result)) +# +# # getResultsDataModelSpecifications("incidence_rate")$columnName +# expect_equal( +# names(result), +# c("cohortCount", "personYears", "gender", "ageGroup", "calendarYear", "incidenceRate") +# ) +# +# DatabaseConnector::disconnect(connection) +# }) +# } +# +# +# # only test runIncidenceRate on sqlite (or duckdb) +# test_that("runIncidenceRate", { +# skip_if_not("sqlite" %in% names(testServers)) +# +# server <- testServers[["sqlite"]] +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# incrementalFolder <- tempfile() +# dir.create(incrementalFolder) +# +# connection <- DatabaseConnector::connect(server$connectionDetails) +# +# runIncidenceRate( +# connection, +# cohortDefinitionSet = server$cohortDefinitionSet[1:2,], +# tempEmulationSchema = server$tempEmulationSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# databaseId = "GiBleed", +# exportFolder = exportFolder, +# minCellCount = 1, +# washoutPeriod = 0, +# incremental = F) +# DatabaseConnector::disconnect(connection) +# expect_true(file.exists(file.path(exportFolder, "incidence_rate.csv"))) +# }) +# +# +# diff --git a/tests/testthat/test-runInclusionStatistics.R b/tests/testthat/test-runInclusionStatistics.R index 2ab1a4d23..f1eb3d09c 100644 --- a/tests/testthat/test-runInclusionStatistics.R +++ b/tests/testthat/test-runInclusionStatistics.R @@ -1,76 +1,76 @@ -# Copyright 2024 Observational Health Data Sciences and Informatics -# -# This file is part 
of CohortDiagnostics -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -for (nm in names(testServers)) { - - server <- testServers[[nm]] - con <- connect(server$connectionDetails) - - exportFolder <- file.path(tempdir(), paste0(nm, "exp")) - databaseId <- "myDB" - minCellCount <- 5 - recordKeepingFile <- file.path(exportFolder, "record.csv") - cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = server$cohortTable) - - test_that(paste("test run inclusion statistics output", nm), { - - dir.create(exportFolder) - runInclusionStatistics(connection = con, - exportFolder = exportFolder, - databaseId = databaseId, - cohortDefinitionSet = server$cohortDefinitionSet, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTableNames = cohortTableNames, - incremental = TRUE, - minCellCount = minCellCount, - recordKeepingFile = recordKeepingFile) - - # Check cohort_inc_result - expect_true(file.exists(file.path(exportFolder, "cohort_inc_result.csv"))) - incResult <- read.csv(file.path(exportFolder, "cohort_inc_result.csv")) - expect_equal(colnames(incResult), c("database_id", "cohort_id", "mode_id", "inclusion_rule_mask" , "person_count")) - expect_equal(unique(incResult$database_id), databaseId) - expect_true(all(incResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) - - # Check cohort_inc_stats - expect_true(file.exists(file.path(exportFolder, "cohort_inc_stats.csv"))) - incStatsResult <- read.csv(file.path(exportFolder, "cohort_inc_stats.csv")) 
- expect_equal(colnames(incStatsResult), c("cohort_definition_id", "rule_sequence", "person_count", "gain_count" , "person_total", "mode_id")) - - # Check cohort_inclusion - expect_true(file.exists(file.path(exportFolder, "cohort_inclusion.csv"))) - inclusionResult <- read.csv(file.path(exportFolder, "cohort_inclusion.csv")) - expect_equal(colnames(inclusionResult), c("database_id", "cohort_id", "rule_sequence", "name" , "description")) - expect_equal(unique(inclusionResult$database_id), databaseId) - expect_true(all(inclusionResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) - - # Check cohort_summary_stats - expect_true(file.exists(file.path(exportFolder, "cohort_summary_stats.csv"))) - sumStatsResult <- read.csv(file.path(exportFolder, "cohort_summary_stats.csv")) - expect_equal(colnames(sumStatsResult), c("database_id", "cohort_id", "mode_id", "base_count" , "final_count")) - expect_equal(unique(sumStatsResult$database_id), databaseId) - expect_true(all(sumStatsResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) - - # Check recordKeepingFile - expect_true(file.exists(recordKeepingFile)) - recordKeeping <- read.csv(recordKeepingFile) - expect_equal(colnames(recordKeeping), c("cohortId", "task", "checksum" , "timeStamp")) - expect_equal(unique(recordKeeping$task), "runInclusionStatistics") - expect_true(all(recordKeeping$cohortId %in% server$cohortDefinitionSet$cohortId)) - - unlink(exportFolder) - }) -} +# # Copyright 2024 Observational Health Data Sciences and Informatics +# # +# # This file is part of CohortDiagnostics +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. 
+# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. +# +# for (nm in names(testServers)) { +# +# server <- testServers[[nm]] +# con <- connect(server$connectionDetails) +# +# exportFolder <- file.path(tempdir(), paste0(nm, "exp")) +# databaseId <- "myDB" +# minCellCount <- 5 +# recordKeepingFile <- file.path(exportFolder, "record.csv") +# cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = server$cohortTable) +# +# test_that(paste("test run inclusion statistics output", nm), { +# +# dir.create(exportFolder) +# runInclusionStatistics(connection = con, +# exportFolder = exportFolder, +# databaseId = databaseId, +# cohortDefinitionSet = server$cohortDefinitionSet, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTableNames = cohortTableNames, +# incremental = TRUE, +# minCellCount = minCellCount, +# recordKeepingFile = recordKeepingFile) +# +# # Check cohort_inc_result +# expect_true(file.exists(file.path(exportFolder, "cohort_inc_result.csv"))) +# incResult <- read.csv(file.path(exportFolder, "cohort_inc_result.csv")) +# expect_equal(colnames(incResult), c("database_id", "cohort_id", "mode_id", "inclusion_rule_mask" , "person_count")) +# expect_equal(unique(incResult$database_id), databaseId) +# expect_true(all(incResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) +# +# # Check cohort_inc_stats +# expect_true(file.exists(file.path(exportFolder, "cohort_inc_stats.csv"))) +# incStatsResult <- read.csv(file.path(exportFolder, "cohort_inc_stats.csv")) +# expect_equal(colnames(incStatsResult), c("cohort_definition_id", "rule_sequence", "person_count", 
"gain_count" , "person_total", "mode_id")) +# +# # Check cohort_inclusion +# expect_true(file.exists(file.path(exportFolder, "cohort_inclusion.csv"))) +# inclusionResult <- read.csv(file.path(exportFolder, "cohort_inclusion.csv")) +# expect_equal(colnames(inclusionResult), c("database_id", "cohort_id", "rule_sequence", "name" , "description")) +# expect_equal(unique(inclusionResult$database_id), databaseId) +# expect_true(all(inclusionResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) +# +# # Check cohort_summary_stats +# expect_true(file.exists(file.path(exportFolder, "cohort_summary_stats.csv"))) +# sumStatsResult <- read.csv(file.path(exportFolder, "cohort_summary_stats.csv")) +# expect_equal(colnames(sumStatsResult), c("database_id", "cohort_id", "mode_id", "base_count" , "final_count")) +# expect_equal(unique(sumStatsResult$database_id), databaseId) +# expect_true(all(sumStatsResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) +# +# # Check recordKeepingFile +# expect_true(file.exists(recordKeepingFile)) +# recordKeeping <- read.csv(recordKeepingFile) +# expect_equal(colnames(recordKeeping), c("cohortId", "task", "checksum" , "timeStamp")) +# expect_equal(unique(recordKeeping$task), "runInclusionStatistics") +# expect_true(all(recordKeeping$cohortId %in% server$cohortDefinitionSet$cohortId)) +# +# unlink(exportFolder) +# }) +# } diff --git a/tests/testthat/test-runResolvedConceptSets.R b/tests/testthat/test-runResolvedConceptSets.R index 92a735036..287797d1a 100644 --- a/tests/testthat/test-runResolvedConceptSets.R +++ b/tests/testthat/test-runResolvedConceptSets.R @@ -1,45 +1,45 @@ - -server <- testServers[[1]] - -for (server in testServers) { - test_that(paste("getResolvedConceptSets works on", server$connectionDetails$dbms), { - - connection <- DatabaseConnector::connect(server$connectionDetails) - result <- getResolvedConceptSets( - connection = connection, - cohortDefinitionSet = server$cohortDefinitionSet, - vocabularyDatabaseSchema = 
server$vocabularyDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema - ) - - expect_true(is.data.frame(result)) - expect_named(result, c("cohortId", "conceptSetId", "conceptId")) - expect_true(tempTableExists("concept_ids")) - expect_true(tempTableExists("inst_concept_sets")) - DatabaseConnector::disconnect(connection) - }) -} - -test_that("runResolvedConceptSets works", { - skip_if_not("sqlite" %in% names(testServers)) - server <- testServers[["sqlite"]] - connection <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - runResolvedConceptSets( - connection = connection, - cohortDefinitionSet = server$cohortDefinitionSet, - databaseId = server$connectionDetails$dbms, - exportFolder = exportFolder, - minCellCount = 1, - vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema - ) - - DatabaseConnector::disconnect(connection) - result <- readr::read_csv(file.path(exportFolder, "resolved_concepts.csv"), show_col_types = F) - expect_true(is.data.frame(result)) - expect_named(result, c("cohort_id", "concept_set_id", "concept_id", "database_id")) -}) - +# +# server <- testServers[[1]] +# +# for (server in testServers) { +# test_that(paste("getResolvedConceptSets works on", server$connectionDetails$dbms), { +# +# connection <- DatabaseConnector::connect(server$connectionDetails) +# result <- getResolvedConceptSets( +# connection = connection, +# cohortDefinitionSet = server$cohortDefinitionSet, +# vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema +# ) +# +# expect_true(is.data.frame(result)) +# expect_named(result, c("cohortId", "conceptSetId", "conceptId")) +# expect_true(tempTableExists("concept_ids")) +# expect_true(tempTableExists("inst_concept_sets")) +# DatabaseConnector::disconnect(connection) +# }) +# } +# +# test_that("runResolvedConceptSets works", { +# 
skip_if_not("sqlite" %in% names(testServers)) +# server <- testServers[["sqlite"]] +# connection <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# runResolvedConceptSets( +# connection = connection, +# cohortDefinitionSet = server$cohortDefinitionSet, +# databaseId = server$connectionDetails$dbms, +# exportFolder = exportFolder, +# minCellCount = 1, +# vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema +# ) +# +# DatabaseConnector::disconnect(connection) +# result <- readr::read_csv(file.path(exportFolder, "resolved_concepts.csv"), show_col_types = F) +# expect_true(is.data.frame(result)) +# expect_named(result, c("cohort_id", "concept_set_id", "concept_id", "database_id")) +# }) +# diff --git a/tests/testthat/test-runTimeSeries.R b/tests/testthat/test-runTimeSeries.R index b8b5feee2..e01518b0b 100644 --- a/tests/testthat/test-runTimeSeries.R +++ b/tests/testthat/test-runTimeSeries.R @@ -1,337 +1,337 @@ -test_that("Testing cohort time series execution", { - skip_if(skipCdmTests, "cdm settings not configured") - - connectionTimeSeries <- - DatabaseConnector::connect(connectionDetails) - - # to do - with incremental = FALSE - with_dbc_connection(connectionTimeSeries, { - cohort <- dplyr::tibble( - cohortDefinitionId = c(1, 1, 2, 2), - subjectId = c(1, 1, 1, 2), - cohortStartDate = c( - as.Date("2005-01-15"), - as.Date("2005-07-15"), - as.Date("2005-01-15"), - as.Date("2005-07-15") - ), - cohortEndDate = c( - as.Date("2005-05-15"), - as.Date("2005-09-15"), - as.Date("2005-05-15"), - as.Date("2005-09-15") - ) - ) - - cohort <- dplyr::bind_rows( - cohort, - cohort %>% - dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) - ) - - cohortDefinitionSet <- - cohort %>% - dplyr::select(cohortDefinitionId) %>% - dplyr::distinct() %>% - dplyr::rename("cohortId" = "cohortDefinitionId") %>% - dplyr::rowwise() %>% - dplyr::mutate(json = 
RJSONIO::toJSON(list( - cohortId = cohortId, - randomString = c( - sample(x = LETTERS, 5, replace = TRUE), - sample(x = LETTERS, 4, replace = TRUE), - sample(LETTERS, 1, replace = TRUE) - ) - ))) %>% - dplyr::ungroup() %>% - dplyr::mutate( - sql = json, - checksum = as.character(CohortDiagnostics:::computeChecksum(json)) - ) %>% - dplyr::ungroup() - - exportFolder <- tempdir() - exportFile <- tempfile() - - unlink( - x = exportFolder, - recursive = TRUE, - force = TRUE - ) - dir.create( - path = exportFolder, - showWarnings = FALSE, - recursive = TRUE - ) - - cohortTable <- - paste0( - "ct_", - format(Sys.time(), "%s"), - sample(1:100, 1) - ) - - DatabaseConnector::insertTable( - connection = connectionTimeSeries, - databaseSchema = cohortDatabaseSchema, - tableName = cohortTable, - data = cohort, - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = FALSE, - camelCaseToSnakeCase = TRUE, - progressBar = FALSE - ) - - CohortDiagnostics:::executeTimeSeriesDiagnostics( - connection = connectionTimeSeries, - tempEmulationSchema = tempEmulationSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, - cohortDefinitionSet = cohortDefinitionSet %>% - dplyr::filter(cohortId %in% c(1, 2)), - runCohortTimeSeries = TRUE, - runDataSourceTimeSeries = FALSE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - instantiatedCohorts = cohort$cohortDefinitionId, - incremental = TRUE, - recordKeepingFile = paste0(exportFile, "recordKeeping"), - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ), - batchSize = 1 - ) - - recordKeepingFileData <- - readr::read_csv( - file = paste0(exportFile, "recordKeeping"), - col_types = readr::cols() - ) - - # testing if check sum is written - testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) - - # result - 
timeSeriesResults1 <- - readr::read_csv( - file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols() - ) - - subset <- CohortDiagnostics:::subsetToRequiredCohorts( - cohorts = cohortDefinitionSet, - task = "runCohortTimeSeries", - incremental = TRUE, - recordKeepingFile = paste0(exportFile, "recordKeeping") - ) %>% - dplyr::arrange(cohortId) - - testthat::expect_equal( - object = subset$cohortId, - expected = c(1000, 2000) - ) - - - # delete the previously written results file. To see if the previously executed cohorts will have results after deletion - unlink( - x = file.path(exportFolder, "time_series.csv"), - recursive = TRUE, - force = TRUE - ) - - CohortDiagnostics:::executeTimeSeriesDiagnostics( - connection = connectionTimeSeries, - tempEmulationSchema = tempEmulationSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, - cohortDefinitionSet = cohortDefinitionSet, - runCohortTimeSeries = TRUE, - runDataSourceTimeSeries = FALSE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - instantiatedCohorts = cohort$cohortDefinitionId, - incremental = TRUE, - recordKeepingFile = paste0(exportFile, "recordKeeping"), - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ), - batchSize = 100 - ) - resultsNew <- - readr::read_csv( - file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols() - ) - - testthat::expect_equal( - object = resultsNew$cohort_id %>% unique() %>% sort(), - expected = c(1000, 2000) - ) - }) -}) - - - - -test_that("Testing time series logic", { - skip_if(skipCdmTests, "cdm settings not configured") - - connectionTimeSeries <- - DatabaseConnector::connect(connectionDetails) - - # to do - with incremental = FALSE - with_dbc_connection(connectionTimeSeries, { - # manually create cohort table and load to table - # 
Cohort table has a total of four records, with each cohort id having two each - # cohort 1 has one subject with two different cohort entries - # cohort 2 has two subject with two different cohort entries - cohort <- dplyr::tibble( - cohortDefinitionId = c(1, 1, 2, 2), - subjectId = c(1, 1, 1, 2), - cohortStartDate = c(as.Date("2005-01-15"), as.Date("2005-07-15"), as.Date("2005-01-15"), as.Date("2005-07-15")), - cohortEndDate = c(as.Date("2005-05-15"), as.Date("2005-09-15"), as.Date("2005-05-15"), as.Date("2005-09-15")) - ) - - cohortTable <- - paste0("ct_", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) - - DatabaseConnector::insertTable( - connection = connectionTimeSeries, - databaseSchema = cohortDatabaseSchema, - tableName = cohortTable, - data = cohort, - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = FALSE, - camelCaseToSnakeCase = TRUE, - progressBar = FALSE - ) - - timeSeries <- - runCohortTimeSeriesDiagnostics( - connection = connectionTimeSeries, - tempEmulationSchema = tempEmulationSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, - runCohortTimeSeries = TRUE, - runDataSourceTimeSeries = FALSE, # cannot test data source time series because we are using simulated cohort table - timeSeriesMinDate = as.Date("2004-01-01"), - timeSeriesMaxDate = as.Date("2006-12-31"), - cohortIds = c(1, 2), - stratifyByGender = FALSE, # cannot test stratification because it will require cohort table to be built from cdm - stratifyByAgeGroup = FALSE # this test is using simulated cohort table - ) - - # testing if values returned for cohort 1 is as expected - timeSeriesCohort <- timeSeries %>% - dplyr::filter(.data$cohortId == 1) %>% - dplyr::filter(.data$seriesType == "T1") %>% - dplyr::filter(.data$calendarInterval == "m") - - # there should be 8 records in this data frame, representing 8 months for the one subject in the cohort id = 1 - testthat::expect_equal( - object = 
nrow(timeSeriesCohort), - expected = 8 - ) - - # there should be 2 records in this data frame, representing the 2 starts for the one subject in the cohort id = 1 - testthat::expect_equal( - object = nrow(timeSeriesCohort %>% dplyr::filter(.data$recordsStart == 1)), - expected = 2 - ) - - # there should be 1 records in this data frame, representing the 1 incident start for the one subject in the cohort id = 1 - testthat::expect_equal( - object = nrow(timeSeriesCohort %>% dplyr::filter(.data$subjectsStartIn == 1)), - expected = 1 - ) - }) -}) - - -test_that("Testing Data source time series execution", { - skip_if(skipCdmTests, "cdm settings not configured") - - connectionTimeSeries <- - DatabaseConnector::connect(connectionDetails) - - # to do - with incremental = FALSE - with_dbc_connection(connectionTimeSeries, { - cohortDefinitionSet <- dplyr::tibble( - cohortId = -44819062, - # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 - checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") - ) - - exportFolder <- tempdir() - exportFile <- tempfile() - - unlink( - x = exportFolder, - recursive = TRUE, - force = TRUE - ) - dir.create( - path = exportFolder, - showWarnings = FALSE, - recursive = TRUE - ) - - executeTimeSeriesDiagnostics( - connection = connectionTimeSeries, - tempEmulationSchema = tempEmulationSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortDefinitionSet = data.frame(), - runCohortTimeSeries = FALSE, - runDataSourceTimeSeries = TRUE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - incremental = TRUE, - recordKeepingFile = paste0(exportFile, "recordKeeping"), - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ) - ) - - recordKeepingFileData <- - readr::read_csv( - file = paste0(exportFile, 
"recordKeeping"), - col_types = readr::cols() - ) - - # testing if check sum is written - testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) - testthat::expect_equal(object = recordKeepingFileData$cohortId, expected = -44819062) - - # result - dataSourceTimeSeriesResult <- - readr::read_csv( - file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols() - ) - - subset <- subsetToRequiredCohorts( - cohorts = cohortDefinitionSet, - task = "runDataSourceTimeSeries", - incremental = TRUE, - recordKeepingFile = paste0(exportFile, "recordKeeping") - ) %>% - dplyr::arrange(cohortId) - - testthat::expect_equal( - object = nrow(subset), - expected = 0 - ) - }) -}) +# test_that("Testing cohort time series execution", { +# skip_if(skipCdmTests, "cdm settings not configured") +# +# connectionTimeSeries <- +# DatabaseConnector::connect(connectionDetails) +# +# # to do - with incremental = FALSE +# with_dbc_connection(connectionTimeSeries, { +# cohort <- dplyr::tibble( +# cohortDefinitionId = c(1, 1, 2, 2), +# subjectId = c(1, 1, 1, 2), +# cohortStartDate = c( +# as.Date("2005-01-15"), +# as.Date("2005-07-15"), +# as.Date("2005-01-15"), +# as.Date("2005-07-15") +# ), +# cohortEndDate = c( +# as.Date("2005-05-15"), +# as.Date("2005-09-15"), +# as.Date("2005-05-15"), +# as.Date("2005-09-15") +# ) +# ) +# +# cohort <- dplyr::bind_rows( +# cohort, +# cohort %>% +# dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) +# ) +# +# cohortDefinitionSet <- +# cohort %>% +# dplyr::select(cohortDefinitionId) %>% +# dplyr::distinct() %>% +# dplyr::rename("cohortId" = "cohortDefinitionId") %>% +# dplyr::rowwise() %>% +# dplyr::mutate(json = RJSONIO::toJSON(list( +# cohortId = cohortId, +# randomString = c( +# sample(x = LETTERS, 5, replace = TRUE), +# sample(x = LETTERS, 4, replace = TRUE), +# sample(LETTERS, 1, replace = TRUE) +# ) +# ))) %>% +# dplyr::ungroup() %>% +# dplyr::mutate( +# sql = json, +# checksum = 
as.character(CohortDiagnostics:::computeChecksum(json)) +# ) %>% +# dplyr::ungroup() +# +# exportFolder <- tempdir() +# exportFile <- tempfile() +# +# unlink( +# x = exportFolder, +# recursive = TRUE, +# force = TRUE +# ) +# dir.create( +# path = exportFolder, +# showWarnings = FALSE, +# recursive = TRUE +# ) +# +# cohortTable <- +# paste0( +# "ct_", +# format(Sys.time(), "%s"), +# sample(1:100, 1) +# ) +# +# DatabaseConnector::insertTable( +# connection = connectionTimeSeries, +# databaseSchema = cohortDatabaseSchema, +# tableName = cohortTable, +# data = cohort, +# dropTableIfExists = TRUE, +# createTable = TRUE, +# tempTable = FALSE, +# camelCaseToSnakeCase = TRUE, +# progressBar = FALSE +# ) +# +# CohortDiagnostics:::executeTimeSeriesDiagnostics( +# connection = connectionTimeSeries, +# tempEmulationSchema = tempEmulationSchema, +# cdmDatabaseSchema = cdmDatabaseSchema, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cohortTable = cohortTable, +# cohortDefinitionSet = cohortDefinitionSet %>% +# dplyr::filter(cohortId %in% c(1, 2)), +# runCohortTimeSeries = TRUE, +# runDataSourceTimeSeries = FALSE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# instantiatedCohorts = cohort$cohortDefinitionId, +# incremental = TRUE, +# recordKeepingFile = paste0(exportFile, "recordKeeping"), +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ), +# batchSize = 1 +# ) +# +# recordKeepingFileData <- +# readr::read_csv( +# file = paste0(exportFile, "recordKeeping"), +# col_types = readr::cols() +# ) +# +# # testing if check sum is written +# testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) +# +# # result +# timeSeriesResults1 <- +# readr::read_csv( +# file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols() +# ) +# +# subset <- CohortDiagnostics:::subsetToRequiredCohorts( +# cohorts = 
cohortDefinitionSet, +# task = "runCohortTimeSeries", +# incremental = TRUE, +# recordKeepingFile = paste0(exportFile, "recordKeeping") +# ) %>% +# dplyr::arrange(cohortId) +# +# testthat::expect_equal( +# object = subset$cohortId, +# expected = c(1000, 2000) +# ) +# +# +# # delete the previously written results file. To see if the previously executed cohorts will have results after deletion +# unlink( +# x = file.path(exportFolder, "time_series.csv"), +# recursive = TRUE, +# force = TRUE +# ) +# +# CohortDiagnostics:::executeTimeSeriesDiagnostics( +# connection = connectionTimeSeries, +# tempEmulationSchema = tempEmulationSchema, +# cdmDatabaseSchema = cdmDatabaseSchema, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cohortTable = cohortTable, +# cohortDefinitionSet = cohortDefinitionSet, +# runCohortTimeSeries = TRUE, +# runDataSourceTimeSeries = FALSE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# instantiatedCohorts = cohort$cohortDefinitionId, +# incremental = TRUE, +# recordKeepingFile = paste0(exportFile, "recordKeeping"), +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ), +# batchSize = 100 +# ) +# resultsNew <- +# readr::read_csv( +# file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols() +# ) +# +# testthat::expect_equal( +# object = resultsNew$cohort_id %>% unique() %>% sort(), +# expected = c(1000, 2000) +# ) +# }) +# }) +# +# +# +# +# test_that("Testing time series logic", { +# skip_if(skipCdmTests, "cdm settings not configured") +# +# connectionTimeSeries <- +# DatabaseConnector::connect(connectionDetails) +# +# # to do - with incremental = FALSE +# with_dbc_connection(connectionTimeSeries, { +# # manually create cohort table and load to table +# # Cohort table has a total of four records, with each cohort id having two each +# # cohort 1 has one subject with two different 
cohort entries +# # cohort 2 has two subject with two different cohort entries +# cohort <- dplyr::tibble( +# cohortDefinitionId = c(1, 1, 2, 2), +# subjectId = c(1, 1, 1, 2), +# cohortStartDate = c(as.Date("2005-01-15"), as.Date("2005-07-15"), as.Date("2005-01-15"), as.Date("2005-07-15")), +# cohortEndDate = c(as.Date("2005-05-15"), as.Date("2005-09-15"), as.Date("2005-05-15"), as.Date("2005-09-15")) +# ) +# +# cohortTable <- +# paste0("ct_", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) +# +# DatabaseConnector::insertTable( +# connection = connectionTimeSeries, +# databaseSchema = cohortDatabaseSchema, +# tableName = cohortTable, +# data = cohort, +# dropTableIfExists = TRUE, +# createTable = TRUE, +# tempTable = FALSE, +# camelCaseToSnakeCase = TRUE, +# progressBar = FALSE +# ) +# +# timeSeries <- +# runCohortTimeSeriesDiagnostics( +# connection = connectionTimeSeries, +# tempEmulationSchema = tempEmulationSchema, +# cdmDatabaseSchema = cdmDatabaseSchema, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cohortTable = cohortTable, +# runCohortTimeSeries = TRUE, +# runDataSourceTimeSeries = FALSE, # cannot test data source time series because we are using simulated cohort table +# timeSeriesMinDate = as.Date("2004-01-01"), +# timeSeriesMaxDate = as.Date("2006-12-31"), +# cohortIds = c(1, 2), +# stratifyByGender = FALSE, # cannot test stratification because it will require cohort table to be built from cdm +# stratifyByAgeGroup = FALSE # this test is using simulated cohort table +# ) +# +# # testing if values returned for cohort 1 is as expected +# timeSeriesCohort <- timeSeries %>% +# dplyr::filter(.data$cohortId == 1) %>% +# dplyr::filter(.data$seriesType == "T1") %>% +# dplyr::filter(.data$calendarInterval == "m") +# +# # there should be 8 records in this data frame, representing 8 months for the one subject in the cohort id = 1 +# testthat::expect_equal( +# object = nrow(timeSeriesCohort), +# expected = 8 +# ) +# +# # there should be 2 records in 
this data frame, representing the 2 starts for the one subject in the cohort id = 1 +# testthat::expect_equal( +# object = nrow(timeSeriesCohort %>% dplyr::filter(.data$recordsStart == 1)), +# expected = 2 +# ) +# +# # there should be 1 records in this data frame, representing the 1 incident start for the one subject in the cohort id = 1 +# testthat::expect_equal( +# object = nrow(timeSeriesCohort %>% dplyr::filter(.data$subjectsStartIn == 1)), +# expected = 1 +# ) +# }) +# }) +# +# +# test_that("Testing Data source time series execution", { +# skip_if(skipCdmTests, "cdm settings not configured") +# +# connectionTimeSeries <- +# DatabaseConnector::connect(connectionDetails) +# +# # to do - with incremental = FALSE +# with_dbc_connection(connectionTimeSeries, { +# cohortDefinitionSet <- dplyr::tibble( +# cohortId = -44819062, +# # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 +# checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") +# ) +# +# exportFolder <- tempdir() +# exportFile <- tempfile() +# +# unlink( +# x = exportFolder, +# recursive = TRUE, +# force = TRUE +# ) +# dir.create( +# path = exportFolder, +# showWarnings = FALSE, +# recursive = TRUE +# ) +# +# executeTimeSeriesDiagnostics( +# connection = connectionTimeSeries, +# tempEmulationSchema = tempEmulationSchema, +# cdmDatabaseSchema = cdmDatabaseSchema, +# cohortDatabaseSchema = cohortDatabaseSchema, +# cohortDefinitionSet = data.frame(), +# runCohortTimeSeries = FALSE, +# runDataSourceTimeSeries = TRUE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# incremental = TRUE, +# recordKeepingFile = paste0(exportFile, "recordKeeping"), +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ) +# ) +# +# recordKeepingFileData <- +# readr::read_csv( +# file = paste0(exportFile, 
"recordKeeping"), +# col_types = readr::cols() +# ) +# +# # testing if check sum is written +# testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) +# testthat::expect_equal(object = recordKeepingFileData$cohortId, expected = -44819062) +# +# # result +# dataSourceTimeSeriesResult <- +# readr::read_csv( +# file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols() +# ) +# +# subset <- subsetToRequiredCohorts( +# cohorts = cohortDefinitionSet, +# task = "runDataSourceTimeSeries", +# incremental = TRUE, +# recordKeepingFile = paste0(exportFile, "recordKeeping") +# ) %>% +# dplyr::arrange(cohortId) +# +# testthat::expect_equal( +# object = nrow(subset), +# expected = 0 +# ) +# }) +# }) diff --git a/tests/testthat/test-runVisitContext.R b/tests/testthat/test-runVisitContext.R index b67af694d..b792f413e 100644 --- a/tests/testthat/test-runVisitContext.R +++ b/tests/testthat/test-runVisitContext.R @@ -1,551 +1,551 @@ -library(SqlRender) -library(readxl) -library(dplyr) -library(readr) - - -for (nm in names(testServers)) { - - server <- testServers[[nm]] - - con <- DatabaseConnector::connect(server$connectionDetails) - - exportFolder <- tempfile() - - dir.create(exportFolder) - - test_that(paste("test temporary table #concept_ids creation"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - - getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - expect_true(tempTableExists(con, "concept_ids")) - - }) - - - test_that(paste("test no duplicates in concept_ids table for getVisitContext function"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - - sql <- "SELECT * FROM 
#concept_ids" - - translatedSql <- translate(sql, targetDialect = server$connectionDetails$dbms) - - firstTime <- system.time( - - visitContextResult <- getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - ) - - firstResult <- querySql(con, translatedSql) - - secondTime <- system.time( - - visitContextResult <- getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - ) - - secondResult <- querySql(con, translatedSql) - - expect_equal(firstResult, secondResult) - - }) - - - # For testing the runVisitContext, there is no need to run it on multiple database systems since no sql other than - # the one included in the getVisitContext is executed. 
- if (nm == "sqlite"){ - - test_that(paste("test that when incremental is FALSE the incremental file is not generated"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - - expect_false(file.exists(file.path(exportFolder,"incremental"))) - - runVisitContext(connection = con, - cohortDefinitionSet = server$cohortDefinitionSet, - exportFolder = exportFolder, - databaseId = nm , - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = FALSE - ) - - expect_false(file.exists(file.path(exportFolder,"incremental"))) - }) - - test_that(paste("test that when incremental is TRUE the incremental file is generated when it doesn't exist"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - expect_false(file.exists(file.path(exportFolder, "incremental"))) - - runVisitContext(connection = con, - cohortDefinitionSet = server$cohortDefinitionSet, - exportFolder = exportFolder, - databaseId = nm , - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - expect_true(file.exists(file.path(exportFolder, "incremental"))) - - }) - - - test_that(paste("test that the output file visit_context.csv is generated and is identical with the output of getVisitContext()"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - getVisitContextResult <- getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = 
server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - getVisitContextResult <- unname(getVisitContextResult) - - runVisitContext(connection = con, - cohortDefinitionSet = server$cohortDefinitionSet, - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = FALSE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - runVisitContextResult <- read.csv(resultCsv, header = TRUE, sep = ",") - runVisitContextResult$database_id <- NULL - runVisitContextResult <- unname(runVisitContextResult) - - expect_equal(getVisitContextResult, runVisitContextResult) - - }) - - - test_that(paste("test that incremental logic is correct: incremental run for the first time"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - cohortIds <- c(17492) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # csv should contain results only from the specified cohort - expect_equal(unique(results$cohort_id), c(17492)) - - }) - - test_that(paste("test that incremental logic is correct: no new cohorts"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - cohortIds 
<- c(17492) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # csv should contain the same result after the first run and the second run as no new cohorts were added - expect_equal(results1, results2) - - }) - - test_that(paste("test that incremental logic is correct: output visit_context.csv must contain results for new cohorts"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - cohortIds <- c(17492) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) 
- - results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # csv should contain results only from the specified cohort - expect_equal(unique(results1$cohort_id), c(17492)) - - cohortIds <- c(17492, 17493) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # csv should contain results from both runs, hence both cohorts - expect_equal(unique(results2$cohort_id), c(17492, 17493)) - - }) - } -} - - -##### Test cases with custom data ##### - -test_that(paste("test that the subject counts per cohort, visit concept and visit context are correct"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - resultPath <- system.file("test_cases/runVisitContext/testSubjectCounts/expectedResult.xlsx", package = "CohortDiagnostics") - - resultData <- readxl::read_excel(resultPath, col_types = 
c("numeric", "numeric", "text", - "numeric")) - - visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] - visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) - - resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] - resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) - - are_equal <- identical(visitContextResult, resultData) - - expect_true(are_equal) - -}) - -test_that(paste("test that only the new visit_concept_id are inserted into the #concept_ids table"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - - getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - sql <- "select * from #concept_ids" - - translatedSQL <- translate(sql, targetDialect = "sqlite") - - res1 <- querySql(connection = connection, sql = translatedSQL) - - - are_equal <- all(sort(unlist(list(262, 9201))) == sort(unlist(res1$CONCEPT_ID))) - - expect_true(are_equal) - - new_row <- data.frame( - visit_occurrence_id = 5, - person_id = 2, - visit_concept_id = 261, - visit_start_date = as.Date("2015-01-10"), - visit_start_datetime = as.POSIXct("2015-01-10 08:00:00", format = "%Y-%m-%d %H:%M:%S", tz = 
"UTC"), - visit_end_date = as.Date("2015-01-10"), - visit_end_datetime = as.POSIXct("2015-01-10 18:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), - visit_type_concept_id = 32817, - provider_id = 1, - care_site_id = 1, - visit_source_value = 0, - visit_source_concept_id = 0, - admitting_source_concept_id = 8870, - admitting_source_value = "TRANSFER FROM HOSPITAL", - discharge_to_concept_id = 581476, - discharge_to_source_value = "HOME HEALTH CARE", - preceding_visit_occurrence_id = 0 - ) - - DBI::dbAppendTable(connection, "visit_occurrence", new_row) - - getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - sql <- "select * from #concept_ids" - - translatedSQL <- translate(sql, targetDialect = "sqlite") - - res2 <- querySql(connection = connection, sql = translatedSQL) - - are_equal <- all(sort(unlist(list(262, 9201, 261))) == sort(unlist(res2$CONCEPT_ID))) - - expect_true(are_equal) -}) - - - -test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after cohort creation are considered"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = 
"#concept_ids", - cdmVersion = 5 - ) - - resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") - - resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", - "numeric")) - - visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] - visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) - - resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] - resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) - - are_equal <- identical(visitContextResult, resultData) - - expect_true(are_equal) - -}) - -test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after cohort creation are considered"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") - - resultData <- readxl::read_excel(resultPath, 
col_types = c("numeric", "numeric", "text", - "numeric")) - - visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] - visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) - - resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] - resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) - - are_equal <- identical(visitContextResult, resultData) - - expect_true(are_equal) -}) - -test_that(paste("test that when the subjects in the cohort have no visits an empty data frame is returned"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - sql <- "DELETE FROM visit_occurrence;" - - translatedSQL <- translate(sql = sql, targetDialect = "sqlite") - - executeSql(connection = connection, sql = translatedSQL) - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/expectedResult.xlsx", package = "CohortDiagnostics") - - resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", - "numeric")) - - visitContextResult <- 
visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] - visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) - - resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] - resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) - - are_equal <- identical(visitContextResult, resultData) - - expect_true(are_equal) -}) \ No newline at end of file +# library(SqlRender) +# library(readxl) +# library(dplyr) +# library(readr) +# +# +# for (nm in names(testServers)) { +# +# server <- testServers[[nm]] +# +# con <- DatabaseConnector::connect(server$connectionDetails) +# +# exportFolder <- tempfile() +# +# dir.create(exportFolder) +# +# test_that(paste("test temporary table #concept_ids creation"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# +# getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# expect_true(tempTableExists(con, "concept_ids")) +# +# }) +# +# +# test_that(paste("test no duplicates in concept_ids table for getVisitContext function"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# +# sql <- "SELECT * FROM #concept_ids" +# +# translatedSql <- translate(sql, targetDialect = server$connectionDetails$dbms) +# +# firstTime <- system.time( +# +# visitContextResult <- getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema 
= server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# ) +# +# firstResult <- querySql(con, translatedSql) +# +# secondTime <- system.time( +# +# visitContextResult <- getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# ) +# +# secondResult <- querySql(con, translatedSql) +# +# expect_equal(firstResult, secondResult) +# +# }) +# +# +# # For testing the runVisitContext, there is no need to run it on multiple database systems since no sql other than +# # the one included in the getVisitContext is executed. +# if (nm == "sqlite"){ +# +# test_that(paste("test that when incremental is FALSE the incremental file is not generated"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# +# expect_false(file.exists(file.path(exportFolder,"incremental"))) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = server$cohortDefinitionSet, +# exportFolder = exportFolder, +# databaseId = nm , +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = FALSE +# ) +# +# expect_false(file.exists(file.path(exportFolder,"incremental"))) +# }) +# +# test_that(paste("test that when incremental is TRUE the incremental file is generated when it doesn't exist"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# expect_false(file.exists(file.path(exportFolder, "incremental"))) +# +# 
runVisitContext(connection = con, +# cohortDefinitionSet = server$cohortDefinitionSet, +# exportFolder = exportFolder, +# databaseId = nm , +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# expect_true(file.exists(file.path(exportFolder, "incremental"))) +# +# }) +# +# +# test_that(paste("test that the output file visit_context.csv is generated and is identical with the output of getVisitContext()"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# getVisitContextResult <- getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# getVisitContextResult <- unname(getVisitContextResult) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = server$cohortDefinitionSet, +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = FALSE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# runVisitContextResult <- read.csv(resultCsv, header = TRUE, sep = ",") +# runVisitContextResult$database_id <- NULL +# runVisitContextResult <- unname(runVisitContextResult) +# +# expect_equal(getVisitContextResult, runVisitContextResult) +# +# }) +# +# +# test_that(paste("test that incremental logic is correct: incremental run for the first time"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# 
exportFolder <- tempfile() +# dir.create(exportFolder) +# +# cohortIds <- c(17492) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # csv should contain results only from the specified cohort +# expect_equal(unique(results$cohort_id), c(17492)) +# +# }) +# +# test_that(paste("test that incremental logic is correct: no new cohorts"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# cohortIds <- c(17492) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, 
"visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # csv should contain the same result after the first run and the second run as no new cohorts were added +# expect_equal(results1, results2) +# +# }) +# +# test_that(paste("test that incremental logic is correct: output visit_context.csv must contain results for new cohorts"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# cohortIds <- c(17492) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # csv should contain results only from the specified cohort +# expect_equal(unique(results1$cohort_id), c(17492)) +# +# cohortIds <- c(17492, 17493) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # csv should contain results from both runs, hence both cohorts +# expect_equal(unique(results2$cohort_id), c(17492, 
17493)) +# +# }) +# } +# } +# +# +# ##### Test cases with custom data ##### +# +# test_that(paste("test that the subject counts per cohort, visit concept and visit context are correct"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# resultPath <- system.file("test_cases/runVisitContext/testSubjectCounts/expectedResult.xlsx", package = "CohortDiagnostics") +# +# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", +# "numeric")) +# +# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] +# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) +# +# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] +# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) +# +# are_equal <- identical(visitContextResult, resultData) +# +# expect_true(are_equal) +# +# }) +# +# test_that(paste("test that only the new visit_concept_id are inserted into the #concept_ids table"), { +# +# cohortDataFilePath <- 
system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# +# getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# sql <- "select * from #concept_ids" +# +# translatedSQL <- translate(sql, targetDialect = "sqlite") +# +# res1 <- querySql(connection = connection, sql = translatedSQL) +# +# +# are_equal <- all(sort(unlist(list(262, 9201))) == sort(unlist(res1$CONCEPT_ID))) +# +# expect_true(are_equal) +# +# new_row <- data.frame( +# visit_occurrence_id = 5, +# person_id = 2, +# visit_concept_id = 261, +# visit_start_date = as.Date("2015-01-10"), +# visit_start_datetime = as.POSIXct("2015-01-10 08:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), +# visit_end_date = as.Date("2015-01-10"), +# visit_end_datetime = as.POSIXct("2015-01-10 18:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), +# visit_type_concept_id = 32817, +# provider_id = 1, +# care_site_id = 1, +# visit_source_value = 0, +# visit_source_concept_id = 0, +# admitting_source_concept_id = 8870, +# admitting_source_value = "TRANSFER FROM HOSPITAL", +# discharge_to_concept_id = 581476, +# discharge_to_source_value = "HOME HEALTH CARE", +# preceding_visit_occurrence_id = 0 +# ) +# +# DBI::dbAppendTable(connection, "visit_occurrence", new_row) +# +# getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = 
"cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# sql <- "select * from #concept_ids" +# +# translatedSQL <- translate(sql, targetDialect = "sqlite") +# +# res2 <- querySql(connection = connection, sql = translatedSQL) +# +# are_equal <- all(sort(unlist(list(262, 9201, 261))) == sort(unlist(res2$CONCEPT_ID))) +# +# expect_true(are_equal) +# }) +# +# +# +# test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after cohort creation are considered"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") +# +# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", +# "numeric")) +# +# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] +# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) +# +# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, 
resultData$visitContext, resultData$subjects), ] +# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) +# +# are_equal <- identical(visitContextResult, resultData) +# +# expect_true(are_equal) +# +# }) +# +# test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after cohort creation are considered"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") +# +# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", +# "numeric")) +# +# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] +# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) +# +# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] +# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) +# +# are_equal <- 
identical(visitContextResult, resultData) +# +# expect_true(are_equal) +# }) +# +# test_that(paste("test that when the subjects in the cohort have no visits an empty data frame is returned"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# sql <- "DELETE FROM visit_occurrence;" +# +# translatedSQL <- translate(sql = sql, targetDialect = "sqlite") +# +# executeSql(connection = connection, sql = translatedSQL) +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/expectedResult.xlsx", package = "CohortDiagnostics") +# +# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", +# "numeric")) +# +# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] +# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) +# +# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] +# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) +# +# are_equal <- identical(visitContextResult, resultData) +# +# 
expect_true(are_equal) +# }) \ No newline at end of file diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 0dc1bf0ab..185d60499 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,217 +1,217 @@ -library(testthat) - -# check makeDataExportable function -test_that("Check function makeDataExportable", { - cohortCountTableCorrect <- dplyr::tibble( - cohortId = 3423, - cohortEntries = 432432, - cohortSubjects = 34234, - databaseId = "ccae" - ) - cohortCountTableCorrect <- - CohortDiagnostics:::makeDataExportable( - x = cohortCountTableCorrect, - tableName = "cohort_count" - ) - cohortCountTableCorrectNames <- - SqlRender::camelCaseToSnakeCase(names(cohortCountTableCorrect)) %>% sort() - - resultsDataModel <- getResultsDataModelSpecifications() %>% - dplyr::filter(tableName == "cohort_count") %>% - dplyr::select(columnName) %>% - dplyr::pull() %>% - sort() - - expect_true(identical(cohortCountTableCorrectNames, resultsDataModel)) - - cohortCountTableInCorrect <- dplyr::tibble( - cohortIdXXX = 3423, - cohortEntryXXX = 432432, - cohortSubjectsXXX = 34234 - ) - - expect_error( - CohortDiagnostics:::makeDataExportable( - x = cohortCountTableInCorrect, - tableName = "cohort_count" - ) - ) - - cohortCountTableCorrectDuplicated <- - dplyr::bind_rows( - cohortCountTableCorrect, - cohortCountTableCorrect - ) - expect_error( - CohortDiagnostics:::makeDataExportable( - x = cohortCountTableCorrectDuplicated, - tableName = "cohort_count" - ) - ) -}) - -test_that("timeExecutions function", { - readr::local_edition(1) - temp <- tempfile() - on.exit(unlink(temp, force = TRUE, recursive = TRUE)) - dir.create(temp) - - # Basic test - timeExecution( - exportFolder = temp, - taskName = "test_task1", - cohortIds = c(1, 2, 3, 4), - expr = { - Sys.sleep(0.001) - } - ) - expectedFilePath <- file.path(temp, "executionTimes.csv") - checkmate::expect_file_exists(expectedFilePath) - result <- readr::read_csv(expectedFilePath, col_types = 
readr::cols()) - checkmate::expect_data_frame(result, nrows = 1, ncols = 5) - - expect_false(all(is.na(result$startTime))) - expect_false(all(is.na(result$executionTime))) - - # Test append - timeExecution( - exportFolder = temp, - taskName = "test_task2", - cohortIds = NULL, - expr = { - Sys.sleep(0.001) - } - ) - - result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) - checkmate::expect_data_frame(result, nrows = 2, ncols = 5) - - # Parent string - timeExecution( - exportFolder = temp, - taskName = "test_task3", - parent = "testthat", - cohortIds = NULL, - expr = { - Sys.sleep(0.001) - } - ) - - result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) - checkmate::expect_data_frame(result, nrows = 3, ncols = 5) - - # custom start/end times - timeExecution( - exportFolder = temp, - taskName = "test_task4", - parent = "testthat", - cohortIds = NULL, - start = "foo", - execTime = "Foo" - ) - - result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) - checkmate::expect_data_frame(result, nrows = 4, ncols = 5) - - timeExecution( - exportFolder = temp, - taskName = "test_task5", - parent = "testthat", - cohortIds = NULL, - start = Sys.time() - ) - - result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) - checkmate::expect_data_frame(result, nrows = 5, ncols = 5) - expect_false(all(is.na(result$startTime))) -}) - -test_that("enforceMinCellValue replaces values below minimum with negative of minimum", { - data <- data.frame(a = c(1, 2, 3, 4, 5)) - minValues <- 3 - result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) - - expect_equal(result$a, c(-3, -3, 3, 4, 5)) -}) - -test_that("enforceMinCellValue does not replace NA values", { - data <- data.frame(a = c(1, 2, NA, 4, 5)) - minValues <- 3 - result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) - - expect_equal(result$a, c(-3, -3, NA, 4, 5)) -}) - -test_that("enforceMinCellValue does not replace zero values", { - data <- 
data.frame(a = c(0, 2, 3, 4, 5)) - minValues <- 3 - result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) - - expect_equal(result$a, c(0, -3, 3, 4, 5)) -}) - -test_that("enforceMinCellValue works with vector of minimum values", { - data <- data.frame(a = c(1, 2, 3, 4, 5)) - minValues <- c(1, 2, 3, 4, 5) - result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) - - expect_equal(result$a, c(1, 2, 3, 4, 5)) -}) - -test_that("timeExecution uses minutes as unit", { - exportFolder <- tempfile() - dir.create(exportFolder) - timeExecution(exportFolder, - taskName = "test 1 second", - expr = Sys.sleep(1)) - - start <- as.POSIXct("2024-10-09 03:37:46") - oneMinute <- start - as.POSIXct("2024-10-09 03:36:46") - timeExecution(exportFolder, - taskName = "test 1 minute", - start = start, - execTime = oneMinute) - - start <- as.POSIXct("2024-10-09 03:37:46") - oneHour <- start - as.POSIXct("2024-10-09 02:37:46") - timeExecution(exportFolder, - taskName = "test 1 hour", - start = start, - execTime = oneHour) - - list.files(exportFolder) - df <- readr::read_csv(file.path(exportFolder, "executionTimes.csv"), show_col_types = F) - - expect_equal(df$task, c("test 1 second", "test 1 minute", "test 1 hour")) - expect_equal(df$executionTime, c(round(1/60, 4), 1, 60)) -}) - -for (server in testServers) { - test_that(paste("tempTableExists works on ", server$connectionDetails$dbms), { - con <- DatabaseConnector::connect(server$connectionDetails) - DatabaseConnector::renderTranslateExecuteSql(con, "create table #tmp110010 (a int);", - progressBar = F, - reportOverallTime = F) - expect_false(tempTableExists(con, "tmp98765")) - expect_true(tempTableExists(con, "tmp110010")) - DatabaseConnector::renderTranslateExecuteSql(con, "drop table #tmp110010;", - progressBar = F, - reportOverallTime = F) - DatabaseConnector::disconnect(con) - }) -} - -test_that("assertCohortDefinitionSetContainsAllParents works", { - cohorts <- loadTestCohortDefinitionSet() - - expect_no_error( - 
CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents(cohorts) - ) - - expect_error( - CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents( - dplyr::filter(cohorts, !(.data$cohortId %in% cohorts$subsetParent)) - ) - ) -}) +# library(testthat) +# +# # check makeDataExportable function +# test_that("Check function makeDataExportable", { +# cohortCountTableCorrect <- dplyr::tibble( +# cohortId = 3423, +# cohortEntries = 432432, +# cohortSubjects = 34234, +# databaseId = "ccae" +# ) +# cohortCountTableCorrect <- +# CohortDiagnostics:::makeDataExportable( +# x = cohortCountTableCorrect, +# tableName = "cohort_count" +# ) +# cohortCountTableCorrectNames <- +# SqlRender::camelCaseToSnakeCase(names(cohortCountTableCorrect)) %>% sort() +# +# resultsDataModel <- getResultsDataModelSpecifications() %>% +# dplyr::filter(tableName == "cohort_count") %>% +# dplyr::select(columnName) %>% +# dplyr::pull() %>% +# sort() +# +# expect_true(identical(cohortCountTableCorrectNames, resultsDataModel)) +# +# cohortCountTableInCorrect <- dplyr::tibble( +# cohortIdXXX = 3423, +# cohortEntryXXX = 432432, +# cohortSubjectsXXX = 34234 +# ) +# +# expect_error( +# CohortDiagnostics:::makeDataExportable( +# x = cohortCountTableInCorrect, +# tableName = "cohort_count" +# ) +# ) +# +# cohortCountTableCorrectDuplicated <- +# dplyr::bind_rows( +# cohortCountTableCorrect, +# cohortCountTableCorrect +# ) +# expect_error( +# CohortDiagnostics:::makeDataExportable( +# x = cohortCountTableCorrectDuplicated, +# tableName = "cohort_count" +# ) +# ) +# }) +# +# test_that("timeExecutions function", { +# readr::local_edition(1) +# temp <- tempfile() +# on.exit(unlink(temp, force = TRUE, recursive = TRUE)) +# dir.create(temp) +# +# # Basic test +# timeExecution( +# exportFolder = temp, +# taskName = "test_task1", +# cohortIds = c(1, 2, 3, 4), +# expr = { +# Sys.sleep(0.001) +# } +# ) +# expectedFilePath <- file.path(temp, "executionTimes.csv") +# 
checkmate::expect_file_exists(expectedFilePath) +# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) +# checkmate::expect_data_frame(result, nrows = 1, ncols = 5) +# +# expect_false(all(is.na(result$startTime))) +# expect_false(all(is.na(result$executionTime))) +# +# # Test append +# timeExecution( +# exportFolder = temp, +# taskName = "test_task2", +# cohortIds = NULL, +# expr = { +# Sys.sleep(0.001) +# } +# ) +# +# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) +# checkmate::expect_data_frame(result, nrows = 2, ncols = 5) +# +# # Parent string +# timeExecution( +# exportFolder = temp, +# taskName = "test_task3", +# parent = "testthat", +# cohortIds = NULL, +# expr = { +# Sys.sleep(0.001) +# } +# ) +# +# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) +# checkmate::expect_data_frame(result, nrows = 3, ncols = 5) +# +# # custom start/end times +# timeExecution( +# exportFolder = temp, +# taskName = "test_task4", +# parent = "testthat", +# cohortIds = NULL, +# start = "foo", +# execTime = "Foo" +# ) +# +# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) +# checkmate::expect_data_frame(result, nrows = 4, ncols = 5) +# +# timeExecution( +# exportFolder = temp, +# taskName = "test_task5", +# parent = "testthat", +# cohortIds = NULL, +# start = Sys.time() +# ) +# +# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) +# checkmate::expect_data_frame(result, nrows = 5, ncols = 5) +# expect_false(all(is.na(result$startTime))) +# }) +# +# test_that("enforceMinCellValue replaces values below minimum with negative of minimum", { +# data <- data.frame(a = c(1, 2, 3, 4, 5)) +# minValues <- 3 +# result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) +# +# expect_equal(result$a, c(-3, -3, 3, 4, 5)) +# }) +# +# test_that("enforceMinCellValue does not replace NA values", { +# data <- data.frame(a = c(1, 2, NA, 4, 5)) +# minValues <- 3 +# result <- 
enforceMinCellValue(data, "a", minValues, silent = TRUE) +# +# expect_equal(result$a, c(-3, -3, NA, 4, 5)) +# }) +# +# test_that("enforceMinCellValue does not replace zero values", { +# data <- data.frame(a = c(0, 2, 3, 4, 5)) +# minValues <- 3 +# result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) +# +# expect_equal(result$a, c(0, -3, 3, 4, 5)) +# }) +# +# test_that("enforceMinCellValue works with vector of minimum values", { +# data <- data.frame(a = c(1, 2, 3, 4, 5)) +# minValues <- c(1, 2, 3, 4, 5) +# result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) +# +# expect_equal(result$a, c(1, 2, 3, 4, 5)) +# }) +# +# test_that("timeExecution uses minutes as unit", { +# exportFolder <- tempfile() +# dir.create(exportFolder) +# timeExecution(exportFolder, +# taskName = "test 1 second", +# expr = Sys.sleep(1)) +# +# start <- as.POSIXct("2024-10-09 03:37:46") +# oneMinute <- start - as.POSIXct("2024-10-09 03:36:46") +# timeExecution(exportFolder, +# taskName = "test 1 minute", +# start = start, +# execTime = oneMinute) +# +# start <- as.POSIXct("2024-10-09 03:37:46") +# oneHour <- start - as.POSIXct("2024-10-09 02:37:46") +# timeExecution(exportFolder, +# taskName = "test 1 hour", +# start = start, +# execTime = oneHour) +# +# list.files(exportFolder) +# df <- readr::read_csv(file.path(exportFolder, "executionTimes.csv"), show_col_types = F) +# +# expect_equal(df$task, c("test 1 second", "test 1 minute", "test 1 hour")) +# expect_equal(df$executionTime, c(round(1/60, 4), 1, 60)) +# }) +# +# for (server in testServers) { +# test_that(paste("tempTableExists works on ", server$connectionDetails$dbms), { +# con <- DatabaseConnector::connect(server$connectionDetails) +# DatabaseConnector::renderTranslateExecuteSql(con, "create table #tmp110010 (a int);", +# progressBar = F, +# reportOverallTime = F) +# expect_false(tempTableExists(con, "tmp98765")) +# expect_true(tempTableExists(con, "tmp110010")) +# 
DatabaseConnector::renderTranslateExecuteSql(con, "drop table #tmp110010;", +# progressBar = F, +# reportOverallTime = F) +# DatabaseConnector::disconnect(con) +# }) +# } +# +# test_that("assertCohortDefinitionSetContainsAllParents works", { +# cohorts <- loadTestCohortDefinitionSet() +# +# expect_no_error( +# CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents(cohorts) +# ) +# +# expect_error( +# CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents( +# dplyr::filter(cohorts, !(.data$cohortId %in% cohorts$subsetParent)) +# ) +# ) +# }) From 4639ab9c0b9a6d26730fafce5f210e7bd75eb16f Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Thu, 10 Oct 2024 14:20:56 +0200 Subject: [PATCH 03/18] fixes --- R/runTemporalCohortCharacterization.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/runTemporalCohortCharacterization.R b/R/runTemporalCohortCharacterization.R index c451886e3..ffabb61e8 100644 --- a/R/runTemporalCohortCharacterization.R +++ b/R/runTemporalCohortCharacterization.R @@ -25,6 +25,7 @@ exportCharacterization <- function(characteristics, timeRefFileName = NULL, counts, minCellCount) { + if (!"covariates" %in% names(characteristics)) { warning("No characterization output for submitted cohorts") } else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) { @@ -55,7 +56,6 @@ exportCharacterization <- function(characteristics, dplyr::select(-"cohortEntries", -"cohortSubjects") %>% dplyr::distinct() %>% exportDataToCsv( - data = characteristics$filteredCovariates, tableName = "temporal_covariate_value", fileName = covariateValueFileName, minCellCount = minCellCount, @@ -64,31 +64,34 @@ exportCharacterization <- function(characteristics, if (dplyr::pull(dplyr::count(characteristics$filteredCovariates)) > 0) { + covariateRef <- characteristics$covariateRef exportDataToCsv( data = characteristics$covariateRef, tableName = "temporal_covariate_ref", fileName = covariateRefFileName, minCellCount = 
minCellCount, incremental = TRUE, - covariateId = covariateRef$covariateId + covariateId = covariateRef %>% dplyr::pull(covariateId) ) + analysisRef <- characteristics$analysisRef exportDataToCsv( - data = characteristics$analysisRef, + data = analysisRef, tableName = "temporal_analysis_ref", fileName = analysisRefFileName, minCellCount = minCellCount, incremental = TRUE, - analysisId = analysisRef$analysisId + analysisId = analysisRef %>% dplyr::pull(analysisId) ) + timeRef <- characteristics$timeRef exportDataToCsv( data = characteristics$timeRef, tableName = "temporal_time_ref", fileName = timeRefFileName, minCellCount = minCellCount, incremental = TRUE, - analysisId = timeRef$timeId + analysisId = timeRef %>% dplyr::pull(timeId) ) } } From 3ea6390d48f398ba68a4fb34e17864553080af08 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Thu, 10 Oct 2024 15:08:13 +0200 Subject: [PATCH 04/18] set temp covariate settings --- tests/testthat/setup.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index c000dced2..fbb572850 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -39,6 +39,10 @@ if (Sys.getenv("DONT_DOWNLOAD_JDBC_DRIVERS", "") != "TRUE") { temporalCovariateSettings <- FeatureExtraction::createTemporalCovariateSettings( useConditionOccurrence = TRUE, + useDrugEraStart = TRUE, + useProcedureOccurrence = TRUE, + useMeasurement = TRUE, + useCharlsonIndex = TRUE, temporalStartDays = c(-365, -30, 0, 1, 31), temporalEndDays = c(-31, -1, 0, 30, 365) ) From 71e47bc84235634bb5afa47e6a22d48906f59a20 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Thu, 10 Oct 2024 15:28:32 +0200 Subject: [PATCH 05/18] cohortDefinitionSet --- tests/testthat/test-runTemporalCohortCharacterization.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-runTemporalCohortCharacterization.R b/tests/testthat/test-runTemporalCohortCharacterization.R index 75a79d27d..24ffe9bee 100644 --- 
a/tests/testthat/test-runTemporalCohortCharacterization.R +++ b/tests/testthat/test-runTemporalCohortCharacterization.R @@ -38,7 +38,7 @@ test_that("Execute and export characterization", { covariateSettings = temporalCovariateSettings, tempEmulationSchema = server$tempEmulationSchema, cdmVersion = 5, - cohorts = cohortDefinitionSet[1:3, ], + cohorts = server$cohortDefinitionSet[1:3, ], cohortCounts = cohortCounts, minCellCount = 5, instantiatedCohorts = server$cohortDefinitionSet$cohortId, @@ -61,7 +61,7 @@ test_that("Execute and export characterization", { # check if subset works subset <- subsetToRequiredCohorts( - cohorts = cohortDefinitionSet, + cohorts = server$cohortDefinitionSet, task = "runTemporalCohortCharacterization", incremental = TRUE, recordKeepingFile = recordKeepingFile @@ -71,7 +71,7 @@ test_that("Execute and export characterization", { testthat::expect_equal( object = nrow(subset %>% dplyr::filter( - cohortId %in% c(cohortDefinitionSet[1:3, ]$cohortId) + cohortId %in% c(server$cohortDefinitionSet[1:3, ]$cohortId) )), expected = 0 ) From 931b06f7e7265a1a26563d14f8c7c71ddf42f0a3 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Thu, 10 Oct 2024 21:53:34 +0200 Subject: [PATCH 06/18] doc --- R/runTemporalCohortCharacterization.R | 70 +++++++++++++----------- man/runTemporalCohortCharacterization.Rd | 45 +++++++++------ 2 files changed, 64 insertions(+), 51 deletions(-) diff --git a/R/runTemporalCohortCharacterization.R b/R/runTemporalCohortCharacterization.R index ffabb61e8..c54b85624 100644 --- a/R/runTemporalCohortCharacterization.R +++ b/R/runTemporalCohortCharacterization.R @@ -22,7 +22,7 @@ exportCharacterization <- function(characteristics, covariateValueContFileName, covariateRefFileName, analysisRefFileName, - timeRefFileName = NULL, + timeRefFileName, counts, minCellCount) { @@ -111,8 +111,7 @@ exportCharacterization <- function(characteristics, } -getCohortCharacteristics <- function(connectionDetails = NULL, - connection = NULL, 
+getCohortCharacteristics <- function(connection = NULL, cdmDatabaseSchema, tempEmulationSchema = NULL, cohortDatabaseSchema = cdmDatabaseSchema, @@ -123,10 +122,6 @@ getCohortCharacteristics <- function(connectionDetails = NULL, exportFolder, minCharacterizationMean = 0.001) { startTime <- Sys.time() - if (is.null(connection)) { - connection <- DatabaseConnector::connect(connectionDetails) - on.exit(DatabaseConnector::disconnect(connection)) - } results <- Andromeda::andromeda() timeExecution( exportFolder, @@ -306,34 +301,43 @@ getCohortCharacteristics <- function(connectionDetails = NULL, return(results) } -#' Title +#' TODO: explain runTemporalCohortCharacterization +#' +#' @description +#' A short description... +#' +#' +#' @template connection +#' @template databaseId +#' @template exportFolder +#' @template cdmDatabaseSchema +#' @template cohortDatabaseSchema +#' @template cohortTable +#' @template tempEmulationSchema +#' @template cdmVersion +#' @template minCellCount +#' @template instantiatedCohorts +#' @template incremental +#' @template recordKeepingFile +#' @template batchSize #' -#' @param connection -#' @param databaseId -#' @param exportFolder -#' @param cdmDatabaseSchema -#' @param cohortDatabaseSchema -#' @param cohortTable -#' @param covariateSettings -#' @param tempEmulationSchema -#' @param cdmVersion -#' @param cohorts -#' @param cohortCounts -#' @param minCellCount -#' @param instantiatedCohorts -#' @param incremental -#' @param recordKeepingFile -#' @param task -#' @param jobName -#' @param covariateValueFileName -#' @param covariateValueContFileName -#' @param covariateRefFileName -#' @param analysisRefFileName -#' @param timeRefFileName -#' @param minCharacterizationMean -#' @param batchSize +#' @param cohorts cohorts +#' @param cohortCounts A dataframe with the cohort counts +#' @param covariateSettings Either an object of type \code{covariateSettings} as created using one of +#' the createTemporalCovariateSettings function in the 
FeatureExtraction package, or a list +#' of such objects. +#' @param task Name of this task +#' @param jobName Name of this job +#' @param covariateValueFileName Filename of the covariate value output +#' @param covariateValueContFileName Filename of the continuous covariate output +#' @param covariateRefFileName Filename of the covariate reference +#' @param analysisRefFileName Filename of the analysis reference +#' @param timeRefFileName Filename of the time reference +#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This +#' will help reduce the file size of the characterization output, but will remove information +#' on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent) #' -#' @return +#' @return None, it will write results to disk #' @export #' #' @examples diff --git a/man/runTemporalCohortCharacterization.Rd b/man/runTemporalCohortCharacterization.Rd index 6763b568e..fec780a8f 100644 --- a/man/runTemporalCohortCharacterization.Rd +++ b/man/runTemporalCohortCharacterization.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/runTemporalCohortCharacterization.R \name{runTemporalCohortCharacterization} \alias{runTemporalCohortCharacterization} -\title{Title} +\title{TODO: explain runTemporalCohortCharacterization} \usage{ runTemporalCohortCharacterization( connection, @@ -33,35 +33,41 @@ runTemporalCohortCharacterization( ) } \arguments{ -\item{connection}{} +\item{connection}{An object of type \code{connection} as created using the +\code{\link[DatabaseConnector]{connect}} function in the +DatabaseConnector package.} -\item{databaseId}{} +\item{databaseId}{A short string for identifying the database (e.g. 'Synpuf').} -\item{exportFolder}{} +\item{exportFolder}{The folder where the results will be exported to} -\item{cdmDatabaseSchema}{} +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides.
+Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} -\item{cohortDatabaseSchema}{} +\item{cohortDatabaseSchema}{Schema name where your cohort table resides. Note that for SQL Server, +this should include both the database and schema name, for example +'scratch.dbo'.} -\item{cohortTable}{} +\item{cohortTable}{Name of the cohort table.} -\item{covariateSettings}{} +\item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.} -\item{tempEmulationSchema}{} - -\item{cdmVersion}{} +\item{cdmVersion}{The version of the OMOP CDM. Default 5. (Note: only 5 is supported.)} \item{cohorts}{} \item{cohortCounts}{} -\item{minCellCount}{} - -\item{instantiatedCohorts}{} +\item{minCellCount}{The minimum cell count for fields contains person counts or fractions} -\item{incremental}{} +\item{instantiatedCohorts}{cohortIds of the cohorts that have been already been instantiated} -\item{recordKeepingFile}{} +\item{incremental}{`TRUE` or `FALSE` (default). If TRUE diagnostics for cohorts in the +cohort definition set that have not changed will be skipped and existing results +csv files will be updated. If FALSE then diagnostics for all cohorts in the cohort +definition set will be executed and pre-existing results files will be deleted.} \item{task}{} @@ -79,8 +85,11 @@ runTemporalCohortCharacterization( \item{minCharacterizationMean}{} -\item{batchSize}{} +\item{batchSize}{In case of batch processing, this specifies the size of the batch} + +\item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where records are kept +of which cohort diagnostics has been executed.} } \description{ -Title +A short description... 
} From 319f4d320e67fd93920e33d7ac22aa857e247136 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 08:52:23 +0200 Subject: [PATCH 07/18] testing --- tests/testthat/setup.R | 10 +- tests/testthat/test-runTimeSeries.R | 1068 ++++++++++++----------- tests/testthat/test-runVisitContext.R | 1130 ++++++++++++------------- 3 files changed, 1096 insertions(+), 1112 deletions(-) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index bbdc89648..325f45424 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -2,11 +2,11 @@ library(CohortDiagnostics) library(testthat) dbmsToTest <- c( - "sqlite", - "duckdb", - "postgresql", - "redshift", - "sql server" + "sqlite"#, + #"duckdb", + #"postgresql", + #"redshift", + #"sql server" ) useAllCovariates <- FALSE diff --git a/tests/testthat/test-runTimeSeries.R b/tests/testthat/test-runTimeSeries.R index aa238b2e4..1ffb0aca2 100644 --- a/tests/testthat/test-runTimeSeries.R +++ b/tests/testthat/test-runTimeSeries.R @@ -1,542 +1,526 @@ -# Copyright 2024 Observational Health Data Sciences and Informatics -# -# This file is part of CohortDiagnostics -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Test getTimeSeries on all testServers -for (nm in names(testServers)) { - - server <- testServers[[nm]] - con <- connect(server$connectionDetails) - exportFolder <- file.path(tempdir(), paste0(nm, "exp")) - recordKeepingFile <- file.path(exportFolder, "record.csv") - - test_that("Testing time series logic", { - skip_if(skipCdmTests, "cdm settings not configured") - - # to do - with incremental = FALSE - with_dbc_connection(con, { - # manually create cohort table and load to table - # Cohort table has a total of four records, with each cohort id having two each - # cohort 1 has one subject with two different cohort entries - # cohort 2 has two subject with two different cohort entries - cohort <- dplyr::tibble( - cohortDefinitionId = c(1, 1, 2, 2), - subjectId = c(1, 1, 1, 2), - cohortStartDate = c(as.Date("2005-01-15"), as.Date("2005-07-15"), as.Date("2005-01-15"), as.Date("2005-07-15")), - cohortEndDate = c(as.Date("2005-05-15"), as.Date("2005-09-15"), as.Date("2005-05-15"), as.Date("2005-09-15")) - ) - - cohortTable <- - paste0("ct_", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) - - DatabaseConnector::insertTable( - connection = con, - databaseSchema = server$cohortDatabaseSchema, - tableName = cohortTable, - data = cohort, - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = FALSE, - camelCaseToSnakeCase = TRUE, - progressBar = FALSE - ) - - timeSeries <- CohortDiagnostics:::getTimeSeries( - connection = con, - tempEmulationSchema = server$tempEmulationSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = cohortTable, - runCohortTimeSeries = TRUE, - runDataSourceTimeSeries = FALSE, # cannot test data source time series because we are using simulated cohort table - timeSeriesMinDate = as.Date("2004-01-01"), - timeSeriesMaxDate = as.Date("2006-12-31"), - cohortIds = c(1, 2), - stratifyByGender = FALSE, # cannot test stratification because it will require cohort 
table to be built from cdm - stratifyByAgeGroup = FALSE # this test is using simulated cohort table - ) - - # testing if values returned for cohort 1 is as expected - timeSeriesCohort <- timeSeries %>% - dplyr::filter(.data$cohortId == 1) %>% - dplyr::filter(.data$seriesType == "T1") %>% - dplyr::filter(.data$calendarInterval == "m") - - # there should be 8 records in this data frame, representing 8 months for the one subject in the cohort id = 1 - testthat::expect_equal( - object = nrow(timeSeriesCohort), - expected = 8 - ) - - # there should be 2 records in this data frame, representing the 2 starts for the one subject in the cohort id = 1 - testthat::expect_equal( - object = nrow(timeSeriesCohort %>% dplyr::filter(.data$recordsStart == 1)), - expected = 2 - ) - - # there should be 1 records in this data frame, representing the 1 incident start for the one subject in the cohort id = 1 - testthat::expect_equal( - object = nrow(timeSeriesCohort %>% dplyr::filter(.data$subjectsStartIn == 1)), - expected = 1 - ) - }) - }) -} - -test_that("Testing cohort time series execution, incremental = FALSE", { - testServer <- "sqlite" - skip_if_not(testServer %in% names(testServers)) - server <- testServers[[testServer]] - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) - recordKeepingFile <- file.path(exportFolder, "record.csv") - incremental <- FALSE - - with_dbc_connection(con, { - cohort <- dplyr::tibble( - cohortDefinitionId = c(1, 1, 2, 2), - subjectId = c(1, 1, 1, 2), - cohortStartDate = c( - as.Date("2005-01-15"), - as.Date("2005-07-15"), - as.Date("2005-01-15"), - as.Date("2005-07-15") - ), - cohortEndDate = c( - as.Date("2005-05-15"), - as.Date("2005-09-15"), - as.Date("2005-05-15"), - as.Date("2005-09-15") - ) - ) - - cohort <- dplyr::bind_rows( - cohort, - cohort %>% - dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) - ) - - cohortDefinitionSet <- - cohort %>% - 
dplyr::select(cohortDefinitionId) %>% - dplyr::distinct() %>% - dplyr::rename("cohortId" = "cohortDefinitionId") %>% - dplyr::rowwise() %>% - dplyr::mutate(json = RJSONIO::toJSON(list( - cohortId = cohortId, - randomString = c( - sample(x = LETTERS, 5, replace = TRUE), - sample(x = LETTERS, 4, replace = TRUE), - sample(LETTERS, 1, replace = TRUE) - ) - ))) %>% - dplyr::ungroup() %>% - dplyr::mutate( - sql = json, - checksum = as.character(CohortDiagnostics:::computeChecksum(json)) - ) %>% - dplyr::ungroup() - - unlink( - x = exportFolder, - recursive = TRUE, - force = TRUE - ) - dir.create( - path = exportFolder, - showWarnings = FALSE, - recursive = TRUE - ) - - cohortTable <- - paste0( - "ct_", - format(Sys.time(), "%s"), - sample(1:100, 1) - ) - - DatabaseConnector::insertTable( - connection = con, - databaseSchema = server$cohortDatabaseSchema, - tableName = cohortTable, - data = cohort, - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = FALSE, - camelCaseToSnakeCase = TRUE, - progressBar = FALSE - ) - - CohortDiagnostics::runTimeSeries( - connection = con, - tempEmulationSchema = server$tempEmulationSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = cohortTable, - cohortDefinitionSet = cohortDefinitionSet %>% - dplyr::filter(cohortId %in% c(1, 2)), - runCohortTimeSeries = TRUE, - runDataSourceTimeSeries = FALSE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - instantiatedCohorts = cohort$cohortDefinitionId, - incremental = incremental, - recordKeepingFile = recordKeepingFile, - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ), - batchSize = 1 - ) - - # result - timeSeriesResults <- - readr::read_csv( - file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols() - ) - print(timeSeriesResults) - - 
testthat::expect_equal( - object = timeSeriesResults$cohort_id %>% unique() %>% sort(), - expected = c(1, 2) - ) - - subset <- CohortDiagnostics:::subsetToRequiredCohorts( - cohorts = cohortDefinitionSet, - task = "runCohortTimeSeries", - incremental = incremental - ) %>% - dplyr::arrange(cohortId) - - testthat::expect_equal( - object = subset$cohortId, - expected = c(1, 2, 1000, 2000) - ) - }) -}) - -test_that("Testing cohort time series execution, incremental = TRUE", { - testServer <- "sqlite" - skip_if_not(testServer %in% names(testServers)) - server <- testServers[[testServer]] - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) - recordKeepingFile <- file.path(exportFolder, "record.csv") - incremental <- TRUE - - with_dbc_connection(con, { - cohort <- dplyr::tibble( - cohortDefinitionId = c(1, 1, 2, 2), - subjectId = c(1, 1, 1, 2), - cohortStartDate = c( - as.Date("2005-01-15"), - as.Date("2005-07-15"), - as.Date("2005-01-15"), - as.Date("2005-07-15") - ), - cohortEndDate = c( - as.Date("2005-05-15"), - as.Date("2005-09-15"), - as.Date("2005-05-15"), - as.Date("2005-09-15") - ) - ) - - cohort <- dplyr::bind_rows( - cohort, - cohort %>% - dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) - ) - - cohortDefinitionSet <- - cohort %>% - dplyr::select(cohortDefinitionId) %>% - dplyr::distinct() %>% - dplyr::rename("cohortId" = "cohortDefinitionId") %>% - dplyr::rowwise() %>% - dplyr::mutate(json = RJSONIO::toJSON(list( - cohortId = cohortId, - randomString = c( - sample(x = LETTERS, 5, replace = TRUE), - sample(x = LETTERS, 4, replace = TRUE), - sample(LETTERS, 1, replace = TRUE) - ) - ))) %>% - dplyr::ungroup() %>% - dplyr::mutate( - sql = json, - checksum = as.character(CohortDiagnostics:::computeChecksum(json)) - ) %>% - dplyr::ungroup() - - unlink( - x = exportFolder, - recursive = TRUE, - force = TRUE - ) - dir.create( - path = exportFolder, - showWarnings = FALSE, - 
recursive = TRUE - ) - - cohortTable <- - paste0( - "ct_", - format(Sys.time(), "%s"), - sample(1:100, 1) - ) - - DatabaseConnector::insertTable( - connection = con, - databaseSchema = server$cohortDatabaseSchema, - tableName = cohortTable, - data = cohort, - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = FALSE, - camelCaseToSnakeCase = TRUE, - progressBar = FALSE - ) - - CohortDiagnostics::runTimeSeries( - connection = con, - tempEmulationSchema = server$tempEmulationSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = cohortTable, - cohortDefinitionSet = cohortDefinitionSet %>% - dplyr::filter(cohortId %in% c(1, 2)), - runCohortTimeSeries = TRUE, - runDataSourceTimeSeries = FALSE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - instantiatedCohorts = cohort$cohortDefinitionId, - incremental = incremental, - recordKeepingFile = recordKeepingFile, - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ), - batchSize = 1 - ) - - recordKeepingFileData <- - readr::read_csv( - file = recordKeepingFile, - col_types = readr::cols() - ) - - # testing if check sum is written - testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) - - # result - timeSeriesResults1 <- - readr::read_csv( - file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols() - ) - - subset <- CohortDiagnostics:::subsetToRequiredCohorts( - cohorts = cohortDefinitionSet, - task = "runCohortTimeSeries", - incremental = incremental, - recordKeepingFile = recordKeepingFile - ) %>% - dplyr::arrange(cohortId) - - testthat::expect_equal( - object = subset$cohortId, - expected = c(1000, 2000) - ) - - # delete the previously written results file. 
To see if the previously executed cohorts will have results after deletion - unlink( - x = file.path(exportFolder, "time_series.csv"), - recursive = TRUE, - force = TRUE - ) - - CohortDiagnostics::runTimeSeries( - connection = con, - tempEmulationSchema = server$tempEmulationSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = cohortTable, - cohortDefinitionSet = cohortDefinitionSet, - runCohortTimeSeries = TRUE, - runDataSourceTimeSeries = FALSE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - instantiatedCohorts = cohort$cohortDefinitionId, - incremental = incremental, - recordKeepingFile = recordKeepingFile, - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ), - batchSize = 100 - ) - resultsNew <- - readr::read_csv( - file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols() - ) - - testthat::expect_equal( - object = resultsNew$cohort_id %>% unique() %>% sort(), - expected = c(1000, 2000) - ) - }) -}) - -test_that("Testing Data source time series execution, incremental = FALSE ", { - testServer <- "sqlite" - skip_if_not(testServer %in% names(testServers)) - server <- testServers[[testServer]] - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) - recordKeepingFile <- file.path(exportFolder, "record.csv") - incremental <- FALSE - - with_dbc_connection(con, { - cohortDefinitionSet <- dplyr::tibble( - cohortId = -44819062, - # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 - checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") - ) - - unlink( - x = exportFolder, - recursive = TRUE, - force = TRUE - ) - dir.create( - path = exportFolder, - showWarnings = FALSE, - recursive = TRUE - ) 
- - CohortDiagnostics::runTimeSeries( - connection = con, - tempEmulationSchema = server$tempEmulationSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortDefinitionSet = data.frame(), - runCohortTimeSeries = FALSE, - runDataSourceTimeSeries = TRUE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - incremental = incremental, - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ) - ) - - # result - dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols()) - - subset <- subsetToRequiredCohorts( - cohorts = cohortDefinitionSet, - task = "runDataSourceTimeSeries", - incremental = incremental - ) %>% - dplyr::arrange(cohortId) - - testthat::expect_equal( - object = nrow(subset), - expected = 1 - ) - }) -}) - -test_that("Testing Data source time series execution, incremental = TRUE ", { - testServer <- "sqlite" - skip_if_not(testServer %in% names(testServers)) - server <- testServers[[testServer]] - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) - recordKeepingFile <- file.path(exportFolder, "record.csv") - incremental <- TRUE - - with_dbc_connection(con, { - cohortDefinitionSet <- dplyr::tibble( - cohortId = -44819062, - # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 - checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") - ) - - unlink( - x = exportFolder, - recursive = TRUE, - force = TRUE - ) - dir.create( - path = exportFolder, - showWarnings = FALSE, - recursive = TRUE - ) - - CohortDiagnostics::runTimeSeries( - connection = con, - tempEmulationSchema = server$tempEmulationSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - 
cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortDefinitionSet = data.frame(), - runCohortTimeSeries = FALSE, - runDataSourceTimeSeries = TRUE, - databaseId = "testDatabaseId", - exportFolder = exportFolder, - minCellCount = 0, - incremental = incremental, - recordKeepingFile = recordKeepingFile, - observationPeriodDateRange = dplyr::tibble( - observationPeriodMinDate = as.Date("2004-01-01"), - observationPeriodMaxDate = as.Date("2007-12-31") - ) - ) - - recordKeepingFileData <- readr::read_csv(file = recordKeepingFile, - col_types = readr::cols()) - - # testing if check sum is written - testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) - testthat::expect_equal(object = recordKeepingFileData$cohortId, expected = -44819062) - - # result - dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), - col_types = readr::cols()) - - subset <- subsetToRequiredCohorts( - cohorts = cohortDefinitionSet, - task = "runDataSourceTimeSeries", - incremental = incremental, - recordKeepingFile = recordKeepingFile - ) %>% - dplyr::arrange(cohortId) - - testthat::expect_equal( - object = nrow(subset), - expected = 0 - ) - }) -}) +# # Test getTimeSeries on all testServers +# for (nm in names(testServers)) { +# +# server <- testServers[[nm]] +# con <- connect(server$connectionDetails) +# exportFolder <- file.path(tempdir(), paste0(nm, "exp")) +# recordKeepingFile <- file.path(exportFolder, "record.csv") +# +# test_that("Testing time series logic", { +# skip_if(skipCdmTests, "cdm settings not configured") +# +# # to do - with incremental = FALSE +# with_dbc_connection(con, { +# # manually create cohort table and load to table +# # Cohort table has a total of four records, with each cohort id having two each +# # cohort 1 has one subject with two different cohort entries +# # cohort 2 has two subject with two different cohort entries +# cohort <- dplyr::tibble( +# cohortDefinitionId = c(1, 1, 2, 2), +# subjectId = 
c(1, 1, 1, 2), +# cohortStartDate = c(as.Date("2005-01-15"), as.Date("2005-07-15"), as.Date("2005-01-15"), as.Date("2005-07-15")), +# cohortEndDate = c(as.Date("2005-05-15"), as.Date("2005-09-15"), as.Date("2005-05-15"), as.Date("2005-09-15")) +# ) +# +# cohortTable <- +# paste0("ct_", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) +# +# DatabaseConnector::insertTable( +# connection = con, +# databaseSchema = server$cohortDatabaseSchema, +# tableName = cohortTable, +# data = cohort, +# dropTableIfExists = TRUE, +# createTable = TRUE, +# tempTable = FALSE, +# camelCaseToSnakeCase = TRUE, +# progressBar = FALSE +# ) +# +# timeSeries <- CohortDiagnostics:::getTimeSeries( +# connection = con, +# tempEmulationSchema = server$tempEmulationSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = cohortTable, +# runCohortTimeSeries = TRUE, +# runDataSourceTimeSeries = FALSE, # cannot test data source time series because we are using simulated cohort table +# timeSeriesMinDate = as.Date("2004-01-01"), +# timeSeriesMaxDate = as.Date("2006-12-31"), +# cohortIds = c(1, 2), +# stratifyByGender = FALSE, # cannot test stratification because it will require cohort table to be built from cdm +# stratifyByAgeGroup = FALSE # this test is using simulated cohort table +# ) +# +# # testing if values returned for cohort 1 is as expected +# timeSeriesCohort <- timeSeries %>% +# dplyr::filter(.data$cohortId == 1) %>% +# dplyr::filter(.data$seriesType == "T1") %>% +# dplyr::filter(.data$calendarInterval == "m") +# +# # there should be 8 records in this data frame, representing 8 months for the one subject in the cohort id = 1 +# testthat::expect_equal( +# object = nrow(timeSeriesCohort), +# expected = 8 +# ) +# +# # there should be 2 records in this data frame, representing the 2 starts for the one subject in the cohort id = 1 +# testthat::expect_equal( +# object = nrow(timeSeriesCohort %>% 
dplyr::filter(.data$recordsStart == 1)), +# expected = 2 +# ) +# +# # there should be 1 records in this data frame, representing the 1 incident start for the one subject in the cohort id = 1 +# testthat::expect_equal( +# object = nrow(timeSeriesCohort %>% dplyr::filter(.data$subjectsStartIn == 1)), +# expected = 1 +# ) +# }) +# }) +# } +# +# test_that("Testing cohort time series execution, incremental = FALSE", { +# testServer <- "sqlite" +# skip_if_not(testServer %in% names(testServers)) +# server <- testServers[[testServer]] +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) +# recordKeepingFile <- file.path(exportFolder, "record.csv") +# incremental <- FALSE +# +# with_dbc_connection(con, { +# cohort <- dplyr::tibble( +# cohortDefinitionId = c(1, 1, 2, 2), +# subjectId = c(1, 1, 1, 2), +# cohortStartDate = c( +# as.Date("2005-01-15"), +# as.Date("2005-07-15"), +# as.Date("2005-01-15"), +# as.Date("2005-07-15") +# ), +# cohortEndDate = c( +# as.Date("2005-05-15"), +# as.Date("2005-09-15"), +# as.Date("2005-05-15"), +# as.Date("2005-09-15") +# ) +# ) +# +# cohort <- dplyr::bind_rows( +# cohort, +# cohort %>% +# dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) +# ) +# +# cohortDefinitionSet <- +# cohort %>% +# dplyr::select(cohortDefinitionId) %>% +# dplyr::distinct() %>% +# dplyr::rename("cohortId" = "cohortDefinitionId") %>% +# dplyr::rowwise() %>% +# dplyr::mutate(json = RJSONIO::toJSON(list( +# cohortId = cohortId, +# randomString = c( +# sample(x = LETTERS, 5, replace = TRUE), +# sample(x = LETTERS, 4, replace = TRUE), +# sample(LETTERS, 1, replace = TRUE) +# ) +# ))) %>% +# dplyr::ungroup() %>% +# dplyr::mutate( +# sql = json, +# checksum = as.character(CohortDiagnostics:::computeChecksum(json)) +# ) %>% +# dplyr::ungroup() +# +# unlink( +# x = exportFolder, +# recursive = TRUE, +# force = TRUE +# ) +# dir.create( +# path = exportFolder, +# showWarnings = FALSE, +# 
recursive = TRUE +# ) +# +# cohortTable <- +# paste0( +# "ct_", +# format(Sys.time(), "%s"), +# sample(1:100, 1) +# ) +# +# DatabaseConnector::insertTable( +# connection = con, +# databaseSchema = server$cohortDatabaseSchema, +# tableName = cohortTable, +# data = cohort, +# dropTableIfExists = TRUE, +# createTable = TRUE, +# tempTable = FALSE, +# camelCaseToSnakeCase = TRUE, +# progressBar = FALSE +# ) +# +# CohortDiagnostics::runTimeSeries( +# connection = con, +# tempEmulationSchema = server$tempEmulationSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = cohortTable, +# cohortDefinitionSet = cohortDefinitionSet %>% +# dplyr::filter(cohortId %in% c(1, 2)), +# runCohortTimeSeries = TRUE, +# runDataSourceTimeSeries = FALSE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# instantiatedCohorts = cohort$cohortDefinitionId, +# incremental = incremental, +# recordKeepingFile = recordKeepingFile, +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ), +# batchSize = 1 +# ) +# +# # result +# timeSeriesResults <- +# readr::read_csv( +# file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols() +# ) +# print(timeSeriesResults) +# +# testthat::expect_equal( +# object = timeSeriesResults$cohort_id %>% unique() %>% sort(), +# expected = c(1, 2) +# ) +# +# subset <- CohortDiagnostics:::subsetToRequiredCohorts( +# cohorts = cohortDefinitionSet, +# task = "runCohortTimeSeries", +# incremental = incremental +# ) %>% +# dplyr::arrange(cohortId) +# +# testthat::expect_equal( +# object = subset$cohortId, +# expected = c(1, 2, 1000, 2000) +# ) +# }) +# }) +# +# test_that("Testing cohort time series execution, incremental = TRUE", { +# testServer <- "sqlite" +# skip_if_not(testServer %in% names(testServers)) +# server <- testServers[[testServer]] +# 
con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) +# recordKeepingFile <- file.path(exportFolder, "record.csv") +# incremental <- TRUE +# +# with_dbc_connection(con, { +# cohort <- dplyr::tibble( +# cohortDefinitionId = c(1, 1, 2, 2), +# subjectId = c(1, 1, 1, 2), +# cohortStartDate = c( +# as.Date("2005-01-15"), +# as.Date("2005-07-15"), +# as.Date("2005-01-15"), +# as.Date("2005-07-15") +# ), +# cohortEndDate = c( +# as.Date("2005-05-15"), +# as.Date("2005-09-15"), +# as.Date("2005-05-15"), +# as.Date("2005-09-15") +# ) +# ) +# +# cohort <- dplyr::bind_rows( +# cohort, +# cohort %>% +# dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) +# ) +# +# cohortDefinitionSet <- +# cohort %>% +# dplyr::select(cohortDefinitionId) %>% +# dplyr::distinct() %>% +# dplyr::rename("cohortId" = "cohortDefinitionId") %>% +# dplyr::rowwise() %>% +# dplyr::mutate(json = RJSONIO::toJSON(list( +# cohortId = cohortId, +# randomString = c( +# sample(x = LETTERS, 5, replace = TRUE), +# sample(x = LETTERS, 4, replace = TRUE), +# sample(LETTERS, 1, replace = TRUE) +# ) +# ))) %>% +# dplyr::ungroup() %>% +# dplyr::mutate( +# sql = json, +# checksum = as.character(CohortDiagnostics:::computeChecksum(json)) +# ) %>% +# dplyr::ungroup() +# +# unlink( +# x = exportFolder, +# recursive = TRUE, +# force = TRUE +# ) +# dir.create( +# path = exportFolder, +# showWarnings = FALSE, +# recursive = TRUE +# ) +# +# cohortTable <- +# paste0( +# "ct_", +# format(Sys.time(), "%s"), +# sample(1:100, 1) +# ) +# +# DatabaseConnector::insertTable( +# connection = con, +# databaseSchema = server$cohortDatabaseSchema, +# tableName = cohortTable, +# data = cohort, +# dropTableIfExists = TRUE, +# createTable = TRUE, +# tempTable = FALSE, +# camelCaseToSnakeCase = TRUE, +# progressBar = FALSE +# ) +# +# CohortDiagnostics::runTimeSeries( +# connection = con, +# tempEmulationSchema = server$tempEmulationSchema, +# cdmDatabaseSchema 
= server$cdmDatabaseSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = cohortTable, +# cohortDefinitionSet = cohortDefinitionSet %>% +# dplyr::filter(cohortId %in% c(1, 2)), +# runCohortTimeSeries = TRUE, +# runDataSourceTimeSeries = FALSE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# instantiatedCohorts = cohort$cohortDefinitionId, +# incremental = incremental, +# recordKeepingFile = recordKeepingFile, +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ), +# batchSize = 1 +# ) +# +# recordKeepingFileData <- +# readr::read_csv( +# file = recordKeepingFile, +# col_types = readr::cols() +# ) +# +# # testing if check sum is written +# testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) +# +# # result +# timeSeriesResults1 <- +# readr::read_csv( +# file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols() +# ) +# +# subset <- CohortDiagnostics:::subsetToRequiredCohorts( +# cohorts = cohortDefinitionSet, +# task = "runCohortTimeSeries", +# incremental = incremental, +# recordKeepingFile = recordKeepingFile +# ) %>% +# dplyr::arrange(cohortId) +# +# testthat::expect_equal( +# object = subset$cohortId, +# expected = c(1000, 2000) +# ) +# +# # delete the previously written results file. 
To see if the previously executed cohorts will have results after deletion +# unlink( +# x = file.path(exportFolder, "time_series.csv"), +# recursive = TRUE, +# force = TRUE +# ) +# +# CohortDiagnostics::runTimeSeries( +# connection = con, +# tempEmulationSchema = server$tempEmulationSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = cohortTable, +# cohortDefinitionSet = cohortDefinitionSet, +# runCohortTimeSeries = TRUE, +# runDataSourceTimeSeries = FALSE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# instantiatedCohorts = cohort$cohortDefinitionId, +# incremental = incremental, +# recordKeepingFile = recordKeepingFile, +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ), +# batchSize = 100 +# ) +# resultsNew <- +# readr::read_csv( +# file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols() +# ) +# +# testthat::expect_equal( +# object = resultsNew$cohort_id %>% unique() %>% sort(), +# expected = c(1000, 2000) +# ) +# }) +# }) +# +# test_that("Testing Data source time series execution, incremental = FALSE ", { +# testServer <- "sqlite" +# skip_if_not(testServer %in% names(testServers)) +# server <- testServers[[testServer]] +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) +# recordKeepingFile <- file.path(exportFolder, "record.csv") +# incremental <- FALSE +# +# with_dbc_connection(con, { +# cohortDefinitionSet <- dplyr::tibble( +# cohortId = -44819062, +# # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 +# checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") +# ) +# +# unlink( +# x = exportFolder, +# recursive = TRUE, +# force = TRUE +# ) +# dir.create( +# 
path = exportFolder, +# showWarnings = FALSE, +# recursive = TRUE +# ) +# +# CohortDiagnostics::runTimeSeries( +# connection = con, +# tempEmulationSchema = server$tempEmulationSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortDefinitionSet = data.frame(), +# runCohortTimeSeries = FALSE, +# runDataSourceTimeSeries = TRUE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# incremental = incremental, +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ) +# ) +# +# # result +# dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols()) +# +# subset <- subsetToRequiredCohorts( +# cohorts = cohortDefinitionSet, +# task = "runDataSourceTimeSeries", +# incremental = incremental +# ) %>% +# dplyr::arrange(cohortId) +# +# testthat::expect_equal( +# object = nrow(subset), +# expected = 1 +# ) +# }) +# }) +# +# test_that("Testing Data source time series execution, incremental = TRUE ", { +# testServer <- "sqlite" +# skip_if_not(testServer %in% names(testServers)) +# server <- testServers[[testServer]] +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) +# recordKeepingFile <- file.path(exportFolder, "record.csv") +# incremental <- TRUE +# +# with_dbc_connection(con, { +# cohortDefinitionSet <- dplyr::tibble( +# cohortId = -44819062, +# # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 +# checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") +# ) +# +# unlink( +# x = exportFolder, +# recursive = TRUE, +# force = TRUE +# ) +# dir.create( +# path = exportFolder, +# showWarnings = FALSE, +# recursive = TRUE +# ) +# +# 
CohortDiagnostics::runTimeSeries( +# connection = con, +# tempEmulationSchema = server$tempEmulationSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortDefinitionSet = data.frame(), +# runCohortTimeSeries = FALSE, +# runDataSourceTimeSeries = TRUE, +# databaseId = "testDatabaseId", +# exportFolder = exportFolder, +# minCellCount = 0, +# incremental = incremental, +# recordKeepingFile = recordKeepingFile, +# observationPeriodDateRange = dplyr::tibble( +# observationPeriodMinDate = as.Date("2004-01-01"), +# observationPeriodMaxDate = as.Date("2007-12-31") +# ) +# ) +# +# recordKeepingFileData <- readr::read_csv(file = recordKeepingFile, +# col_types = readr::cols()) +# +# # testing if check sum is written +# testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) +# testthat::expect_equal(object = recordKeepingFileData$cohortId, expected = -44819062) +# +# # result +# dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), +# col_types = readr::cols()) +# +# subset <- subsetToRequiredCohorts( +# cohorts = cohortDefinitionSet, +# task = "runDataSourceTimeSeries", +# incremental = incremental, +# recordKeepingFile = recordKeepingFile +# ) %>% +# dplyr::arrange(cohortId) +# +# testthat::expect_equal( +# object = nrow(subset), +# expected = 0 +# ) +# }) +# }) diff --git a/tests/testthat/test-runVisitContext.R b/tests/testthat/test-runVisitContext.R index 035cc0836..39e78990d 100644 --- a/tests/testthat/test-runVisitContext.R +++ b/tests/testthat/test-runVisitContext.R @@ -1,565 +1,565 @@ -library(SqlRender) -library(readxl) -library(dplyr) -library(readr) - - -for (nm in names(testServers)) { - - server <- testServers[[nm]] - - con <- DatabaseConnector::connect(server$connectionDetails) - - exportFolder <- tempfile() - - dir.create(exportFolder) - - test_that(paste("test temporary table #concept_ids creation"), { - - 
DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - - getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - expect_true(tempTableExists(con, "concept_ids")) - - }) - - - test_that(paste("test no duplicates in concept_ids table for getVisitContext function"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - - sql <- "SELECT * FROM #concept_ids" - - translatedSql <- translate(sql, targetDialect = server$connectionDetails$dbms) - - firstTime <- system.time( - - visitContextResult <- getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - ) - - firstResult <- querySql(con, translatedSql) - - secondTime <- system.time( - - visitContextResult <- getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - ) - - secondResult <- querySql(con, translatedSql) - - expect_equal(firstResult, secondResult) - - }) - - - # For testing the runVisitContext, there is no need to run it on multiple database systems - # since no sql other than the one included in the getVisitContext is executed. 
- if (nm == "sqlite"){ - - test_that(paste("test that when incremental is FALSE the incremental file is not generated"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - - expect_false(file.exists(file.path(exportFolder,"incremental"))) - - runVisitContext(connection = con, - cohortDefinitionSet = server$cohortDefinitionSet, - exportFolder = exportFolder, - databaseId = nm , - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = FALSE - ) - - expect_false(file.exists(file.path(exportFolder,"incremental"))) - }) - - test_that(paste("test that when incremental is TRUE the incremental file is generated when it doesn't exist"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - expect_false(file.exists(file.path(exportFolder, "incremental"))) - - runVisitContext(connection = con, - cohortDefinitionSet = server$cohortDefinitionSet, - exportFolder = exportFolder, - databaseId = nm , - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - expect_true(file.exists(file.path(exportFolder, "incremental"))) - - }) - - - test_that(paste("test that the output file visit_context.csv is generated and is identical with the output of getVisitContext()"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - getVisitContextResult <- getVisitContext(connection = con, - cdmDatabaseSchema = server$cdmDatabaseSchema, - tempEmulationSchema = server$tempEmulationSchema, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cohortTable = server$cohortTable, - cohortIds = 
server$cohortIds, - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - getVisitContextResult <- unname(getVisitContextResult) - - runVisitContext(connection = con, - cohortDefinitionSet = server$cohortDefinitionSet, - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = FALSE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - runVisitContextResult <- read.csv(resultCsv, header = TRUE, sep = ",") - runVisitContextResult$database_id <- NULL - runVisitContextResult <- unname(runVisitContextResult) - - expect_equal(getVisitContextResult, runVisitContextResult) - - }) - - - test_that(paste("test that incremental logic is correct: incremental run for the first time"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - cohortIds <- c(17492) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # Csv should contain results only from the specified cohort - expect_equal(unique(results$cohort_id), c(17492)) - - }) - - test_that(paste("test that incremental logic is correct: no new cohorts"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - cohortIds 
<- c(17492) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # Csv should contain the same result after the first run and the second run as no new cohorts were added - expect_equal(results1, results2) - - }) - - test_that(paste("test that incremental logic is correct: output visit_context.csv must contain results for new cohorts"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - dir.create(exportFolder) - - cohortIds <- c(17492) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) 
- - results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # Csv should contain results only from the specified cohort - expect_equal(unique(results1$cohort_id), c(17492)) - - cohortIds <- c(17492, 17493) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - resultCsv <- file.path(exportFolder, "visit_context.csv") - - expect_true(file.exists(resultCsv)) - - results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) - - # Csv should contain results from both runs, hence both cohorts - expect_equal(unique(results2$cohort_id), c(17492, 17493)) - - }) - - test_that(paste("test that the export folder is created if is not already there"), { - - DatabaseConnector::disconnect(con) - con <- DatabaseConnector::connect(server$connectionDetails) - exportFolder <- tempfile() - - expect_false(dir.exists(exportFolder)) - - cohortIds <- c(17492) - - runVisitContext(connection = con, - cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), - exportFolder = exportFolder, - databaseId = nm, - cohortTable = server$cohortTable, - cohortDatabaseSchema = server$cohortDatabaseSchema, - cdmDatabaseSchema = server$cdmDatabaseSchema, - minCellCount = 0, - incremental = TRUE - ) - - expect_true(dir.exists(exportFolder)) - - }) - - } -} - - -##### Test cases with custom data ##### - -test_that(paste("test that the subject counts per cohort, visit concept and visit context are correct"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- 
"test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - resultPath <- system.file("test_cases/runVisitContext/testSubjectCounts/expectedResult.xlsx", package = "CohortDiagnostics") - - resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", - "numeric")) - - visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] - visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) - - resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] - resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) - - are_equal <- identical(visitContextResult, resultData) - - expect_true(are_equal) - -}) - -test_that(paste("test that only the new visit_concept_id are inserted into the #concept_ids table"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - 
getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - sql <- "select * from #concept_ids" - - translatedSQL <- translate(sql, targetDialect = "sqlite") - - res1 <- querySql(connection = connection, sql = translatedSQL) - - - are_equal <- all(sort(unlist(list(262, 9201))) == sort(unlist(res1$CONCEPT_ID))) - - expect_true(are_equal) - - new_row <- data.frame( - visit_occurrence_id = 5, - person_id = 2, - visit_concept_id = 261, - visit_start_date = as.Date("2015-01-10"), - visit_start_datetime = as.POSIXct("2015-01-10 08:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), - visit_end_date = as.Date("2015-01-10"), - visit_end_datetime = as.POSIXct("2015-01-10 18:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), - visit_type_concept_id = 32817, - provider_id = 1, - care_site_id = 1, - visit_source_value = 0, - visit_source_concept_id = 0, - admitting_source_concept_id = 8870, - admitting_source_value = "TRANSFER FROM HOSPITAL", - discharge_to_concept_id = 581476, - discharge_to_source_value = "HOME HEALTH CARE", - preceding_visit_occurrence_id = 0 - ) - - DBI::dbAppendTable(connection, "visit_occurrence", new_row) - - getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - sql <- "select * from #concept_ids" - - translatedSQL <- translate(sql, targetDialect = "sqlite") - - res2 <- querySql(connection = connection, sql = translatedSQL) - - are_equal <- all(sort(unlist(list(262, 9201, 261))) == sort(unlist(res2$CONCEPT_ID))) - - expect_true(are_equal) -}) - - - -test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after 
cohort creation are considered"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") - - resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", - "numeric")) - - visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] - visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) - - resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] - resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) - - are_equal <- identical(visitContextResult, resultData) - - expect_true(are_equal) - -}) - -test_that(paste("test that no other cohorts than the ones specified in cohortIds are included in the output"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- 
"test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - print(visitContextResult) - expect_true(identical(unique(visitContextResult$cohortId), c(1))) - -}) - -test_that(paste("test that when the subjects in the cohort have no visits an empty data frame is returned"), { - - cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") - - patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_patientData.json" - - connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) - - connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) - - addCohortTable(connection, cohortDataFilePath) - - sql <- "delete from visit_occurrence;" - - translatedSQL <- translate(sql = sql, targetDialect = "sqlite") - - executeSql(connection = connection, sql = translatedSQL) - - visitContextResult <- getVisitContext(connection = connection, - cdmDatabaseSchema = "main", - tempEmulationSchema = "main", - cohortDatabaseSchema = "main", - cohortTable = "cohort", - cohortIds = list(1,2), - conceptIdTable = "#concept_ids", - cdmVersion = 5 - ) - - resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/expectedResult.xlsx", package = "CohortDiagnostics") - - resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", - "numeric")) - - visitContextResult <- 
visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] - visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) - - resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] - resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) - - are_equal <- identical(visitContextResult, resultData) - - expect_true(are_equal) -}) +# library(SqlRender) +# library(readxl) +# library(dplyr) +# library(readr) +# +# +# for (nm in names(testServers)) { +# +# server <- testServers[[nm]] +# +# con <- DatabaseConnector::connect(server$connectionDetails) +# +# exportFolder <- tempfile() +# +# dir.create(exportFolder) +# +# test_that(paste("test temporary table #concept_ids creation"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# +# getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# expect_true(tempTableExists(con, "concept_ids")) +# +# }) +# +# +# test_that(paste("test no duplicates in concept_ids table for getVisitContext function"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# +# sql <- "SELECT * FROM #concept_ids" +# +# translatedSql <- translate(sql, targetDialect = server$connectionDetails$dbms) +# +# firstTime <- system.time( +# +# visitContextResult <- getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema = 
server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# ) +# +# firstResult <- querySql(con, translatedSql) +# +# secondTime <- system.time( +# +# visitContextResult <- getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# ) +# +# secondResult <- querySql(con, translatedSql) +# +# expect_equal(firstResult, secondResult) +# +# }) +# +# +# # For testing the runVisitContext, there is no need to run it on multiple database systems +# # since no sql other than the one included in the getVisitContext is executed. +# if (nm == "sqlite"){ +# +# test_that(paste("test that when incremental is FALSE the incremental file is not generated"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# +# expect_false(file.exists(file.path(exportFolder,"incremental"))) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = server$cohortDefinitionSet, +# exportFolder = exportFolder, +# databaseId = nm , +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = FALSE +# ) +# +# expect_false(file.exists(file.path(exportFolder,"incremental"))) +# }) +# +# test_that(paste("test that when incremental is TRUE the incremental file is generated when it doesn't exist"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# expect_false(file.exists(file.path(exportFolder, "incremental"))) +# +# 
runVisitContext(connection = con, +# cohortDefinitionSet = server$cohortDefinitionSet, +# exportFolder = exportFolder, +# databaseId = nm , +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# expect_true(file.exists(file.path(exportFolder, "incremental"))) +# +# }) +# +# +# test_that(paste("test that the output file visit_context.csv is generated and is identical with the output of getVisitContext()"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# getVisitContextResult <- getVisitContext(connection = con, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# tempEmulationSchema = server$tempEmulationSchema, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cohortTable = server$cohortTable, +# cohortIds = server$cohortIds, +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# getVisitContextResult <- unname(getVisitContextResult) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = server$cohortDefinitionSet, +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = FALSE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# runVisitContextResult <- read.csv(resultCsv, header = TRUE, sep = ",") +# runVisitContextResult$database_id <- NULL +# runVisitContextResult <- unname(runVisitContextResult) +# +# expect_equal(getVisitContextResult, runVisitContextResult) +# +# }) +# +# +# test_that(paste("test that incremental logic is correct: incremental run for the first time"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# 
exportFolder <- tempfile() +# dir.create(exportFolder) +# +# cohortIds <- c(17492) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # Csv should contain results only from the specified cohort +# expect_equal(unique(results$cohort_id), c(17492)) +# +# }) +# +# test_that(paste("test that incremental logic is correct: no new cohorts"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# cohortIds <- c(17492) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, 
"visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # Csv should contain the same result after the first run and the second run as no new cohorts were added +# expect_equal(results1, results2) +# +# }) +# +# test_that(paste("test that incremental logic is correct: output visit_context.csv must contain results for new cohorts"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# dir.create(exportFolder) +# +# cohortIds <- c(17492) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # Csv should contain results only from the specified cohort +# expect_equal(unique(results1$cohort_id), c(17492)) +# +# cohortIds <- c(17492, 17493) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# resultCsv <- file.path(exportFolder, "visit_context.csv") +# +# expect_true(file.exists(resultCsv)) +# +# results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) +# +# # Csv should contain results from both runs, hence both cohorts +# expect_equal(unique(results2$cohort_id), c(17492, 
17493)) +# +# }) +# +# test_that(paste("test that the export folder is created if is not already there"), { +# +# DatabaseConnector::disconnect(con) +# con <- DatabaseConnector::connect(server$connectionDetails) +# exportFolder <- tempfile() +# +# expect_false(dir.exists(exportFolder)) +# +# cohortIds <- c(17492) +# +# runVisitContext(connection = con, +# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), +# exportFolder = exportFolder, +# databaseId = nm, +# cohortTable = server$cohortTable, +# cohortDatabaseSchema = server$cohortDatabaseSchema, +# cdmDatabaseSchema = server$cdmDatabaseSchema, +# minCellCount = 0, +# incremental = TRUE +# ) +# +# expect_true(dir.exists(exportFolder)) +# +# }) +# +# } +# } +# +# +# ##### Test cases with custom data ##### +# +# test_that(paste("test that the subject counts per cohort, visit concept and visit context are correct"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# resultPath <- system.file("test_cases/runVisitContext/testSubjectCounts/expectedResult.xlsx", package = "CohortDiagnostics") +# +# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", +# "numeric")) +# +# visitContextResult <- 
visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] +# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) +# +# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] +# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) +# +# are_equal <- identical(visitContextResult, resultData) +# +# expect_true(are_equal) +# +# }) +# +# test_that(paste("test that only the new visit_concept_id are inserted into the #concept_ids table"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# sql <- "select * from #concept_ids" +# +# translatedSQL <- translate(sql, targetDialect = "sqlite") +# +# res1 <- querySql(connection = connection, sql = translatedSQL) +# +# +# are_equal <- all(sort(unlist(list(262, 9201))) == sort(unlist(res1$CONCEPT_ID))) +# +# expect_true(are_equal) +# +# new_row <- data.frame( +# visit_occurrence_id = 5, +# person_id = 2, +# visit_concept_id = 261, +# visit_start_date = as.Date("2015-01-10"), +# visit_start_datetime = as.POSIXct("2015-01-10 08:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), +# visit_end_date 
= as.Date("2015-01-10"), +# visit_end_datetime = as.POSIXct("2015-01-10 18:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), +# visit_type_concept_id = 32817, +# provider_id = 1, +# care_site_id = 1, +# visit_source_value = 0, +# visit_source_concept_id = 0, +# admitting_source_concept_id = 8870, +# admitting_source_value = "TRANSFER FROM HOSPITAL", +# discharge_to_concept_id = 581476, +# discharge_to_source_value = "HOME HEALTH CARE", +# preceding_visit_occurrence_id = 0 +# ) +# +# DBI::dbAppendTable(connection, "visit_occurrence", new_row) +# +# getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# sql <- "select * from #concept_ids" +# +# translatedSQL <- translate(sql, targetDialect = "sqlite") +# +# res2 <- querySql(connection = connection, sql = translatedSQL) +# +# are_equal <- all(sort(unlist(list(262, 9201, 261))) == sort(unlist(res2$CONCEPT_ID))) +# +# expect_true(are_equal) +# }) +# +# +# +# test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after cohort creation are considered"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = 
list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") +# +# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", +# "numeric")) +# +# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] +# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) +# +# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] +# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) +# +# are_equal <- identical(visitContextResult, resultData) +# +# expect_true(are_equal) +# +# }) +# +# test_that(paste("test that no other cohorts than the ones specified in cohortIds are included in the output"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# print(visitContextResult) +# expect_true(identical(unique(visitContextResult$cohortId), c(1))) +# +# }) +# +# test_that(paste("test that when the subjects in the cohort have no 
visits an empty data frame is returned"), { +# +# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") +# +# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_patientData.json" +# +# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) +# +# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) +# +# addCohortTable(connection, cohortDataFilePath) +# +# sql <- "delete from visit_occurrence;" +# +# translatedSQL <- translate(sql = sql, targetDialect = "sqlite") +# +# executeSql(connection = connection, sql = translatedSQL) +# +# visitContextResult <- getVisitContext(connection = connection, +# cdmDatabaseSchema = "main", +# tempEmulationSchema = "main", +# cohortDatabaseSchema = "main", +# cohortTable = "cohort", +# cohortIds = list(1,2), +# conceptIdTable = "#concept_ids", +# cdmVersion = 5 +# ) +# +# resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/expectedResult.xlsx", package = "CohortDiagnostics") +# +# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", +# "numeric")) +# +# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] +# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) +# +# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] +# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) +# +# are_equal <- identical(visitContextResult, resultData) +# +# expect_true(are_equal) +# }) From 13862bcaa97fd173e13ce17458d727936ceae713 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 09:43:44 +0200 Subject: 
[PATCH 08/18] doc --- R/runTemporalCohortCharacterization.R | 37 +++++++++++++-------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/R/runTemporalCohortCharacterization.R b/R/runTemporalCohortCharacterization.R index c54b85624..9ae629662 100644 --- a/R/runTemporalCohortCharacterization.R +++ b/R/runTemporalCohortCharacterization.R @@ -321,21 +321,19 @@ getCohortCharacteristics <- function(connection = NULL, #' @template recordKeepingFile #' @template batchSize #' -#' @param cohorts cohorts -#' @param cohortCounts A dataframe with the cohort counts -#' @param covariateSettings Either an object of type \code{covariateSettings} as created using one of -#' the createTemporalCovariateSettings function in the FeatureExtraction package, or a list -#' of such objects. -#' @param task Name of this task -#' @param jobName Name of this job -#' @param covariateValueFileName Filename of the covariate value output -#' @param covariateValueContFileName Filename of the contineous covariate output -#' @param covariateRefFileName Filename of the covariate reference -#' @param analysisRefFileName Filename of the analysis reference -#' @param timeRefFileName Filename of the time reference -#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This -#' will help reduce the file size of the characterization output, but will remove information -#' on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent) +#' @param cohorts The cohorts for which the covariates need to be obtained +#' @param cohortCounts A dataframe with the cohort counts +#' @param covariateSettings Either an object of type \code{covariateSettings} as created using one of +#' the createTemporalCovariateSettings function in the FeatureExtraction package, or a list +#' of such objects. 
+#' @param covariateValueFileName Filename of the binary covariates output +#' @param covariateValueContFileName Filename of the continuous covariate output +#' @param covariateRefFileName Filename of the covariate reference output +#' @param analysisRefFileName Filename of the analysis reference output +#' @param timeRefFileName Filename of the time reference output +#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This +#' will help reduce the file size of the characterization output, but will remove information +#' on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent) #' #' @return None, it will write results to disk #' @export @@ -356,8 +354,6 @@ runTemporalCohortCharacterization <- function(connection, instantiatedCohorts, incremental, recordKeepingFile, - task = "runTemporalCohortCharacterization", - jobName = "Temporal Cohort characterization", covariateValueFileName = file.path(exportFolder, "temporal_covariate_value.csv"), covariateValueContFileName = file.path(exportFolder, "temporal_covariate_value_dist.csv"), covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), @@ -365,8 +361,11 @@ runTemporalCohortCharacterization <- function(connection, timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), minCharacterizationMean = 0.001, batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20)) { + jobName <- "Temporal Cohort characterization" + task <- "runTemporalCohortCharacterization" ParallelLogger::logInfo("Running ", jobName) startCohortCharacterization <- Sys.time() + subset <- subsetToRequiredCohorts( cohorts = cohorts %>% dplyr::filter(.data$cohortId %in% instantiatedCohorts), @@ -400,6 +399,7 @@ runTemporalCohortCharacterization <- function(connection, nrow(subset) )) + # Processing cohorts loop for (start in seq(1, nrow(subset), by = batchSize)) { end <- min(start + batchSize - 1, nrow(subset)) 
if (nrow(subset) > batchSize) { @@ -413,8 +413,7 @@ runTemporalCohortCharacterization <- function(connection, ) } - characteristics <- - getCohortCharacteristics( + characteristics <- getCohortCharacteristics( connection = connection, cdmDatabaseSchema = cdmDatabaseSchema, tempEmulationSchema = tempEmulationSchema, From 6ad2044b8923a2141b7f23b6262f41b588d2477d Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 10:03:25 +0200 Subject: [PATCH 09/18] refactoring --- R/runTemporalCohortCharacterization.R | 162 ++++++++++++++------------ 1 file changed, 86 insertions(+), 76 deletions(-) diff --git a/R/runTemporalCohortCharacterization.R b/R/runTemporalCohortCharacterization.R index 9ae629662..4cb87e661 100644 --- a/R/runTemporalCohortCharacterization.R +++ b/R/runTemporalCohortCharacterization.R @@ -110,77 +110,16 @@ exportCharacterization <- function(characteristics, } } - -getCohortCharacteristics <- function(connection = NULL, - cdmDatabaseSchema, - tempEmulationSchema = NULL, - cohortDatabaseSchema = cdmDatabaseSchema, - cohortTable = "cohort", - cohortIds, - cdmVersion = 5, - covariateSettings, - exportFolder, - minCharacterizationMean = 0.001) { - startTime <- Sys.time() - results <- Andromeda::andromeda() - timeExecution( - exportFolder, - taskName = "getDbCovariateData", - parent = "getCohortCharacteristics", - cohortIds = cohortIds, - expr = { - featureExtractionOutput <- - FeatureExtraction::getDbCovariateData( - connection = connection, - oracleTempSchema = tempEmulationSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cdmVersion = cdmVersion, - cohortTable = cohortTable, - cohortIds = cohortIds, - covariateSettings = covariateSettings, - aggregated = TRUE, - minCharacterizationMean = minCharacterizationMean - ) - } - ) - populationSize <- - attr(x = featureExtractionOutput, which = "metaData")$populationSize - populationSize <- - dplyr::tibble( - cohortId = names(populationSize) %>% as.numeric(), 
- populationSize = populationSize - ) - - if (!"analysisRef" %in% names(results)) { - results$analysisRef <- featureExtractionOutput$analysisRef - } - if (!"covariateRef" %in% names(results)) { - results$covariateRef <- featureExtractionOutput$covariateRef - } else { - covariateIds <- results$covariateRef %>% - dplyr::select("covariateId") - Andromeda::appendToTable( - results$covariateRef, - featureExtractionOutput$covariateRef %>% - dplyr::anti_join(covariateIds, by = "covariateId", copy = TRUE) - ) - } - if ("timeRef" %in% names(featureExtractionOutput) && - !"timeRef" %in% names(results)) { - results$timeRef <- featureExtractionOutput$timeRef - } - - if ("covariates" %in% names(featureExtractionOutput) && - dplyr::pull(dplyr::count(featureExtractionOutput$covariates)) > 0) { +mutateCovariateOutput <- function(results, featureExtractionOutput, populationSize, binary) { + if (binary) { covariates <- featureExtractionOutput$covariates %>% dplyr::rename("cohortId" = "cohortDefinitionId") %>% dplyr::left_join(populationSize, by = "cohortId", copy = TRUE) %>% dplyr::mutate("p" = .data$sumValue / populationSize) - + if (nrow(covariates %>% - dplyr::filter(.data$p > 1) %>% - dplyr::collect()) > 0) { + dplyr::filter(.data$p > 1) %>% + dplyr::collect()) > 0) { stop( paste0( "During characterization, population size (denominator) was found to be smaller than features Value (numerator).", @@ -188,13 +127,13 @@ getCohortCharacteristics <- function(connection = NULL, ) ) } - + covariates <- covariates %>% dplyr::mutate("sd" = sqrt(.data$p * (1 - .data$p))) %>% dplyr::select(-"p") %>% dplyr::rename("mean" = "averageValue") %>% dplyr::select(-populationSize) - + if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) { covariates <- covariates %>% dplyr::select( @@ -205,12 +144,12 @@ getCohortCharacteristics <- function(connection = NULL, "mean", "sd" ) - + tidNaCount <- covariates %>% dplyr::filter(is.na(.data$timeId)) %>% dplyr::count() %>% dplyr::pull() - + 
if (tidNaCount > 0) { covariates <- covariates %>% dplyr::mutate(timeId = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId)) @@ -232,10 +171,7 @@ getCohortCharacteristics <- function(connection = NULL, } else { results$covariates <- covariates } - } - - if ("covariatesContinuous" %in% names(featureExtractionOutput) && - dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) { + } else { covariates <- featureExtractionOutput$covariatesContinuous %>% dplyr::rename( "mean" = "averageValue", @@ -254,12 +190,12 @@ getCohortCharacteristics <- function(connection = NULL, "mean", "sd" ) - + tidNaCount <- covariates %>% dplyr::filter(is.na(.data$timeId)) %>% dplyr::count() %>% dplyr::pull() - + if (tidNaCount > 0) { covariates <- covariates %>% dplyr::mutate("timeId" = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId)) @@ -290,6 +226,80 @@ getCohortCharacteristics <- function(connection = NULL, results$covariatesContinuous <- covariatesContinuous } } + return(results) +} + +getCohortCharacteristics <- function(connection = NULL, + cdmDatabaseSchema, + tempEmulationSchema = NULL, + cohortDatabaseSchema = cdmDatabaseSchema, + cohortTable = "cohort", + cohortIds, + cdmVersion = 5, + covariateSettings, + exportFolder, + minCharacterizationMean = 0.001) { + startTime <- Sys.time() + results <- Andromeda::andromeda() + timeExecution( + exportFolder, + taskName = "getDbCovariateData", + parent = "getCohortCharacteristics", + cohortIds = cohortIds, + expr = { + featureExtractionOutput <- + FeatureExtraction::getDbCovariateData( + connection = connection, + oracleTempSchema = tempEmulationSchema, + cdmDatabaseSchema = cdmDatabaseSchema, + cohortDatabaseSchema = cohortDatabaseSchema, + cdmVersion = cdmVersion, + cohortTable = cohortTable, + cohortIds = cohortIds, + covariateSettings = covariateSettings, + aggregated = TRUE, + minCharacterizationMean = minCharacterizationMean + ) + } + ) + populationSize <- + attr(x = featureExtractionOutput, which = 
"metaData")$populationSize + populationSize <- + dplyr::tibble( + cohortId = names(populationSize) %>% as.numeric(), + populationSize = populationSize + ) + + if (!"analysisRef" %in% names(results)) { + results$analysisRef <- featureExtractionOutput$analysisRef + } + if (!"covariateRef" %in% names(results)) { + results$covariateRef <- featureExtractionOutput$covariateRef + } else { + covariateIds <- results$covariateRef %>% + dplyr::select("covariateId") + Andromeda::appendToTable( + results$covariateRef, + featureExtractionOutput$covariateRef %>% + dplyr::anti_join(covariateIds, by = "covariateId", copy = TRUE) + ) + } + if ("timeRef" %in% names(featureExtractionOutput) && + !"timeRef" %in% names(results)) { + results$timeRef <- featureExtractionOutput$timeRef + } + + if ("covariates" %in% names(featureExtractionOutput) && + dplyr::pull(dplyr::count(featureExtractionOutput$covariates)) > 0) { + + results <- mutateCovariateOutput(results, featureExtractionOutput, populationSize, binary = TRUE) + } + + if ("covariatesContinuous" %in% names(featureExtractionOutput) && + dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) { + + results <- mutateCovariateOutput(results, featureExtractionOutput, populationSize, binary = FALSE) + } delta <- Sys.time() - startTime ParallelLogger::logInfo( From 6a60f2c31a5510e1d2c484ddf0e582d8827987d5 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 11:01:06 +0200 Subject: [PATCH 10/18] refactoring --- NAMESPACE | 3 +- R/runTemporalCohortCharacterization.R | 485 ------------------ man/runBreakdownIndexEvents.Rd | 79 ++- man/runCohortCharacterization.Rd | 136 +++++ ...Characterization.Rd => runVisitContext.Rd} | 84 +-- ...ion.R => test-runCohortCharacterization.R} | 8 +- 6 files changed, 202 insertions(+), 593 deletions(-) delete mode 100644 R/runTemporalCohortCharacterization.R create mode 100644 man/runCohortCharacterization.Rd rename man/{runTemporalCohortCharacterization.Rd => runVisitContext.Rd} 
(54%) rename tests/testthat/{test-runTemporalCohortCharacterization.R => test-runCohortCharacterization.R} (93%) diff --git a/NAMESPACE b/NAMESPACE index d9769f85c..b3f3c3ee4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,13 +14,14 @@ export(getResultsDataModelSpecifications) export(launchDiagnosticsExplorer) export(migrateDataModel) export(runBreakdownIndexEvents) +export(runCohortCharacterization) export(runCohortRelationship) export(runIncidenceRate) export(runIncludedSourceConcepts) export(runInclusionStatistics) export(runOrphanConcepts) -export(runTemporalCohortCharacterization) export(runTimeSeries) +export(runVisitContext) export(uploadResults) import(DatabaseConnector) importFrom(CohortGenerator,getCohortCounts) diff --git a/R/runTemporalCohortCharacterization.R b/R/runTemporalCohortCharacterization.R deleted file mode 100644 index 4cb87e661..000000000 --- a/R/runTemporalCohortCharacterization.R +++ /dev/null @@ -1,485 +0,0 @@ -# Copyright 2024 Observational Health Data Sciences and Informatics -# -# This file is part of CohortDiagnostics -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# export characteristics to csv files -exportCharacterization <- function(characteristics, - databaseId, - incremental, - covariateValueFileName, - covariateValueContFileName, - covariateRefFileName, - analysisRefFileName, - timeRefFileName, - counts, - minCellCount) { - - if (!"covariates" %in% names(characteristics)) { - warning("No characterization output for submitted cohorts") - } else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) { - characteristics$filteredCovariates <- - characteristics$covariates %>% - dplyr::mutate(databaseId = !!databaseId) %>% - dplyr::left_join(counts, - by = c("cohortId", "databaseId"), - copy = TRUE - ) %>% - dplyr::mutate( - mean = dplyr::if_else( - .data$mean != 0 & .data$mean < minCellCount / as.numeric(.data$cohortEntries), - -minCellCount / as.numeric(.data$cohortEntries), - .data$mean - ), - sumValue = dplyr::if_else( - .data$sumValue != 0 & .data$sumValue < minCellCount, - -minCellCount, - .data$sumValue - ) - ) %>% - dplyr::mutate(sd = dplyr::if_else(mean >= 0, .data$sd, 0)) %>% - dplyr::mutate( - mean = round(.data$mean, digits = 4), - sd = round(.data$sd, digits = 4) - ) %>% - dplyr::select(-"cohortEntries", -"cohortSubjects") %>% - dplyr::distinct() %>% - exportDataToCsv( - tableName = "temporal_covariate_value", - fileName = covariateValueFileName, - minCellCount = minCellCount, - databaseId = databaseId, - incremental = TRUE) - - if (dplyr::pull(dplyr::count(characteristics$filteredCovariates)) > 0) { - - covariateRef <- characteristics$covariateRef - exportDataToCsv( - data = characteristics$covariateRef, - tableName = "temporal_covariate_ref", - fileName = covariateRefFileName, - minCellCount = minCellCount, - incremental = TRUE, - covariateId = covariateRef %>% dplyr::pull(covariateId) - ) - - analysisRef <- characteristics$analysisRef - exportDataToCsv( - data = analysisRef, - tableName = "temporal_analysis_ref", - fileName = analysisRefFileName, - minCellCount = minCellCount, - incremental = 
TRUE, - analysisId = analysisRef %>% dplyr::pull(analysisId) - ) - - timeRef <- characteristics$timeRef - exportDataToCsv( - data = characteristics$timeRef, - tableName = "temporal_time_ref", - fileName = timeRefFileName, - minCellCount = minCellCount, - incremental = TRUE, - analysisId = timeRef %>% dplyr::pull(timeId) - ) - } - } - - if (!"covariatesContinuous" %in% names(characteristics)) { - ParallelLogger::logInfo("No continuous characterization output for submitted cohorts") - } else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) { - exportDataToCsv( - data = characteristics$covariatesContinuous, - tableName = "temporal_covariate_value_dist", - fileName = covariateValueContFileName, - minCellCount = minCellCount, - databaseId = databaseId, - incremental = TRUE - ) - } -} - -mutateCovariateOutput <- function(results, featureExtractionOutput, populationSize, binary) { - if (binary) { - covariates <- featureExtractionOutput$covariates %>% - dplyr::rename("cohortId" = "cohortDefinitionId") %>% - dplyr::left_join(populationSize, by = "cohortId", copy = TRUE) %>% - dplyr::mutate("p" = .data$sumValue / populationSize) - - if (nrow(covariates %>% - dplyr::filter(.data$p > 1) %>% - dplyr::collect()) > 0) { - stop( - paste0( - "During characterization, population size (denominator) was found to be smaller than features Value (numerator).", - "- this may have happened because of an error in Feature generation process. Please contact the package developer." 
- ) - ) - } - - covariates <- covariates %>% - dplyr::mutate("sd" = sqrt(.data$p * (1 - .data$p))) %>% - dplyr::select(-"p") %>% - dplyr::rename("mean" = "averageValue") %>% - dplyr::select(-populationSize) - - if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) { - covariates <- covariates %>% - dplyr::select( - "cohortId", - "timeId", - "covariateId", - "sumValue", - "mean", - "sd" - ) - - tidNaCount <- covariates %>% - dplyr::filter(is.na(.data$timeId)) %>% - dplyr::count() %>% - dplyr::pull() - - if (tidNaCount > 0) { - covariates <- covariates %>% - dplyr::mutate(timeId = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId)) - } - } else { - covariates <- covariates %>% - dplyr::mutate(timeId = 0) %>% - dplyr::select( - "cohortId", - "timeId", - "covariateId", - "sumValue", - "mean", - "sd" - ) - } - if ("covariates" %in% names(results)) { - Andromeda::appendToTable(results$covariates, covariates) - } else { - results$covariates <- covariates - } - } else { - covariates <- featureExtractionOutput$covariatesContinuous %>% - dplyr::rename( - "mean" = "averageValue", - "sd" = "standardDeviation", - "cohortId" = "cohortDefinitionId" - ) - covariatesContinuous <- covariates - if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) { - covariates <- covariates %>% - dplyr::mutate(sumValue = -1) %>% - dplyr::select( - "cohortId", - "timeId", - "covariateId", - "sumValue", - "mean", - "sd" - ) - - tidNaCount <- covariates %>% - dplyr::filter(is.na(.data$timeId)) %>% - dplyr::count() %>% - dplyr::pull() - - if (tidNaCount > 0) { - covariates <- covariates %>% - dplyr::mutate("timeId" = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId)) - } - } else { - covariates <- covariates %>% - dplyr::mutate( - sumValue = -1, - timeId = 0 - ) %>% - dplyr::select( - "cohortId", - "timeId", - "covariateId", - "sumValue", - "mean", - "sd" - ) - } - if ("covariates" %in% names(results)) { - Andromeda::appendToTable(results$covariates, 
covariates) - } else { - results$covariates <- covariates - } - if ("covariatesContinuous" %in% names(results)) { - Andromeda::appendToTable(results$covariatesContinuous, covariatesContinuous) - } else { - results$covariatesContinuous <- covariatesContinuous - } - } - return(results) -} - -getCohortCharacteristics <- function(connection = NULL, - cdmDatabaseSchema, - tempEmulationSchema = NULL, - cohortDatabaseSchema = cdmDatabaseSchema, - cohortTable = "cohort", - cohortIds, - cdmVersion = 5, - covariateSettings, - exportFolder, - minCharacterizationMean = 0.001) { - startTime <- Sys.time() - results <- Andromeda::andromeda() - timeExecution( - exportFolder, - taskName = "getDbCovariateData", - parent = "getCohortCharacteristics", - cohortIds = cohortIds, - expr = { - featureExtractionOutput <- - FeatureExtraction::getDbCovariateData( - connection = connection, - oracleTempSchema = tempEmulationSchema, - cdmDatabaseSchema = cdmDatabaseSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cdmVersion = cdmVersion, - cohortTable = cohortTable, - cohortIds = cohortIds, - covariateSettings = covariateSettings, - aggregated = TRUE, - minCharacterizationMean = minCharacterizationMean - ) - } - ) - populationSize <- - attr(x = featureExtractionOutput, which = "metaData")$populationSize - populationSize <- - dplyr::tibble( - cohortId = names(populationSize) %>% as.numeric(), - populationSize = populationSize - ) - - if (!"analysisRef" %in% names(results)) { - results$analysisRef <- featureExtractionOutput$analysisRef - } - if (!"covariateRef" %in% names(results)) { - results$covariateRef <- featureExtractionOutput$covariateRef - } else { - covariateIds <- results$covariateRef %>% - dplyr::select("covariateId") - Andromeda::appendToTable( - results$covariateRef, - featureExtractionOutput$covariateRef %>% - dplyr::anti_join(covariateIds, by = "covariateId", copy = TRUE) - ) - } - if ("timeRef" %in% names(featureExtractionOutput) && - !"timeRef" %in% names(results)) { - 
results$timeRef <- featureExtractionOutput$timeRef - } - - if ("covariates" %in% names(featureExtractionOutput) && - dplyr::pull(dplyr::count(featureExtractionOutput$covariates)) > 0) { - - results <- mutateCovariateOutput(results, featureExtractionOutput, populationSize, binary = TRUE) - } - - if ("covariatesContinuous" %in% names(featureExtractionOutput) && - dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) { - - results <- mutateCovariateOutput(results, featureExtractionOutput, populationSize, binary = FALSE) - } - - delta <- Sys.time() - startTime - ParallelLogger::logInfo( - "Cohort characterization took ", - signif(delta, 3), - " ", - attr(delta, "units") - ) - return(results) -} - -#' TODO: explain runTemporalCohortCharacterization -#' -#' @description -#' A short description... -#' -#' -#' @template connection -#' @template databaseId -#' @template exportFolder -#' @template cdmDatabaseSchema -#' @template cohortDatabaseSchema -#' @template cohortTable -#' @template tempEmulationSchema -#' @template cdmVersion -#' @template minCellCount -#' @template instantiatedCohorts -#' @template incremental -#' @template recordKeepingFile -#' @template batchSize -#' -#' @param cohorts The cohorts for which the covariates need to be obtained -#' @param cohortCounts A dataframe with the cohort counts -#' @param covariateSettings Either an object of type \code{covariateSettings} as created using one of -#' the createTemporalCovariateSettings function in the FeatureExtraction package, or a list -#' of such objects. 
-#' @param covariateValueFileName Filename of the binary covariates output -#' @param covariateValueContFileName Filename of the continuous covariate output -#' @param covariateRefFileName Filename of the covariate reference output -#' @param analysisRefFileName Filename of the analysis reference output -#' @param timeRefFileName Filename of the time reference output -#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This -#' will help reduce the file size of the characterization output, but will remove information -#' on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent) -#' -#' @return None, it will write results to disk -#' @export -#' -#' @examples -runTemporalCohortCharacterization <- function(connection, - databaseId, - exportFolder, - cdmDatabaseSchema, - cohortDatabaseSchema, - cohortTable, - covariateSettings, - tempEmulationSchema, - cdmVersion, - cohorts, - cohortCounts, - minCellCount, - instantiatedCohorts, - incremental, - recordKeepingFile, - covariateValueFileName = file.path(exportFolder, "temporal_covariate_value.csv"), - covariateValueContFileName = file.path(exportFolder, "temporal_covariate_value_dist.csv"), - covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), - analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), - timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), - minCharacterizationMean = 0.001, - batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20)) { - jobName <- "Temporal Cohort characterization" - task <- "runTemporalCohortCharacterization" - ParallelLogger::logInfo("Running ", jobName) - startCohortCharacterization <- Sys.time() - - subset <- subsetToRequiredCohorts( - cohorts = cohorts %>% - dplyr::filter(.data$cohortId %in% instantiatedCohorts), - task = task, - incremental = incremental, - recordKeepingFile = recordKeepingFile - ) - - if 
(!incremental) { - for (outputFile in c( - covariateValueFileName, covariateValueContFileName, - covariateRefFileName, analysisRefFileName, timeRefFileName - )) { - if (file.exists(outputFile)) { - ParallelLogger::logInfo("Not in incremental mode - Removing file", outputFile, " and replacing") - unlink(outputFile) - } - } - } - - if (incremental && - (length(instantiatedCohorts) - nrow(subset)) > 0) { - ParallelLogger::logInfo(sprintf( - "Skipping %s instantiated cohorts in incremental mode.", - length(instantiatedCohorts) - nrow(subset) - )) - } - if (nrow(subset) > 0) { - ParallelLogger::logInfo(sprintf( - "Starting large scale characterization of %s cohort(s)", - nrow(subset) - )) - - # Processing cohorts loop - for (start in seq(1, nrow(subset), by = batchSize)) { - end <- min(start + batchSize - 1, nrow(subset)) - if (nrow(subset) > batchSize) { - ParallelLogger::logInfo( - sprintf( - "Batch characterization. Processing rows %s through %s of total %s.", - start, - end, - nrow(subset) - ) - ) - } - - characteristics <- getCohortCharacteristics( - connection = connection, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortTable = cohortTable, - cohortIds = subset[start:end, ]$cohortId, - covariateSettings = covariateSettings, - cdmVersion = cdmVersion, - exportFolder = exportFolder, - minCharacterizationMean = minCharacterizationMean - ) - - on.exit(Andromeda::close(characteristics), add = TRUE) - exportCharacterization( - characteristics = characteristics, - databaseId = databaseId, - incremental = incremental, - covariateValueFileName = covariateValueFileName, - covariateValueContFileName = covariateValueContFileName, - covariateRefFileName = covariateRefFileName, - analysisRefFileName = analysisRefFileName, - timeRefFileName = timeRefFileName, - counts = cohortCounts, - minCellCount = minCellCount - ) - - recordTasksDone( - cohortId = subset[start:end, ]$cohortId, - task 
= task, - checksum = subset[start:end, ]$checksum, - recordKeepingFile = recordKeepingFile, - incremental = incremental - ) - - deltaIteration <- Sys.time() - startCohortCharacterization - ParallelLogger::logInfo( - " - Running Cohort Characterization iteration with batchsize ", - batchSize, - " from row number ", - start, - " to ", - end, - " took ", - signif(deltaIteration, 3), - " ", - attr(deltaIteration, "units") - ) - } - } - delta <- Sys.time() - startCohortCharacterization - ParallelLogger::logInfo( - "Running ", - jobName, - " took", - signif(delta, 3), - " ", - attr(delta, "units") - ) -} diff --git a/man/runBreakdownIndexEvents.Rd b/man/runBreakdownIndexEvents.Rd index 35c077a25..256ac9cd2 100644 --- a/man/runBreakdownIndexEvents.Rd +++ b/man/runBreakdownIndexEvents.Rd @@ -2,75 +2,64 @@ % Please edit documentation in R/runBreakdownIndexEvents.R \name{runBreakdownIndexEvents} \alias{runBreakdownIndexEvents} -\title{Title} +\title{runBreakdownIndexEvents} \usage{ runBreakdownIndexEvents( connection, + cohortDefinitionSet, tempEmulationSchema, cdmDatabaseSchema, - vocabularyDatabaseSchema = cdmDatabaseSchema, + vocabularyDatabaseSchema, + cohortDatabaseSchema, databaseId, - cohorts, exportFolder, minCellCount, - conceptCountsDatabaseSchema = NULL, - conceptCountsTable = "concept_counts", - conceptCountsTableIsTemp = FALSE, - cohortDatabaseSchema, cohortTable, - useExternalConceptCountsTable = FALSE, incremental = FALSE, - conceptIdTable = NULL, - recordKeepingFile, - useAchilles, - resultsDatabaseSchema + incrementalFolder ) } \arguments{ -\item{connection}{} - -\item{tempEmulationSchema}{} - -\item{cdmDatabaseSchema}{} - -\item{vocabularyDatabaseSchema}{} - -\item{databaseId}{} - -\item{cohorts}{} - -\item{exportFolder}{} - -\item{minCellCount}{} - -\item{conceptCountsDatabaseSchema}{} - -\item{conceptCountsTable}{} - -\item{conceptCountsTableIsTemp}{} +\item{connection}{An object of type \code{connection} as created using the 
+\code{\link[DatabaseConnector]{connect}} function in the +DatabaseConnector package.} -\item{cohortDatabaseSchema}{} +\item{cohortDefinitionSet}{A data.frame with cohort definitions created by +`CohortGenerator::getCohortDefinitionSet` that must include +the columns cohortId, cohortName, json, sql.} -\item{cohortTable}{} +\item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.} -\item{useExternalConceptCountsTable}{} +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} -\item{incremental}{} +\item{vocabularyDatabaseSchema}{Schema name where your OMOP vocabulary data resides. This is +commonly the same as cdmDatabaseSchema. Note that for +SQL Server, this should include both the database and +schema name, for example 'vocabulary.dbo'.} -\item{conceptIdTable}{} +\item{cohortDatabaseSchema}{Schema name where your cohort table resides. Note that for SQL Server, +this should include both the database and schema name, for example +'scratch.dbo'.} -\item{recordKeepingFile}{} +\item{databaseId}{A short string for identifying the database (e.g. 'Synpuf').} -\item{useAchilles}{} +\item{exportFolder}{The folder where the results will be exported to} -\item{resultsDatabaseSchema}{} +\item{minCellCount}{The minimum cell count for fields contains person counts or fractions} -\item{runIncludedSourceConcepts}{} +\item{cohortTable}{Name of the cohort table.} -\item{runOrphanConcepts}{} +\item{incremental}{`TRUE` or `FALSE` (default). If TRUE diagnostics for cohorts in the +cohort definition set that have not changed will be skipped and existing results +csv files will be updated. 
If FALSE then diagnostics for all cohorts in the cohort +definition set will be executed and pre-existing results files will be deleted.} -\item{runBreakdownIndexEvents}{} +\item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where records are kept +of which cohort diagnostics has been executed.} } \description{ -Title +runBreakdownIndexEvents } diff --git a/man/runCohortCharacterization.Rd b/man/runCohortCharacterization.Rd new file mode 100644 index 000000000..3652c15cf --- /dev/null +++ b/man/runCohortCharacterization.Rd @@ -0,0 +1,136 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/runCohortCharacterization.R, +% R/runTemporalCohortCharacterization.R +\name{runCohortCharacterization} +\alias{runCohortCharacterization} +\title{runTemporalCohortCharacterization} +\usage{ +runCohortCharacterization( + connection, + databaseId, + exportFolder, + cdmDatabaseSchema, + cohortDatabaseSchema, + cohortTable, + covariateSettings, + tempEmulationSchema, + cdmVersion, + cohorts, + cohortCounts, + minCellCount, + instantiatedCohorts, + incremental, + recordKeepingFile, + covariateValueFileName = file.path(exportFolder, "temporal_covariate_value.csv"), + covariateValueContFileName = file.path(exportFolder, + "temporal_covariate_value_dist.csv"), + covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), + analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), + timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), + minCharacterizationMean = 0.001, + batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20) +) + +runCohortCharacterization( + connection, + databaseId, + exportFolder, + cdmDatabaseSchema, + cohortDatabaseSchema, + cohortTable, + covariateSettings, + tempEmulationSchema, + cdmVersion, + cohorts, + cohortCounts, + minCellCount, + instantiatedCohorts, + incremental, + recordKeepingFile, + covariateValueFileName = file.path(exportFolder, 
"temporal_covariate_value.csv"), + covariateValueContFileName = file.path(exportFolder, + "temporal_covariate_value_dist.csv"), + covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), + analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), + timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), + minCharacterizationMean = 0.001, + batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20) +) +} +\arguments{ +\item{connection}{An object of type \code{connection} as created using the +\code{\link[DatabaseConnector]{connect}} function in the +DatabaseConnector package.} + +\item{databaseId}{A short string for identifying the database (e.g. 'Synpuf').} + +\item{exportFolder}{The folder where the results will be exported to} + +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} + +\item{cohortDatabaseSchema}{Schema name where your cohort table resides. Note that for SQL Server, +this should include both the database and schema name, for example +'scratch.dbo'.} + +\item{cohortTable}{Name of the cohort table.} + +\item{covariateSettings}{Either an object of type \code{covariateSettings} as created using one of +the createTemporalCovariateSettings function in the FeatureExtraction package, or a list +of such objects.} + +\item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp +tables, provide a schema with write privileges where temp tables can be created.} + +\item{cdmVersion}{The version of the OMOP CDM. Default 5. 
(Note: only 5 is supported.)} + +\item{cohorts}{The cohorts for which the covariates need to be obtained} + +\item{cohortCounts}{A dataframe with the cohort counts} + +\item{minCellCount}{The minimum cell count for fields contains person counts or fractions} + +\item{instantiatedCohorts}{cohortIds of the cohorts that have been already been instantiated} + +\item{incremental}{`TRUE` or `FALSE` (default). If TRUE diagnostics for cohorts in the +cohort definition set that have not changed will be skipped and existing results +csv files will be updated. If FALSE then diagnostics for all cohorts in the cohort +definition set will be executed and pre-existing results files will be deleted.} + +\item{covariateValueFileName}{Filename of the binary covariates output} + +\item{covariateValueContFileName}{Filename of the continuous covariate output} + +\item{covariateRefFileName}{Filename of the covariate reference output} + +\item{analysisRefFileName}{Filename of the analysis reference output} + +\item{timeRefFileName}{Filename of the time reference output} + +\item{minCharacterizationMean}{The minimum mean value for characterization output. Values below this will be cut off from output. This +will help reduce the file size of the characterization output, but will remove information +on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent)} + +\item{batchSize}{In case of batch processing, this specifies the size of the batch} + +\item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where records are kept +of which cohort diagnostics has been executed.} +} +\value{ +None, it will write results to disk + +None, it will write results to disk +} +\description{ +A short description... + +This function takes cohorts as input and generates the covariates for these cohorts. +The covariates are generated using FeatureExtraction. The output from this package +is slightly modified before the output is written to disk. 
+These are the files written to disk, if available: + * cohort_inc_result.csv + * cohort_inc_stats.csv + * cohort_inclusion.csv + * cohort_summary_stats.csv +} diff --git a/man/runTemporalCohortCharacterization.Rd b/man/runVisitContext.Rd similarity index 54% rename from man/runTemporalCohortCharacterization.Rd rename to man/runVisitContext.Rd index fec780a8f..4cf689649 100644 --- a/man/runTemporalCohortCharacterization.Rd +++ b/man/runVisitContext.Rd @@ -1,35 +1,22 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/runTemporalCohortCharacterization.R -\name{runTemporalCohortCharacterization} -\alias{runTemporalCohortCharacterization} -\title{TODO: explain runTemporalCohortCharacterization} +% Please edit documentation in R/runVisitContext.R +\name{runVisitContext} +\alias{runVisitContext} +\title{runVisitContext} \usage{ -runTemporalCohortCharacterization( +runVisitContext( connection, - databaseId, + cohortDefinitionSet, exportFolder, - cdmDatabaseSchema, + databaseId, cohortDatabaseSchema, - cohortTable, - covariateSettings, - tempEmulationSchema, - cdmVersion, - cohorts, - cohortCounts, + cdmDatabaseSchema, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), + cohortTable = "cohort", + cdmVersion = 5, minCellCount, - instantiatedCohorts, incremental, - recordKeepingFile, - task = "runTemporalCohortCharacterization", - jobName = "Temporal Cohort characterization", - covariateValueFileName = file.path(exportFolder, "temporal_covariate_value.csv"), - covariateValueContFileName = file.path(exportFolder, - "temporal_covariate_value_dist.csv"), - covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), - analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), - timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), - minCharacterizationMean = 0.001, - batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20) + incrementalFolder = file.path(exportFolder, 
"incremental") ) } \arguments{ @@ -37,59 +24,44 @@ runTemporalCohortCharacterization( \code{\link[DatabaseConnector]{connect}} function in the DatabaseConnector package.} -\item{databaseId}{A short string for identifying the database (e.g. 'Synpuf').} +\item{cohortDefinitionSet}{A data.frame with cohort definitions created by +`CohortGenerator::getCohortDefinitionSet` that must include +the columns cohortId, cohortName, json, sql.} \item{exportFolder}{The folder where the results will be exported to} -\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. -Note that for SQL Server, this should include both the database and -schema name, for example 'cdm_data.dbo'.} +\item{databaseId}{A short string for identifying the database (e.g. 'Synpuf').} \item{cohortDatabaseSchema}{Schema name where your cohort table resides. Note that for SQL Server, this should include both the database and schema name, for example 'scratch.dbo'.} -\item{cohortTable}{Name of the cohort table.} +\item{cdmDatabaseSchema}{Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.} \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables can be created.} -\item{cdmVersion}{The version of the OMOP CDM. Default 5. (Note: only 5 is supported.)} - -\item{cohorts}{} +\item{cohortTable}{Name of the cohort table.} -\item{cohortCounts}{} +\item{cdmVersion}{The version of the OMOP CDM. Default 5. (Note: only 5 is supported.)} \item{minCellCount}{The minimum cell count for fields contains person counts or fractions} -\item{instantiatedCohorts}{cohortIds of the cohorts that have been already been instantiated} - \item{incremental}{`TRUE` or `FALSE` (default). 
If TRUE diagnostics for cohorts in the cohort definition set that have not changed will be skipped and existing results csv files will be updated. If FALSE then diagnostics for all cohorts in the cohort definition set will be executed and pre-existing results files will be deleted.} -\item{task}{} - -\item{jobName}{} - -\item{covariateValueFileName}{} - -\item{covariateValueContFileName}{} - -\item{covariateRefFileName}{} - -\item{analysisRefFileName}{} - -\item{timeRefFileName}{} - -\item{minCharacterizationMean}{} - -\item{batchSize}{In case of batch processing, this specifies the size of the batch} - \item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where records are kept -of which cohort diagnostics has been executed.} +of which cohort diagnostics has been executed. If not specified, a file named `incremental` will be created inside the +\code{export_folder} directory.} } \description{ -A short description... +Generates the `visit_context.csv` which contains the counts for the subjects by `cohort_id`, +`visit_concept_id` and `visit_context`. The `visit_context` categorizes visit occurrences of +subjects based on how each the start and end date of each visit related to the cohort start date +to which each subject belongs. No output will be generated for cohorts with no subjects.If there +is no cohort with subjects execution will halt and `visit_context.csv` will not be generated. 
} diff --git a/tests/testthat/test-runTemporalCohortCharacterization.R b/tests/testthat/test-runCohortCharacterization.R similarity index 93% rename from tests/testthat/test-runTemporalCohortCharacterization.R rename to tests/testthat/test-runCohortCharacterization.R index 24ffe9bee..cd211c58d 100644 --- a/tests/testthat/test-runTemporalCohortCharacterization.R +++ b/tests/testthat/test-runCohortCharacterization.R @@ -28,7 +28,7 @@ test_that("Execute and export characterization", { ) checkmate::expect_file_exists(file.path(exportFolder, "cohort_count.csv")) - runTemporalCohortCharacterization( + runCohortCharacterization( connection = tConnection, databaseId = "Testdb", exportFolder = exportFolder, @@ -44,8 +44,6 @@ test_that("Execute and export characterization", { instantiatedCohorts = server$cohortDefinitionSet$cohortId, incremental = TRUE, recordKeepingFile = recordKeepingFile, - task = "runTemporalCohortCharacterization", - jobName = "Temporal Cohort characterization", minCharacterizationMean = 0.3 ) @@ -77,7 +75,7 @@ test_that("Execute and export characterization", { ) # finish the rest of characterization - runTemporalCohortCharacterization( + runCohortCharacterization( connection = tConnection, databaseId = "Testdb", exportFolder = exportFolder, @@ -93,8 +91,6 @@ test_that("Execute and export characterization", { instantiatedCohorts = server$cohortDefinitionSet$cohortId, incremental = TRUE, recordKeepingFile = recordKeepingFile, - task = "runTemporalCohortCharacterization", - jobName = "Temporal Cohort characterization", minCharacterizationMean = 0.3 ) From 589a03c8a62ca7096f041b91f6f93fe7f5983f87 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 11:02:52 +0200 Subject: [PATCH 11/18] add file --- R/runCohortCharacterization.R | 493 ++++++++++++++++++++++++++++++++++ 1 file changed, 493 insertions(+) create mode 100644 R/runCohortCharacterization.R diff --git a/R/runCohortCharacterization.R b/R/runCohortCharacterization.R new file mode 100644 
index 000000000..3d1fd47f6 --- /dev/null +++ b/R/runCohortCharacterization.R @@ -0,0 +1,493 @@ +# Copyright 2024 Observational Health Data Sciences and Informatics +# +# This file is part of CohortDiagnostics +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# export characteristics to csv files +exportCharacterization <- function(characteristics, + databaseId, + incremental, + covariateValueFileName, + covariateValueContFileName, + covariateRefFileName, + analysisRefFileName, + timeRefFileName, + counts, + minCellCount) { + + if (!"covariates" %in% names(characteristics)) { + warning("No characterization output for submitted cohorts") + } else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) { + characteristics$filteredCovariates <- + characteristics$covariates %>% + dplyr::mutate(databaseId = !!databaseId) %>% + dplyr::left_join(counts, + by = c("cohortId", "databaseId"), + copy = TRUE + ) %>% + dplyr::mutate( + mean = dplyr::if_else( + .data$mean != 0 & .data$mean < minCellCount / as.numeric(.data$cohortEntries), + -minCellCount / as.numeric(.data$cohortEntries), + .data$mean + ), + sumValue = dplyr::if_else( + .data$sumValue != 0 & .data$sumValue < minCellCount, + -minCellCount, + .data$sumValue + ) + ) %>% + dplyr::mutate(sd = dplyr::if_else(mean >= 0, .data$sd, 0)) %>% + dplyr::mutate( + mean = round(.data$mean, digits = 4), + sd = round(.data$sd, digits = 4) + ) %>% + dplyr::select(-"cohortEntries", -"cohortSubjects") %>% + dplyr::distinct() %>% 
+ exportDataToCsv( + tableName = "temporal_covariate_value", + fileName = covariateValueFileName, + minCellCount = minCellCount, + databaseId = databaseId, + incremental = TRUE) + + if (dplyr::pull(dplyr::count(characteristics$filteredCovariates)) > 0) { + + covariateRef <- characteristics$covariateRef + exportDataToCsv( + data = characteristics$covariateRef, + tableName = "temporal_covariate_ref", + fileName = covariateRefFileName, + minCellCount = minCellCount, + incremental = TRUE, + covariateId = covariateRef %>% dplyr::pull(covariateId) + ) + + analysisRef <- characteristics$analysisRef + exportDataToCsv( + data = analysisRef, + tableName = "temporal_analysis_ref", + fileName = analysisRefFileName, + minCellCount = minCellCount, + incremental = TRUE, + analysisId = analysisRef %>% dplyr::pull(analysisId) + ) + + timeRef <- characteristics$timeRef + exportDataToCsv( + data = characteristics$timeRef, + tableName = "temporal_time_ref", + fileName = timeRefFileName, + minCellCount = minCellCount, + incremental = TRUE, + analysisId = timeRef %>% dplyr::pull(timeId) + ) + } + } + + if (!"covariatesContinuous" %in% names(characteristics)) { + ParallelLogger::logInfo("No continuous characterization output for submitted cohorts") + } else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) { + exportDataToCsv( + data = characteristics$covariatesContinuous, + tableName = "temporal_covariate_value_dist", + fileName = covariateValueContFileName, + minCellCount = minCellCount, + databaseId = databaseId, + incremental = TRUE + ) + } +} + +mutateCovariateOutput <- function(results, featureExtractionOutput, populationSize, binary) { + if (binary) { + covariates <- featureExtractionOutput$covariates %>% + dplyr::rename("cohortId" = "cohortDefinitionId") %>% + dplyr::left_join(populationSize, by = "cohortId", copy = TRUE) %>% + dplyr::mutate("p" = .data$sumValue / populationSize) + + if (nrow(covariates %>% + dplyr::filter(.data$p > 1) %>% + dplyr::collect()) > 0) 
{ + stop( + paste0( + "During characterization, population size (denominator) was found to be smaller than features Value (numerator).", + "- this may have happened because of an error in Feature generation process. Please contact the package developer." + ) + ) + } + + covariates <- covariates %>% + dplyr::mutate("sd" = sqrt(.data$p * (1 - .data$p))) %>% + dplyr::select(-"p") %>% + dplyr::rename("mean" = "averageValue") %>% + dplyr::select(-populationSize) + + if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) { + covariates <- covariates %>% + dplyr::select( + "cohortId", + "timeId", + "covariateId", + "sumValue", + "mean", + "sd" + ) + + tidNaCount <- covariates %>% + dplyr::filter(is.na(.data$timeId)) %>% + dplyr::count() %>% + dplyr::pull() + + if (tidNaCount > 0) { + covariates <- covariates %>% + dplyr::mutate(timeId = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId)) + } + } else { + covariates <- covariates %>% + dplyr::mutate(timeId = 0) %>% + dplyr::select( + "cohortId", + "timeId", + "covariateId", + "sumValue", + "mean", + "sd" + ) + } + if ("covariates" %in% names(results)) { + Andromeda::appendToTable(results$covariates, covariates) + } else { + results$covariates <- covariates + } + } else { + covariates <- featureExtractionOutput$covariatesContinuous %>% + dplyr::rename( + "mean" = "averageValue", + "sd" = "standardDeviation", + "cohortId" = "cohortDefinitionId" + ) + covariatesContinuous <- covariates + if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) { + covariates <- covariates %>% + dplyr::mutate(sumValue = -1) %>% + dplyr::select( + "cohortId", + "timeId", + "covariateId", + "sumValue", + "mean", + "sd" + ) + + tidNaCount <- covariates %>% + dplyr::filter(is.na(.data$timeId)) %>% + dplyr::count() %>% + dplyr::pull() + + if (tidNaCount > 0) { + covariates <- covariates %>% + dplyr::mutate("timeId" = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId)) + } + } else { + covariates <- covariates 
%>% + dplyr::mutate( + sumValue = -1, + timeId = 0 + ) %>% + dplyr::select( + "cohortId", + "timeId", + "covariateId", + "sumValue", + "mean", + "sd" + ) + } + if ("covariates" %in% names(results)) { + Andromeda::appendToTable(results$covariates, covariates) + } else { + results$covariates <- covariates + } + if ("covariatesContinuous" %in% names(results)) { + Andromeda::appendToTable(results$covariatesContinuous, covariatesContinuous) + } else { + results$covariatesContinuous <- covariatesContinuous + } + } + return(results) +} + +getCohortCharacteristics <- function(connection = NULL, + cdmDatabaseSchema, + tempEmulationSchema = NULL, + cohortDatabaseSchema = cdmDatabaseSchema, + cohortTable = "cohort", + cohortIds, + cdmVersion = 5, + covariateSettings, + exportFolder, + minCharacterizationMean = 0.001) { + startTime <- Sys.time() + results <- Andromeda::andromeda() + timeExecution( + exportFolder, + taskName = "getDbCovariateData", + parent = "getCohortCharacteristics", + cohortIds = cohortIds, + expr = { + featureExtractionOutput <- + FeatureExtraction::getDbCovariateData( + connection = connection, + oracleTempSchema = tempEmulationSchema, + cdmDatabaseSchema = cdmDatabaseSchema, + cohortDatabaseSchema = cohortDatabaseSchema, + cdmVersion = cdmVersion, + cohortTable = cohortTable, + cohortIds = cohortIds, + covariateSettings = covariateSettings, + aggregated = TRUE, + minCharacterizationMean = minCharacterizationMean + ) + } + ) + populationSize <- + attr(x = featureExtractionOutput, which = "metaData")$populationSize + populationSize <- + dplyr::tibble( + cohortId = names(populationSize) %>% as.numeric(), + populationSize = populationSize + ) + + if (!"analysisRef" %in% names(results)) { + results$analysisRef <- featureExtractionOutput$analysisRef + } + if (!"covariateRef" %in% names(results)) { + results$covariateRef <- featureExtractionOutput$covariateRef + } else { + covariateIds <- results$covariateRef %>% + dplyr::select("covariateId") + 
Andromeda::appendToTable( + results$covariateRef, + featureExtractionOutput$covariateRef %>% + dplyr::anti_join(covariateIds, by = "covariateId", copy = TRUE) + ) + } + if ("timeRef" %in% names(featureExtractionOutput) && + !"timeRef" %in% names(results)) { + results$timeRef <- featureExtractionOutput$timeRef + } + + if ("covariates" %in% names(featureExtractionOutput) && + dplyr::pull(dplyr::count(featureExtractionOutput$covariates)) > 0) { + + results <- mutateCovariateOutput(results, featureExtractionOutput, populationSize, binary = TRUE) + } + + if ("covariatesContinuous" %in% names(featureExtractionOutput) && + dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) { + + results <- mutateCovariateOutput(results, featureExtractionOutput, populationSize, binary = FALSE) + } + + delta <- Sys.time() - startTime + ParallelLogger::logInfo( + "Cohort characterization took ", + signif(delta, 3), + " ", + attr(delta, "units") + ) + return(results) +} + +#' runCohortCharacterization +#' +#' @description +#' This function takes cohorts as input and generates the covariates for these cohorts. +#' The covariates are generated using FeatureExtraction. The output from this package +#' is slightly modified before the output is written to disk. 
+#' These are the files written to disk, if available: +#' * temporal_analysis_ref.csv +#' * temporal_covariate_ref.csv +#' * temporal_covariate_value.csv +#' * temporal_covariate_value_dist.csv +#' * temporal_time_ref.csv +#' +#' +#' @template connection +#' @template databaseId +#' @template exportFolder +#' @template cdmDatabaseSchema +#' @template cohortDatabaseSchema +#' @template cohortTable +#' @template tempEmulationSchema +#' @template cdmVersion +#' @template minCellCount +#' @template instantiatedCohorts +#' @template incremental +#' @template recordKeepingFile +#' @template batchSize +#' +#' @param cohorts The cohorts for which the covariates need to be obtained +#' @param cohortCounts A dataframe with the cohort counts +#' @param covariateSettings Either an object of type \code{covariateSettings} as created using one of +#' the createTemporalCovariateSettings function in the FeatureExtraction package, or a list +#' of such objects. +#' @param covariateValueFileName Filename of the binary covariates output +#' @param covariateValueContFileName Filename of the continuous covariate output +#' @param covariateRefFileName Filename of the covariate reference output +#' @param analysisRefFileName Filename of the analysis reference output +#' @param timeRefFileName Filename of the time reference output +#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This +#' will help reduce the file size of the characterization output, but will remove information +#' on covariates that have very low values. The default is 0.001 (i.e. 
0.1 percent) +#' +#' @return None, it will write results to disk +#' @export +#' +#' @examples +runCohortCharacterization <- function(connection, + databaseId, + exportFolder, + cdmDatabaseSchema, + cohortDatabaseSchema, + cohortTable, + covariateSettings, + tempEmulationSchema, + cdmVersion, + cohorts, + cohortCounts, + minCellCount, + instantiatedCohorts, + incremental, + recordKeepingFile, + covariateValueFileName = file.path(exportFolder, "temporal_covariate_value.csv"), + covariateValueContFileName = file.path(exportFolder, "temporal_covariate_value_dist.csv"), + covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), + analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), + timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), + minCharacterizationMean = 0.001, + batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20)) { + jobName <- "Cohort characterization" + task <- "runCohortCharacterization" + ParallelLogger::logInfo("Running ", jobName) + startCohortCharacterization <- Sys.time() + + subset <- subsetToRequiredCohorts( + cohorts = cohorts %>% + dplyr::filter(.data$cohortId %in% instantiatedCohorts), + task = task, + incremental = incremental, + recordKeepingFile = recordKeepingFile + ) + + if (!incremental) { + for (outputFile in c( + covariateValueFileName, covariateValueContFileName, + covariateRefFileName, analysisRefFileName, timeRefFileName + )) { + if (file.exists(outputFile)) { + ParallelLogger::logInfo("Not in incremental mode - Removing file", outputFile, " and replacing") + unlink(outputFile) + } + } + } + + if (incremental && + (length(instantiatedCohorts) - nrow(subset)) > 0) { + ParallelLogger::logInfo(sprintf( + "Skipping %s instantiated cohorts in incremental mode.", + length(instantiatedCohorts) - nrow(subset) + )) + } + if (nrow(subset) > 0) { + ParallelLogger::logInfo(sprintf( + "Starting large scale characterization of %s cohort(s)", + nrow(subset) + )) + + # 
Processing cohorts loop + for (start in seq(1, nrow(subset), by = batchSize)) { + end <- min(start + batchSize - 1, nrow(subset)) + if (nrow(subset) > batchSize) { + ParallelLogger::logInfo( + sprintf( + "Batch characterization. Processing rows %s through %s of total %s.", + start, + end, + nrow(subset) + ) + ) + } + + characteristics <- getCohortCharacteristics( + connection = connection, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + cohortDatabaseSchema = cohortDatabaseSchema, + cohortTable = cohortTable, + cohortIds = subset[start:end, ]$cohortId, + covariateSettings = covariateSettings, + cdmVersion = cdmVersion, + exportFolder = exportFolder, + minCharacterizationMean = minCharacterizationMean + ) + + on.exit(Andromeda::close(characteristics), add = TRUE) + exportCharacterization( + characteristics = characteristics, + databaseId = databaseId, + incremental = incremental, + covariateValueFileName = covariateValueFileName, + covariateValueContFileName = covariateValueContFileName, + covariateRefFileName = covariateRefFileName, + analysisRefFileName = analysisRefFileName, + timeRefFileName = timeRefFileName, + counts = cohortCounts, + minCellCount = minCellCount + ) + + recordTasksDone( + cohortId = subset[start:end, ]$cohortId, + task = task, + checksum = subset[start:end, ]$checksum, + recordKeepingFile = recordKeepingFile, + incremental = incremental + ) + + deltaIteration <- Sys.time() - startCohortCharacterization + ParallelLogger::logInfo( + " - Running Cohort Characterization iteration with batchsize ", + batchSize, + " from row number ", + start, + " to ", + end, + " took ", + signif(deltaIteration, 3), + " ", + attr(deltaIteration, "units") + ) + } + } + delta <- Sys.time() - startCohortCharacterization + ParallelLogger::logInfo( + "Running ", + jobName, + " took", + signif(delta, 3), + " ", + attr(delta, "units") + ) +} From a471962f2f839bae8458c17d9a47d0b5176cc6ea Mon Sep 17 00:00:00 2001 From: Ger Inberg 
Date: Fri, 11 Oct 2024 11:13:16 +0200 Subject: [PATCH 12/18] fix test --- tests/testthat/test-runCohortCharacterization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-runCohortCharacterization.R b/tests/testthat/test-runCohortCharacterization.R index cd211c58d..c8b288a5e 100644 --- a/tests/testthat/test-runCohortCharacterization.R +++ b/tests/testthat/test-runCohortCharacterization.R @@ -60,7 +60,7 @@ test_that("Execute and export characterization", { # check if subset works subset <- subsetToRequiredCohorts( cohorts = server$cohortDefinitionSet, - task = "runTemporalCohortCharacterization", + task = "runCohortCharacterization", incremental = TRUE, recordKeepingFile = recordKeepingFile ) From a74e9101e7eaee0706351304897b727f22e61aa3 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 11:58:38 +0200 Subject: [PATCH 13/18] add test --- R/runCohortCharacterization.R | 1 - .../testthat/test-runCohortCharacterization.R | 49 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/R/runCohortCharacterization.R b/R/runCohortCharacterization.R index 3d1fd47f6..c1eb965f8 100644 --- a/R/runCohortCharacterization.R +++ b/R/runCohortCharacterization.R @@ -324,7 +324,6 @@ getCohortCharacteristics <- function(connection = NULL, #' * temporal_covariate_value_dist.csv #' * temporal_time_ref.csv #' -#' #' @template connection #' @template databaseId #' @template exportFolder diff --git a/tests/testthat/test-runCohortCharacterization.R b/tests/testthat/test-runCohortCharacterization.R index c8b288a5e..3792bafcc 100644 --- a/tests/testthat/test-runCohortCharacterization.R +++ b/tests/testthat/test-runCohortCharacterization.R @@ -1,3 +1,52 @@ +# test getCohortCharacteristics on all databases +for (nm in names(testServers)) { + + server <- testServers[[nm]] + con <- connect(server$connectionDetails) + exportFolder <- file.path(tempdir(), paste0(nm, "exp")) + recordKeepingFile <- file.path(exportFolder, 
"record.csv") + minCharacterizationMean <- 0.001 + + test_that("Testing getCohortCharacteristics", { + skip_if(skipCdmTests, "cdm settings not configured") + + results <- getCohortCharacteristics( + connection = con, + cdmDatabaseSchema = server$cdmDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + cohortIds = server$cohortIds, + covariateSettings = temporalCovariateSettings, + exportFolder = exportFolder, + minCharacterizationMean = minCharacterizationMean + ) + + # check characteristics + expect_equal(class(results), "Andromeda") + expect_equal(names(results), c("analysisRef", "covariateRef", "covariates", "covariatesContinuous", "timeRef")) + + analysisRef <- results$analysisRef + analysisIds <- analysisRef %>% pull(analysisId) + expect_true(analysisRef %>% pull(analysisName) %in% c("Measurement", "ConditionOccurence", "DrugEraStart", "CharlsonIndex", "ProcedureOccurence")) + + covariateRef <- results$covariateRef + expect_true(covariateRef %>% pull(analysisId) %in% analysisIds) + + covariates <- results$covariates + expect_true(covariates %>% pull(cohortId) %in% server$cohortIds) + expect_true(covariates %>% pull(mean) %>% min() >= minCharacterizationMean) + + covariatesCont <- results$covariatesContinuous + expect_true(covariatesCont %>% pull(cohortId) %in% server$cohortIds) + + timeRef <- results$timeRef + expect_true(timeRef %>% pull(startDay), c(-365, -30, 0, 1, 31)) + expect_true(timeRef %>% pull(endDay), c(-31, -1, 0, 30, 365)) + }) +} + + test_that("Execute and export characterization", { skip_if(skipCdmTests, "cdm settings not configured") skip_if_not("sqlite" %in% names(testServers)) From 729efb9ac3e807b45dbe6ab09c40e0b9b458c7e2 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 13:40:12 +0200 Subject: [PATCH 14/18] update test --- tests/testthat/test-runCohortCharacterization.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/tests/testthat/test-runCohortCharacterization.R b/tests/testthat/test-runCohortCharacterization.R index 3792bafcc..d55a7487e 100644 --- a/tests/testthat/test-runCohortCharacterization.R +++ b/tests/testthat/test-runCohortCharacterization.R @@ -3,12 +3,14 @@ for (nm in names(testServers)) { server <- testServers[[nm]] con <- connect(server$connectionDetails) - exportFolder <- file.path(tempdir(), paste0(nm, "exp")) - recordKeepingFile <- file.path(exportFolder, "record.csv") minCharacterizationMean <- 0.001 test_that("Testing getCohortCharacteristics", { skip_if(skipCdmTests, "cdm settings not configured") + exportFolder <- file.path(tempdir(), paste0(nm, "exp")) + dir.create(exportFolder) + recordKeepingFile <- file.path(exportFolder, "record.csv") + on.exit(unlink(exportFolder)) results <- getCohortCharacteristics( connection = con, From b9c8a7de1fd2b882292e3493247da006a5ed492f Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 13:48:47 +0200 Subject: [PATCH 15/18] test --- .../testthat/test-runCohortCharacterization.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-runCohortCharacterization.R b/tests/testthat/test-runCohortCharacterization.R index d55a7487e..bfc356975 100644 --- a/tests/testthat/test-runCohortCharacterization.R +++ b/tests/testthat/test-runCohortCharacterization.R @@ -25,26 +25,26 @@ for (nm in names(testServers)) { ) # check characteristics - expect_equal(class(results), "Andromeda") + expect_equal(class(results)[1], "Andromeda") expect_equal(names(results), c("analysisRef", "covariateRef", "covariates", "covariatesContinuous", "timeRef")) analysisRef <- results$analysisRef - analysisIds <- analysisRef %>% pull(analysisId) - expect_true(analysisRef %>% pull(analysisName) %in% c("Measurement", "ConditionOccurence", "DrugEraStart", "CharlsonIndex", "ProcedureOccurence")) + analysisIds <- analysisRef %>% dplyr::pull(analysisId) + 
expect_equal(analysisRef %>% dplyr::pull(analysisName), c("Measurement", "ConditionOccurrence", "DrugEraStart", "CharlsonIndex", "ProcedureOccurrence")) covariateRef <- results$covariateRef - expect_true(covariateRef %>% pull(analysisId) %in% analysisIds) + expect_true(all(covariateRef %>% dplyr::pull(analysisId) %>% unique() %in% analysisIds)) covariates <- results$covariates - expect_true(covariates %>% pull(cohortId) %in% server$cohortIds) - expect_true(covariates %>% pull(mean) %>% min() >= minCharacterizationMean) + expect_true(all(covariates %>% dplyr::pull(cohortId) %in% server$cohortIds)) + expect_true(covariates %>% dplyr::pull(mean) %>% min() >= minCharacterizationMean) covariatesCont <- results$covariatesContinuous - expect_true(covariatesCont %>% pull(cohortId) %in% server$cohortIds) + expect_true(all(covariatesCont %>% dplyr::pull(cohortId) %in% server$cohortIds)) timeRef <- results$timeRef - expect_true(timeRef %>% pull(startDay), c(-365, -30, 0, 1, 31)) - expect_true(timeRef %>% pull(endDay), c(-31, -1, 0, 30, 365)) + expect_equal(timeRef %>% dplyr::pull(startDay), c(-365, -30, 0, 1, 31)) + expect_equal(timeRef %>% dplyr::pull(endDay), c(-31, -1, 0, 30, 365)) }) } From 973643ed9af81d72645df865852c0f5fd8e2e833 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 13:55:37 +0200 Subject: [PATCH 16/18] enable other tests --- tests/testthat/test-Incremental.R | 688 +++++------ tests/testthat/test-ResultsDataModel.R | 466 ++++---- tests/testthat/test-externalConceptCounts.R | 140 +-- tests/testthat/test-runIncidenceRate.R | 126 +- tests/testthat/test-runResolvedConceptSets.R | 90 +- tests/testthat/test-runTimeSeries.R | 1052 ++++++++-------- tests/testthat/test-runVisitContext.R | 1130 +++++++++--------- tests/testthat/test-utils.R | 432 +++---- 8 files changed, 2062 insertions(+), 2062 deletions(-) diff --git a/tests/testthat/test-Incremental.R b/tests/testthat/test-Incremental.R index db6b0ad9e..7397b7494 100644 --- 
a/tests/testthat/test-Incremental.R +++ b/tests/testthat/test-Incremental.R @@ -1,344 +1,344 @@ -# library(testthat) -# -# test_that("Record keeping of single type tasks", { -# rkf <- tempfile() -# -# sql1 <- "SELECT * FROM my_table WHERE x = 1;" -# checksum1 <- CohortDiagnostics:::computeChecksum(sql1) -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# runSql = TRUE, -# checksum = checksum1, -# recordKeepingFile = rkf -# ) -# ) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = 1, -# runSql = TRUE, -# checksum = checksum1, -# recordKeepingFile = rkf -# ) -# -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# runSql = TRUE, -# checksum = checksum1, -# recordKeepingFile = rkf -# ) -# ) -# -# sql2 <- "SELECT * FROM my_table WHERE x = 2;" -# checksum2 <- CohortDiagnostics:::computeChecksum(sql2) -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 2, -# runSql = TRUE, -# checksum = checksum2, -# recordKeepingFile = rkf -# ) -# ) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = 2, -# runSql = TRUE, -# checksum = checksum2, -# recordKeepingFile = rkf -# ) -# -# sql1a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2;" -# checksum1a <- CohortDiagnostics:::computeChecksum(sql1a) -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# runSql = TRUE, -# checksum = checksum1a, -# recordKeepingFile = rkf -# ) -# ) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = 1, -# runSql = TRUE, -# checksum = checksum1a, -# recordKeepingFile = rkf -# ) -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# runSql = TRUE, -# checksum = checksum1a, -# recordKeepingFile = rkf -# ) -# ) -# -# unlink(rkf) -# }) -# -# test_that("Record keeping of multiple type tasks", { -# rkf <- tempfile() -# -# sql1 <- "SELECT * FROM my_table WHERE x = 1;" -# checksum1 <- CohortDiagnostics:::computeChecksum(sql1) -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 
1, -# task = "Run SQL", -# checksum = checksum1, -# recordKeepingFile = rkf -# ) -# ) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = 1, -# task = "Run SQL", -# checksum = checksum1, -# recordKeepingFile = rkf -# ) -# -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# task = "Run SQL", -# checksum = checksum1, -# recordKeepingFile = rkf -# ) -# ) -# -# sql2 <- "SELECT * FROM my_table WHERE x = 1 AND y = 1;" -# checksum2 <- CohortDiagnostics:::computeChecksum(sql2) -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# comparatorId = 2, -# task = "Compare cohorts", -# checksum = checksum2, -# recordKeepingFile = rkf -# ) -# ) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = 1, -# comparatorId = 2, -# task = "Compare cohorts", -# checksum = checksum2, -# recordKeepingFile = rkf -# ) -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# task = "Run SQL", -# checksum = checksum1, -# recordKeepingFile = rkf -# ) -# ) -# -# -# sql2a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2 AND z = 3;" -# checksum2a <- CohortDiagnostics:::computeChecksum(sql2a) -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# comparatorId = 2, -# task = "Compare cohorts", -# checksum = checksum2a, -# recordKeepingFile = rkf -# ) -# ) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = 1, -# comparatorId = 2, -# task = "Compare cohorts", -# checksum = checksum2a, -# recordKeepingFile = rkf -# ) -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = 1, -# comparatorId = 2, -# task = "Compare cohorts", -# checksum = checksum2a, -# recordKeepingFile = rkf -# ) -# ) -# -# unlink(rkf) -# }) -# -# test_that("Record keeping of multiple tasks at once", { -# rkf <- tempfile() -# -# task <- dplyr::tibble( -# cohortId = c(1, 2), -# sql = c( -# "SELECT * FROM my_table WHERE x = 1;", -# "SELECT * FROM my_table WHERE x = 2;" -# ) -# ) -# task$checksum <- 
CohortDiagnostics:::computeChecksum(task$sql) -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = task$cohortId[1], -# checksum = task$checksum[1], -# recordKeepingFile = rkf -# ) -# ) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = task$cohortId, -# checksum = task$checksum, -# recordKeepingFile = rkf -# ) -# -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = task$cohortId[1], -# checksum = task$checksum[1], -# recordKeepingFile = rkf -# ) -# ) -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = task$cohortId[2], -# checksum = task$checksum[2], -# recordKeepingFile = rkf -# ) -# ) -# -# -# task <- dplyr::tibble( -# cohortId = c(1, 2, 3), -# sql = c( -# "SELECT * FROM my_table WHERE x = 3;", -# "SELECT * FROM my_table WHERE x = 4;", -# "SELECT * FROM my_table WHERE x = 5;" -# ) -# ) -# task$checksum <- CohortDiagnostics:::computeChecksum(task$sql) -# -# expect_true( -# CohortDiagnostics:::isTaskRequired( -# cohortId = task$cohortId[1], -# checksum = task$checksum[1], -# recordKeepingFile = rkf -# ) -# ) -# -# tasks <- -# CohortDiagnostics:::getRequiredTasks( -# cohortId = task$cohortId, -# checksum = task$checksum, -# recordKeepingFile = rkf -# ) -# expect_equal(nrow(tasks), 3) -# -# CohortDiagnostics:::recordTasksDone( -# cohortId = task$cohortId, -# checksum = task$checksum, -# recordKeepingFile = rkf -# ) -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = task$cohortId[1], -# checksum = task$checksum[1], -# recordKeepingFile = rkf -# ) -# ) -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = task$cohortId[2], -# checksum = task$checksum[2], -# recordKeepingFile = rkf -# ) -# ) -# -# expect_false( -# CohortDiagnostics:::isTaskRequired( -# cohortId = task$cohortId[3], -# checksum = task$checksum[3], -# recordKeepingFile = rkf -# ) -# ) -# -# tasks <- -# CohortDiagnostics:::getRequiredTasks( -# cohortId = task$cohortId[2], -# checksum = task$checksum[2], 
-# recordKeepingFile = rkf -# ) -# expect_equal(nrow(tasks), 0) -# -# unlink(rkf) -# }) -# -# -# test_that("Incremental save", { -# tmpFile <- tempfile() -# data <- dplyr::tibble( -# cohortId = c(1, 1, 2, 2, 3), -# count = c(100, 200, 300, 400, 500) -# ) -# CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) -# -# newData <- dplyr::tibble( -# cohortId = c(1, 2, 2), -# count = c(600, 700, 800) -# ) -# -# CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c(1, 2)) -# -# -# -# goldStandard <- dplyr::tibble( -# cohortId = c(3, 1, 2, 2), -# count = c(500, 600, 700, 800) -# ) -# -# -# expect_equal( -# readr::read_csv( -# tmpFile, -# col_types = readr::cols(), -# guess_max = min(1e7) -# ), -# goldStandard, -# ignore_attr = TRUE -# ) -# unlink(tmpFile) -# }) -# -# test_that("Incremental save with empty key", { -# tmpFile <- tempfile() -# data <- dplyr::tibble( -# cohortId = c(1, 1, 2, 2, 3), -# count = c(100, 200, 300, 400, 500) -# ) -# CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) -# -# newData <- dplyr::tibble() -# -# CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c()) -# -# expect_equal( -# readr::read_csv( -# tmpFile, -# col_types = readr::cols(), -# guess_max = min(1e7) -# ), -# data, -# ignore_attr = TRUE -# ) -# unlink(tmpFile) -# }) +library(testthat) + +test_that("Record keeping of single type tasks", { + rkf <- tempfile() + + sql1 <- "SELECT * FROM my_table WHERE x = 1;" + checksum1 <- CohortDiagnostics:::computeChecksum(sql1) + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + runSql = TRUE, + checksum = checksum1, + recordKeepingFile = rkf + ) + ) + + CohortDiagnostics:::recordTasksDone( + cohortId = 1, + runSql = TRUE, + checksum = checksum1, + recordKeepingFile = rkf + ) + + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + runSql = TRUE, + checksum = checksum1, + recordKeepingFile = rkf + ) + ) + + sql2 <- "SELECT * FROM my_table WHERE x 
= 2;" + checksum2 <- CohortDiagnostics:::computeChecksum(sql2) + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = 2, + runSql = TRUE, + checksum = checksum2, + recordKeepingFile = rkf + ) + ) + + CohortDiagnostics:::recordTasksDone( + cohortId = 2, + runSql = TRUE, + checksum = checksum2, + recordKeepingFile = rkf + ) + + sql1a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2;" + checksum1a <- CohortDiagnostics:::computeChecksum(sql1a) + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + runSql = TRUE, + checksum = checksum1a, + recordKeepingFile = rkf + ) + ) + + CohortDiagnostics:::recordTasksDone( + cohortId = 1, + runSql = TRUE, + checksum = checksum1a, + recordKeepingFile = rkf + ) + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + runSql = TRUE, + checksum = checksum1a, + recordKeepingFile = rkf + ) + ) + + unlink(rkf) +}) + +test_that("Record keeping of multiple type tasks", { + rkf <- tempfile() + + sql1 <- "SELECT * FROM my_table WHERE x = 1;" + checksum1 <- CohortDiagnostics:::computeChecksum(sql1) + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + task = "Run SQL", + checksum = checksum1, + recordKeepingFile = rkf + ) + ) + + CohortDiagnostics:::recordTasksDone( + cohortId = 1, + task = "Run SQL", + checksum = checksum1, + recordKeepingFile = rkf + ) + + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + task = "Run SQL", + checksum = checksum1, + recordKeepingFile = rkf + ) + ) + + sql2 <- "SELECT * FROM my_table WHERE x = 1 AND y = 1;" + checksum2 <- CohortDiagnostics:::computeChecksum(sql2) + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + comparatorId = 2, + task = "Compare cohorts", + checksum = checksum2, + recordKeepingFile = rkf + ) + ) + + CohortDiagnostics:::recordTasksDone( + cohortId = 1, + comparatorId = 2, + task = "Compare cohorts", + checksum = checksum2, + recordKeepingFile = rkf + ) + + expect_false( + 
CohortDiagnostics:::isTaskRequired( + cohortId = 1, + task = "Run SQL", + checksum = checksum1, + recordKeepingFile = rkf + ) + ) + + + sql2a <- "SELECT * FROM my_table WHERE x = 1 AND y = 2 AND z = 3;" + checksum2a <- CohortDiagnostics:::computeChecksum(sql2a) + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + comparatorId = 2, + task = "Compare cohorts", + checksum = checksum2a, + recordKeepingFile = rkf + ) + ) + + CohortDiagnostics:::recordTasksDone( + cohortId = 1, + comparatorId = 2, + task = "Compare cohorts", + checksum = checksum2a, + recordKeepingFile = rkf + ) + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = 1, + comparatorId = 2, + task = "Compare cohorts", + checksum = checksum2a, + recordKeepingFile = rkf + ) + ) + + unlink(rkf) +}) + +test_that("Record keeping of multiple tasks at once", { + rkf <- tempfile() + + task <- dplyr::tibble( + cohortId = c(1, 2), + sql = c( + "SELECT * FROM my_table WHERE x = 1;", + "SELECT * FROM my_table WHERE x = 2;" + ) + ) + task$checksum <- CohortDiagnostics:::computeChecksum(task$sql) + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = task$cohortId[1], + checksum = task$checksum[1], + recordKeepingFile = rkf + ) + ) + + CohortDiagnostics:::recordTasksDone( + cohortId = task$cohortId, + checksum = task$checksum, + recordKeepingFile = rkf + ) + + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = task$cohortId[1], + checksum = task$checksum[1], + recordKeepingFile = rkf + ) + ) + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = task$cohortId[2], + checksum = task$checksum[2], + recordKeepingFile = rkf + ) + ) + + + task <- dplyr::tibble( + cohortId = c(1, 2, 3), + sql = c( + "SELECT * FROM my_table WHERE x = 3;", + "SELECT * FROM my_table WHERE x = 4;", + "SELECT * FROM my_table WHERE x = 5;" + ) + ) + task$checksum <- CohortDiagnostics:::computeChecksum(task$sql) + + expect_true( + CohortDiagnostics:::isTaskRequired( + cohortId = 
task$cohortId[1], + checksum = task$checksum[1], + recordKeepingFile = rkf + ) + ) + + tasks <- + CohortDiagnostics:::getRequiredTasks( + cohortId = task$cohortId, + checksum = task$checksum, + recordKeepingFile = rkf + ) + expect_equal(nrow(tasks), 3) + + CohortDiagnostics:::recordTasksDone( + cohortId = task$cohortId, + checksum = task$checksum, + recordKeepingFile = rkf + ) + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = task$cohortId[1], + checksum = task$checksum[1], + recordKeepingFile = rkf + ) + ) + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = task$cohortId[2], + checksum = task$checksum[2], + recordKeepingFile = rkf + ) + ) + + expect_false( + CohortDiagnostics:::isTaskRequired( + cohortId = task$cohortId[3], + checksum = task$checksum[3], + recordKeepingFile = rkf + ) + ) + + tasks <- + CohortDiagnostics:::getRequiredTasks( + cohortId = task$cohortId[2], + checksum = task$checksum[2], + recordKeepingFile = rkf + ) + expect_equal(nrow(tasks), 0) + + unlink(rkf) +}) + + +test_that("Incremental save", { + tmpFile <- tempfile() + data <- dplyr::tibble( + cohortId = c(1, 1, 2, 2, 3), + count = c(100, 200, 300, 400, 500) + ) + CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) + + newData <- dplyr::tibble( + cohortId = c(1, 2, 2), + count = c(600, 700, 800) + ) + + CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c(1, 2)) + + + + goldStandard <- dplyr::tibble( + cohortId = c(3, 1, 2, 2), + count = c(500, 600, 700, 800) + ) + + + expect_equal( + readr::read_csv( + tmpFile, + col_types = readr::cols(), + guess_max = min(1e7) + ), + goldStandard, + ignore_attr = TRUE + ) + unlink(tmpFile) +}) + +test_that("Incremental save with empty key", { + tmpFile <- tempfile() + data <- dplyr::tibble( + cohortId = c(1, 1, 2, 2, 3), + count = c(100, 200, 300, 400, 500) + ) + CohortDiagnostics:::saveIncremental(data, tmpFile, cohortId = c(1, 2, 3)) + + newData <- dplyr::tibble() + + 
CohortDiagnostics:::saveIncremental(newData, tmpFile, cohortId = c()) + + expect_equal( + readr::read_csv( + tmpFile, + col_types = readr::cols(), + guess_max = min(1e7) + ), + data, + ignore_attr = TRUE + ) + unlink(tmpFile) +}) diff --git a/tests/testthat/test-ResultsDataModel.R b/tests/testthat/test-ResultsDataModel.R index 76777d803..f3ee4b556 100644 --- a/tests/testthat/test-ResultsDataModel.R +++ b/tests/testthat/test-ResultsDataModel.R @@ -1,233 +1,233 @@ -# skipResultsDm <- FALSE -# if (Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" || Sys.getenv("SKIP_DB_TESTS") == "TRUE") { -# skipResultsDm <- TRUE -# } else { -# postgresConnectionDetails <- DatabaseConnector::createConnectionDetails( -# dbms = "postgresql", -# user = Sys.getenv("CDM5_POSTGRESQL_USER"), -# password = URLdecode(Sys.getenv("CDM5_POSTGRESQL_PASSWORD")), -# server = Sys.getenv("CDM5_POSTGRESQL_SERVER"), -# pathToDriver = jdbcDriverFolder -# ) -# -# resultsDatabaseSchema <- paste0("r", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) -# -# # Always clean up -# withr::defer( -# { -# pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) -# sql <- "DROP SCHEMA IF EXISTS @resultsDatabaseSchema CASCADE;" -# DatabaseConnector::renderTranslateExecuteSql( -# sql = sql, -# resultsDatabaseSchema = resultsDatabaseSchema, -# connection = pgConnection -# ) -# -# DatabaseConnector::disconnect(pgConnection) -# unlink(folder, recursive = TRUE, force = TRUE) -# }, -# testthat::teardown_env() -# ) -# } -# -# test_that("Create schema", { -# skip_if(skipResultsDm | skipCdmTests, "results data model test server not set") -# pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) -# with_dbc_connection(pgConnection, { -# sql <- "CREATE SCHEMA @resultsDatabaseSchema;" -# DatabaseConnector::renderTranslateExecuteSql( -# sql = sql, -# resultsDatabaseSchema = resultsDatabaseSchema, -# connection = pgConnection -# ) -# createResultsDataModel( -# 
connectionDetails = postgresConnectionDetails, -# databaseSchema = resultsDatabaseSchema, -# tablePrefix = "cd_" -# ) -# -# specifications <- getResultsDataModelSpecifications() -# -# for (tableName in unique(specifications$tableName)) { -# expect_true(.pgTableExists(pgConnection, resultsDatabaseSchema, paste0("cd_", tableName))) -# } -# # Bad schema name -# expect_error(createResultsDataModel( -# connectionDetails = postgresConnectionDetails, -# databaseSchema = "non_existant_schema" -# )) -# }) -# }) -# -# test_that("Results upload", { -# skip_if(skipResultsDm | skipCdmTests, "results data model test server not set") -# if (dbms == "sqlite") { -# # Checks to see if adding extra OMOP vocab, unexpectedly breaks things -# connection <- DatabaseConnector::connect(connectionDetails) -# with_dbc_connection(connection, { -# DatabaseConnector::renderTranslateExecuteSql(connection, " -# INSERT INTO main.vocabulary -# (VOCABULARY_ID, VOCABULARY_NAME, VOCABULARY_REFERENCE, VOCABULARY_VERSION, VOCABULARY_CONCEPT_ID) VALUES -# ('None','OMOP Standardized Vocabularies','OMOP generated','v5.5 17-FEB-22',44819096); -# -# INSERT INTO CDM_SOURCE -# (CDM_SOURCE_NAME,CDM_SOURCE_ABBREVIATION,CDM_HOLDER,SOURCE_DESCRIPTION,SOURCE_DOCUMENTATION_REFERENCE,CDM_ETL_REFERENCE,SOURCE_RELEASE_DATE,CDM_RELEASE_DATE,CDM_VERSION,VOCABULARY_VERSION) -# VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient Population Simulator.','https://synthetichealth.github.io/synthea/','https://github.com/OHDSI/ETL-Synthea',1558742400,1558742400,'v5.4','v5.0 22-JAN-22');") -# -# # Check to see if non-standard extra columns are handled -# DatabaseConnector::renderTranslateExecuteSql( -# connection, -# "ALTER TABLE VOCABULARY ADD TEST_COLUMN varchar(255) DEFAULT 'foo';" -# ) -# }) -# } -# -# if (dbms == "sqlite") { -# expect_warning( -# { -# executeDiagnostics( -# connectionDetails = connectionDetails, -# cdmDatabaseSchema = cdmDatabaseSchema, -# vocabularyDatabaseSchema = 
vocabularyDatabaseSchema, -# tempEmulationSchema = tempEmulationSchema, -# cohortDatabaseSchema = cohortDatabaseSchema, -# cohortTableNames = cohortTableNames, -# cohortIds = cohortIds, -# cohortDefinitionSet = cohortDefinitionSet, -# exportFolder = file.path(folder, "export"), -# databaseId = dbms, -# runInclusionStatistics = TRUE, -# runBreakdownIndexEvents = TRUE, -# runTemporalCohortCharacterization = TRUE, -# runIncidenceRate = TRUE, -# runIncludedSourceConcepts = TRUE, -# runOrphanConcepts = TRUE, -# incremental = TRUE, -# incrementalFolder = file.path(folder, "incremental"), -# temporalCovariateSettings = temporalCovariateSettings, -# runFeatureExtractionOnSample = TRUE -# ) -# }, -# "CDM Source table has more than one record while only one is expected." -# ) -# } else { -# executeDiagnostics( -# connectionDetails = connectionDetails, -# cdmDatabaseSchema = cdmDatabaseSchema, -# vocabularyDatabaseSchema = vocabularyDatabaseSchema, -# tempEmulationSchema = tempEmulationSchema, -# cohortDatabaseSchema = cohortDatabaseSchema, -# cohortTableNames = cohortTableNames, -# cohortIds = cohortIds, -# cohortDefinitionSet = cohortDefinitionSet, -# exportFolder = file.path(folder, "export"), -# databaseId = dbms, -# runInclusionStatistics = TRUE, -# runBreakdownIndexEvents = TRUE, -# runTemporalCohortCharacterization = TRUE, -# runIncidenceRate = TRUE, -# runIncludedSourceConcepts = TRUE, -# runOrphanConcepts = TRUE, -# incremental = TRUE, -# incrementalFolder = file.path(folder, "incremental"), -# temporalCovariateSettings = temporalCovariateSettings, -# runFeatureExtractionOnSample = TRUE -# ) -# } -# -# listOfZipFilesToUpload <- -# list.files( -# path = file.path(folder, "export"), -# pattern = ".zip", -# full.names = TRUE, -# recursive = TRUE -# ) -# -# for (i in (1:length(listOfZipFilesToUpload))) { -# uploadResults( -# connectionDetails = postgresConnectionDetails, -# schema = resultsDatabaseSchema, -# zipFileName = listOfZipFilesToUpload[[i]], -# tablePrefix = 
"cd_" -# ) -# } -# -# specifications <- getResultsDataModelSpecifications() -# pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) -# with_dbc_connection(pgConnection, { -# for (tableName in unique(specifications$tableName)) { -# primaryKey <- specifications %>% -# dplyr::filter(tableName == !!tableName & -# primaryKey == "Yes") %>% -# dplyr::select("columnName") %>% -# dplyr::pull() -# -# if ("database_id" %in% primaryKey) { -# sql <- -# "SELECT COUNT(*) FROM @schema.@table_name WHERE database_id = '@database_id';" -# sql <- SqlRender::render( -# sql = sql, -# schema = resultsDatabaseSchema, -# table_name = paste0("cd_", tableName), -# database_id = "cdmv5" -# ) -# databaseIdCount <- DatabaseConnector::querySql(pgConnection, sql)[, 1] -# expect_true(databaseIdCount >= 0) -# } -# } -# }) -# }) -# -# test_that("Sqlite results data model", { -# skip_if(skipResultsDm) -# dbFile <- tempfile(fileext = ".sqlite") -# createMergedResultsFile(dataFolder = file.path(folder, "export"), sqliteDbPath = dbFile, overwrite = TRUE, tablePrefix = "cd_") -# connectionDetailsSqlite <- DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = dbFile) -# connectionSqlite <- DatabaseConnector::connect(connectionDetails = connectionDetailsSqlite) -# with_dbc_connection(connectionSqlite, { -# # Bad schema name -# expect_error(createResultsDataModel( -# connectionDetails = connectionDetailsSqlite, -# databaseSchema = "non_existant_schema" -# )) -# -# specifications <- getResultsDataModelSpecifications() -# for (tableName in unique(specifications$tableName)) { -# primaryKey <- specifications %>% -# dplyr::filter(tableName == !!tableName & -# primaryKey == "Yes") %>% -# dplyr::select("columnName") %>% -# dplyr::pull() -# -# if ("database_id" %in% primaryKey) { -# sql <- -# "SELECT COUNT(*) FROM @schema.@table_name WHERE database_id = '@database_id';" -# sql <- SqlRender::render( -# sql = sql, -# schema = "main", -# table_name = paste0("cd_", 
tableName), -# database_id = "cdmv5" -# ) -# databaseIdCount <- DatabaseConnector::querySql(connectionSqlite, sql)[, 1] -# expect_true(databaseIdCount >= 0) -# } -# } -# }) -# }) -# -# -# test_that("getResultsDataModelSpecifications works", { -# spec <- getResultsDataModelSpecifications() -# expectedColumnNames <- c("tableName", "columnName", "dataType", "isRequired", "primaryKey", -# "optional", "emptyIsNa", "minCellCount", "isVocabularyTable", -# "neverIncremental", "description") -# expect_true(is.data.frame(spec)) -# expect_named(spec, expectedColumnNames) -# -# expect_equal(length(unique(spec$tableName)), 30) -# spec <- getResultsDataModelSpecifications("cohort") -# expect_equal(length(unique(spec$tableName)), 1) -# -# expect_error(getResultsDataModelSpecifications(c("cohort", "time_series"))) -# expect_error(getResultsDataModelSpecifications(1)) -# }) +skipResultsDm <- FALSE +if (Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" || Sys.getenv("SKIP_DB_TESTS") == "TRUE") { + skipResultsDm <- TRUE +} else { + postgresConnectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "postgresql", + user = Sys.getenv("CDM5_POSTGRESQL_USER"), + password = URLdecode(Sys.getenv("CDM5_POSTGRESQL_PASSWORD")), + server = Sys.getenv("CDM5_POSTGRESQL_SERVER"), + pathToDriver = jdbcDriverFolder + ) + + resultsDatabaseSchema <- paste0("r", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) + + # Always clean up + withr::defer( + { + pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) + sql <- "DROP SCHEMA IF EXISTS @resultsDatabaseSchema CASCADE;" + DatabaseConnector::renderTranslateExecuteSql( + sql = sql, + resultsDatabaseSchema = resultsDatabaseSchema, + connection = pgConnection + ) + + DatabaseConnector::disconnect(pgConnection) + unlink(folder, recursive = TRUE, force = TRUE) + }, + testthat::teardown_env() + ) +} + +test_that("Create schema", { + skip_if(skipResultsDm | skipCdmTests, "results data model test server not 
set") + pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) + with_dbc_connection(pgConnection, { + sql <- "CREATE SCHEMA @resultsDatabaseSchema;" + DatabaseConnector::renderTranslateExecuteSql( + sql = sql, + resultsDatabaseSchema = resultsDatabaseSchema, + connection = pgConnection + ) + createResultsDataModel( + connectionDetails = postgresConnectionDetails, + databaseSchema = resultsDatabaseSchema, + tablePrefix = "cd_" + ) + + specifications <- getResultsDataModelSpecifications() + + for (tableName in unique(specifications$tableName)) { + expect_true(.pgTableExists(pgConnection, resultsDatabaseSchema, paste0("cd_", tableName))) + } + # Bad schema name + expect_error(createResultsDataModel( + connectionDetails = postgresConnectionDetails, + databaseSchema = "non_existant_schema" + )) + }) +}) + +test_that("Results upload", { + skip_if(skipResultsDm | skipCdmTests, "results data model test server not set") + if (dbms == "sqlite") { + # Checks to see if adding extra OMOP vocab, unexpectedly breaks things + connection <- DatabaseConnector::connect(connectionDetails) + with_dbc_connection(connection, { + DatabaseConnector::renderTranslateExecuteSql(connection, " +INSERT INTO main.vocabulary +(VOCABULARY_ID, VOCABULARY_NAME, VOCABULARY_REFERENCE, VOCABULARY_VERSION, VOCABULARY_CONCEPT_ID) VALUES +('None','OMOP Standardized Vocabularies','OMOP generated','v5.5 17-FEB-22',44819096); + +INSERT INTO CDM_SOURCE +(CDM_SOURCE_NAME,CDM_SOURCE_ABBREVIATION,CDM_HOLDER,SOURCE_DESCRIPTION,SOURCE_DOCUMENTATION_REFERENCE,CDM_ETL_REFERENCE,SOURCE_RELEASE_DATE,CDM_RELEASE_DATE,CDM_VERSION,VOCABULARY_VERSION) +VALUES ('Synthea','Synthea','OHDSI Community','SyntheaTM is a Synthetic Patient Population Simulator.','https://synthetichealth.github.io/synthea/','https://github.com/OHDSI/ETL-Synthea',1558742400,1558742400,'v5.4','v5.0 22-JAN-22');") + + # Check to see if non-standard extra columns are handled + 
DatabaseConnector::renderTranslateExecuteSql( + connection, + "ALTER TABLE VOCABULARY ADD TEST_COLUMN varchar(255) DEFAULT 'foo';" + ) + }) + } + + if (dbms == "sqlite") { + expect_warning( + { + executeDiagnostics( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + vocabularyDatabaseSchema = vocabularyDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + cohortDatabaseSchema = cohortDatabaseSchema, + cohortTableNames = cohortTableNames, + cohortIds = cohortIds, + cohortDefinitionSet = cohortDefinitionSet, + exportFolder = file.path(folder, "export"), + databaseId = dbms, + runInclusionStatistics = TRUE, + runBreakdownIndexEvents = TRUE, + runTemporalCohortCharacterization = TRUE, + runIncidenceRate = TRUE, + runIncludedSourceConcepts = TRUE, + runOrphanConcepts = TRUE, + incremental = TRUE, + incrementalFolder = file.path(folder, "incremental"), + temporalCovariateSettings = temporalCovariateSettings, + runFeatureExtractionOnSample = TRUE + ) + }, + "CDM Source table has more than one record while only one is expected." 
+ ) + } else { + executeDiagnostics( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + vocabularyDatabaseSchema = vocabularyDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + cohortDatabaseSchema = cohortDatabaseSchema, + cohortTableNames = cohortTableNames, + cohortIds = cohortIds, + cohortDefinitionSet = cohortDefinitionSet, + exportFolder = file.path(folder, "export"), + databaseId = dbms, + runInclusionStatistics = TRUE, + runBreakdownIndexEvents = TRUE, + runTemporalCohortCharacterization = TRUE, + runIncidenceRate = TRUE, + runIncludedSourceConcepts = TRUE, + runOrphanConcepts = TRUE, + incremental = TRUE, + incrementalFolder = file.path(folder, "incremental"), + temporalCovariateSettings = temporalCovariateSettings, + runFeatureExtractionOnSample = TRUE + ) + } + + listOfZipFilesToUpload <- + list.files( + path = file.path(folder, "export"), + pattern = ".zip", + full.names = TRUE, + recursive = TRUE + ) + + for (i in (1:length(listOfZipFilesToUpload))) { + uploadResults( + connectionDetails = postgresConnectionDetails, + schema = resultsDatabaseSchema, + zipFileName = listOfZipFilesToUpload[[i]], + tablePrefix = "cd_" + ) + } + + specifications <- getResultsDataModelSpecifications() + pgConnection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) + with_dbc_connection(pgConnection, { + for (tableName in unique(specifications$tableName)) { + primaryKey <- specifications %>% + dplyr::filter(tableName == !!tableName & + primaryKey == "Yes") %>% + dplyr::select("columnName") %>% + dplyr::pull() + + if ("database_id" %in% primaryKey) { + sql <- + "SELECT COUNT(*) FROM @schema.@table_name WHERE database_id = '@database_id';" + sql <- SqlRender::render( + sql = sql, + schema = resultsDatabaseSchema, + table_name = paste0("cd_", tableName), + database_id = "cdmv5" + ) + databaseIdCount <- DatabaseConnector::querySql(pgConnection, sql)[, 1] + expect_true(databaseIdCount >= 0) + } + } + }) +}) + 
+test_that("Sqlite results data model", { + skip_if(skipResultsDm) + dbFile <- tempfile(fileext = ".sqlite") + createMergedResultsFile(dataFolder = file.path(folder, "export"), sqliteDbPath = dbFile, overwrite = TRUE, tablePrefix = "cd_") + connectionDetailsSqlite <- DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = dbFile) + connectionSqlite <- DatabaseConnector::connect(connectionDetails = connectionDetailsSqlite) + with_dbc_connection(connectionSqlite, { + # Bad schema name + expect_error(createResultsDataModel( + connectionDetails = connectionDetailsSqlite, + databaseSchema = "non_existant_schema" + )) + + specifications <- getResultsDataModelSpecifications() + for (tableName in unique(specifications$tableName)) { + primaryKey <- specifications %>% + dplyr::filter(tableName == !!tableName & + primaryKey == "Yes") %>% + dplyr::select("columnName") %>% + dplyr::pull() + + if ("database_id" %in% primaryKey) { + sql <- + "SELECT COUNT(*) FROM @schema.@table_name WHERE database_id = '@database_id';" + sql <- SqlRender::render( + sql = sql, + schema = "main", + table_name = paste0("cd_", tableName), + database_id = "cdmv5" + ) + databaseIdCount <- DatabaseConnector::querySql(connectionSqlite, sql)[, 1] + expect_true(databaseIdCount >= 0) + } + } + }) +}) + + +test_that("getResultsDataModelSpecifications works", { + spec <- getResultsDataModelSpecifications() + expectedColumnNames <- c("tableName", "columnName", "dataType", "isRequired", "primaryKey", + "optional", "emptyIsNa", "minCellCount", "isVocabularyTable", + "neverIncremental", "description") + expect_true(is.data.frame(spec)) + expect_named(spec, expectedColumnNames) + + expect_equal(length(unique(spec$tableName)), 30) + spec <- getResultsDataModelSpecifications("cohort") + expect_equal(length(unique(spec$tableName)), 1) + + expect_error(getResultsDataModelSpecifications(c("cohort", "time_series"))) + expect_error(getResultsDataModelSpecifications(1)) + }) diff --git 
a/tests/testthat/test-externalConceptCounts.R b/tests/testthat/test-externalConceptCounts.R index 4fb9ce216..f7ffc1f9c 100644 --- a/tests/testthat/test-externalConceptCounts.R +++ b/tests/testthat/test-externalConceptCounts.R @@ -1,70 +1,70 @@ -# test_that("Creating and checking externalConceptCounts table", { -# if (dbmsToTest == "sqlite") { -# connectionDetails <- testServers[["sqlite"]]$connectionDetails -# connection <- connect(connectionDetails) -# cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema -# conceptCountsTable <- "concept_counts" -# CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, -# cdmDatabaseSchema = cdmDatabaseSchema, -# tempEmulationSchema = NULL, -# conceptCountsTable = "concept_counts", -# conceptCountsDatabaseSchema = cdmDatabaseSchema, -# conceptCountsTableIsTemp = FALSE, -# removeCurrentTable = TRUE) -# -# concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") -# expect_equal(concept_counts_info$NAME, c("concept_id", -# "concept_count", -# "concept_subjects", -# "vocabulary_version")) -# checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, -# name = conceptCountsTable, -# databaseSchema = cdmDatabaseSchema) -# expect_true(checkConceptCountsTableExists) -# -# # Checking vocab version matches -# useExternalConceptCountsTable <- TRUE -# conceptCountsTable <- "concept_counts" -# conceptCountsTable <- conceptCountsTable -# dataSourceInfo <- getCdmDataSourceInformation(connection = connection, cdmDatabaseSchema = cdmDatabaseSchema) -# vocabVersion <- dataSourceInfo$vocabularyVersion -# vocabVersionExternalConceptCountsTable <- renderTranslateQuerySql( -# connection = connection, -# sql = "SELECT DISTINCT vocabulary_version FROM @work_database_schema.@concept_counts_table;", -# work_database_schema = cdmDatabaseSchema, -# concept_counts_table = conceptCountsTable, -# snakeCaseToCamelCase = TRUE, -# tempEmulationSchema = 
getOption("sqlRenderTempEmulationSchena") -# ) -# -# expect_equal(vocabVersion, vocabVersionExternalConceptCountsTable[1,1]) -# } -# -# }) -# -# test_that("Creating and checking externalConceptCounts temp table", { -# if (dbmsToTest == "sqlite") { -# # Creating externalConceptCounts -# # sql_lite_path <- file.path(test_path(), databaseFile) -# connectionDetails <- testServers[["sqlite"]]$connectionDetails -# connection <- connect(connectionDetails) -# cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema -# conceptCountsTable <- "concept_counts" -# CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, -# cdmDatabaseSchema = cdmDatabaseSchema, -# tempEmulationSchema = NULL, -# conceptCountsTable = conceptCountsTable, -# conceptCountsDatabaseSchema = cdmDatabaseSchema, -# conceptCountsTableIsTemp = TRUE, -# removeCurrentTable = TRUE) -# -# concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") -# expect_equal(concept_counts_info$NAME, c("concept_id", -# "concept_count", -# "concept_subjects")) -# checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, -# name = conceptCountsTable, -# databaseSchema = cdmDatabaseSchema) -# expect_true(checkConceptCountsTableExists) -# } -# }) +test_that("Creating and checking externalConceptCounts table", { + if (dbmsToTest == "sqlite") { + connectionDetails <- testServers[["sqlite"]]$connectionDetails + connection <- connect(connectionDetails) + cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema + conceptCountsTable <- "concept_counts" + CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = NULL, + conceptCountsTable = "concept_counts", + conceptCountsDatabaseSchema = cdmDatabaseSchema, + conceptCountsTableIsTemp = FALSE, + removeCurrentTable = TRUE) + + concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") + 
expect_equal(concept_counts_info$NAME, c("concept_id", + "concept_count", + "concept_subjects", + "vocabulary_version")) + checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, + name = conceptCountsTable, + databaseSchema = cdmDatabaseSchema) + expect_true(checkConceptCountsTableExists) + + # Checking vocab version matches + useExternalConceptCountsTable <- TRUE + conceptCountsTable <- "concept_counts" + conceptCountsTable <- conceptCountsTable + dataSourceInfo <- getCdmDataSourceInformation(connection = connection, cdmDatabaseSchema = cdmDatabaseSchema) + vocabVersion <- dataSourceInfo$vocabularyVersion + vocabVersionExternalConceptCountsTable <- renderTranslateQuerySql( + connection = connection, + sql = "SELECT DISTINCT vocabulary_version FROM @work_database_schema.@concept_counts_table;", + work_database_schema = cdmDatabaseSchema, + concept_counts_table = conceptCountsTable, + snakeCaseToCamelCase = TRUE, + tempEmulationSchema = getOption("sqlRenderTempEmulationSchena") + ) + + expect_equal(vocabVersion, vocabVersionExternalConceptCountsTable[1,1]) + } + +}) + +test_that("Creating and checking externalConceptCounts temp table", { + if (dbmsToTest == "sqlite") { + # Creating externalConceptCounts + # sql_lite_path <- file.path(test_path(), databaseFile) + connectionDetails <- testServers[["sqlite"]]$connectionDetails + connection <- connect(connectionDetails) + cdmDatabaseSchema <- testServers[["sqlite"]]$cdmDatabaseSchema + conceptCountsTable <- "concept_counts" + CohortDiagnostics::createConceptCountsTable(connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = NULL, + conceptCountsTable = conceptCountsTable, + conceptCountsDatabaseSchema = cdmDatabaseSchema, + conceptCountsTableIsTemp = TRUE, + removeCurrentTable = TRUE) + + concept_counts_info <- querySql(connection, "PRAGMA table_info(concept_counts)") + expect_equal(concept_counts_info$NAME, c("concept_id", + "concept_count", + 
"concept_subjects")) + checkConceptCountsTableExists <- DatabaseConnector::dbExistsTable(connection, + name = conceptCountsTable, + databaseSchema = cdmDatabaseSchema) + expect_true(checkConceptCountsTableExists) + } +}) diff --git a/tests/testthat/test-runIncidenceRate.R b/tests/testthat/test-runIncidenceRate.R index 5877f2df8..718c50005 100644 --- a/tests/testthat/test-runIncidenceRate.R +++ b/tests/testthat/test-runIncidenceRate.R @@ -1,63 +1,63 @@ -# -# # test getIncidenceRate on all dbms -# for (nm in names(testServers)) { -# server <- testServers[[nm]] -# -# test_that(paste("getIncidenceRate works on", nm), { -# -# connection <- DatabaseConnector::connect(server$connectionDetails) -# result <- getIncidenceRate( -# connection = connection, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = server$cohortTable, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, -# tempEmulationSchema = server$tempEmulationSchema, -# firstOccurrenceOnly = TRUE, -# washoutPeriod = 365, -# cohortId = server$cohortDefinitionSet$cohortId[1]) -# -# expect_true(is.data.frame(result)) -# -# # getResultsDataModelSpecifications("incidence_rate")$columnName -# expect_equal( -# names(result), -# c("cohortCount", "personYears", "gender", "ageGroup", "calendarYear", "incidenceRate") -# ) -# -# DatabaseConnector::disconnect(connection) -# }) -# } -# -# -# # only test runIncidenceRate on sqlite (or duckdb) -# test_that("runIncidenceRate", { -# skip_if_not("sqlite" %in% names(testServers)) -# -# server <- testServers[["sqlite"]] -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# incrementalFolder <- tempfile() -# dir.create(incrementalFolder) -# -# connection <- DatabaseConnector::connect(server$connectionDetails) -# -# runIncidenceRate( -# connection, -# cohortDefinitionSet = server$cohortDefinitionSet[1:2,], -# tempEmulationSchema = server$tempEmulationSchema, -# cdmDatabaseSchema = 
server$cdmDatabaseSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = server$cohortTable, -# databaseId = "GiBleed", -# exportFolder = exportFolder, -# minCellCount = 1, -# washoutPeriod = 0, -# incremental = F) -# DatabaseConnector::disconnect(connection) -# expect_true(file.exists(file.path(exportFolder, "incidence_rate.csv"))) -# }) -# -# -# + +# test getIncidenceRate on all dbms +for (nm in names(testServers)) { + server <- testServers[[nm]] + + test_that(paste("getIncidenceRate works on", nm), { + + connection <- DatabaseConnector::connect(server$connectionDetails) + result <- getIncidenceRate( + connection = connection, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + cdmDatabaseSchema = server$cdmDatabaseSchema, + vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema, + firstOccurrenceOnly = TRUE, + washoutPeriod = 365, + cohortId = server$cohortDefinitionSet$cohortId[1]) + + expect_true(is.data.frame(result)) + + # getResultsDataModelSpecifications("incidence_rate")$columnName + expect_equal( + names(result), + c("cohortCount", "personYears", "gender", "ageGroup", "calendarYear", "incidenceRate") + ) + + DatabaseConnector::disconnect(connection) + }) +} + + +# only test runIncidenceRate on sqlite (or duckdb) +test_that("runIncidenceRate", { + skip_if_not("sqlite" %in% names(testServers)) + + server <- testServers[["sqlite"]] + exportFolder <- tempfile() + dir.create(exportFolder) + + incrementalFolder <- tempfile() + dir.create(incrementalFolder) + + connection <- DatabaseConnector::connect(server$connectionDetails) + + runIncidenceRate( + connection, + cohortDefinitionSet = server$cohortDefinitionSet[1:2,], + tempEmulationSchema = server$tempEmulationSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + databaseId = "GiBleed", + 
exportFolder = exportFolder, + minCellCount = 1, + washoutPeriod = 0, + incremental = F) + DatabaseConnector::disconnect(connection) + expect_true(file.exists(file.path(exportFolder, "incidence_rate.csv"))) +}) + + + diff --git a/tests/testthat/test-runResolvedConceptSets.R b/tests/testthat/test-runResolvedConceptSets.R index 287797d1a..feaf25365 100644 --- a/tests/testthat/test-runResolvedConceptSets.R +++ b/tests/testthat/test-runResolvedConceptSets.R @@ -1,45 +1,45 @@ -# -# server <- testServers[[1]] -# -# for (server in testServers) { -# test_that(paste("getResolvedConceptSets works on", server$connectionDetails$dbms), { -# -# connection <- DatabaseConnector::connect(server$connectionDetails) -# result <- getResolvedConceptSets( -# connection = connection, -# cohortDefinitionSet = server$cohortDefinitionSet, -# vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, -# tempEmulationSchema = server$tempEmulationSchema -# ) -# -# expect_true(is.data.frame(result)) -# expect_named(result, c("cohortId", "conceptSetId", "conceptId")) -# expect_true(tempTableExists("concept_ids")) -# expect_true(tempTableExists("inst_concept_sets")) -# DatabaseConnector::disconnect(connection) -# }) -# } -# -# test_that("runResolvedConceptSets works", { -# skip_if_not("sqlite" %in% names(testServers)) -# server <- testServers[["sqlite"]] -# connection <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# runResolvedConceptSets( -# connection = connection, -# cohortDefinitionSet = server$cohortDefinitionSet, -# databaseId = server$connectionDetails$dbms, -# exportFolder = exportFolder, -# minCellCount = 1, -# vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, -# tempEmulationSchema = server$tempEmulationSchema -# ) -# -# DatabaseConnector::disconnect(connection) -# result <- readr::read_csv(file.path(exportFolder, "resolved_concepts.csv"), show_col_types = F) -# expect_true(is.data.frame(result)) -# 
expect_named(result, c("cohort_id", "concept_set_id", "concept_id", "database_id")) -# }) -# + +server <- testServers[[1]] + +for (server in testServers) { + test_that(paste("getResolvedConceptSets works on", server$connectionDetails$dbms), { + + connection <- DatabaseConnector::connect(server$connectionDetails) + result <- getResolvedConceptSets( + connection = connection, + cohortDefinitionSet = server$cohortDefinitionSet, + vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema + ) + + expect_true(is.data.frame(result)) + expect_named(result, c("cohortId", "conceptSetId", "conceptId")) + expect_true(tempTableExists("concept_ids")) + expect_true(tempTableExists("inst_concept_sets")) + DatabaseConnector::disconnect(connection) + }) +} + +test_that("runResolvedConceptSets works", { + skip_if_not("sqlite" %in% names(testServers)) + server <- testServers[["sqlite"]] + connection <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + dir.create(exportFolder) + + runResolvedConceptSets( + connection = connection, + cohortDefinitionSet = server$cohortDefinitionSet, + databaseId = server$connectionDetails$dbms, + exportFolder = exportFolder, + minCellCount = 1, + vocabularyDatabaseSchema = server$vocabularyDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema + ) + + DatabaseConnector::disconnect(connection) + result <- readr::read_csv(file.path(exportFolder, "resolved_concepts.csv"), show_col_types = F) + expect_true(is.data.frame(result)) + expect_named(result, c("cohort_id", "concept_set_id", "concept_id", "database_id")) +}) + diff --git a/tests/testthat/test-runTimeSeries.R b/tests/testthat/test-runTimeSeries.R index 1ffb0aca2..de8060434 100644 --- a/tests/testthat/test-runTimeSeries.R +++ b/tests/testthat/test-runTimeSeries.R @@ -1,526 +1,526 @@ -# # Test getTimeSeries on all testServers -# for (nm in names(testServers)) { -# -# server <- testServers[[nm]] -# con <- 
connect(server$connectionDetails) -# exportFolder <- file.path(tempdir(), paste0(nm, "exp")) -# recordKeepingFile <- file.path(exportFolder, "record.csv") -# -# test_that("Testing time series logic", { -# skip_if(skipCdmTests, "cdm settings not configured") -# -# # to do - with incremental = FALSE -# with_dbc_connection(con, { -# # manually create cohort table and load to table -# # Cohort table has a total of four records, with each cohort id having two each -# # cohort 1 has one subject with two different cohort entries -# # cohort 2 has two subject with two different cohort entries -# cohort <- dplyr::tibble( -# cohortDefinitionId = c(1, 1, 2, 2), -# subjectId = c(1, 1, 1, 2), -# cohortStartDate = c(as.Date("2005-01-15"), as.Date("2005-07-15"), as.Date("2005-01-15"), as.Date("2005-07-15")), -# cohortEndDate = c(as.Date("2005-05-15"), as.Date("2005-09-15"), as.Date("2005-05-15"), as.Date("2005-09-15")) -# ) -# -# cohortTable <- -# paste0("ct_", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) -# -# DatabaseConnector::insertTable( -# connection = con, -# databaseSchema = server$cohortDatabaseSchema, -# tableName = cohortTable, -# data = cohort, -# dropTableIfExists = TRUE, -# createTable = TRUE, -# tempTable = FALSE, -# camelCaseToSnakeCase = TRUE, -# progressBar = FALSE -# ) -# -# timeSeries <- CohortDiagnostics:::getTimeSeries( -# connection = con, -# tempEmulationSchema = server$tempEmulationSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = cohortTable, -# runCohortTimeSeries = TRUE, -# runDataSourceTimeSeries = FALSE, # cannot test data source time series because we are using simulated cohort table -# timeSeriesMinDate = as.Date("2004-01-01"), -# timeSeriesMaxDate = as.Date("2006-12-31"), -# cohortIds = c(1, 2), -# stratifyByGender = FALSE, # cannot test stratification because it will require cohort table to be built from cdm -# stratifyByAgeGroup = FALSE # this test is 
using simulated cohort table -# ) -# -# # testing if values returned for cohort 1 is as expected -# timeSeriesCohort <- timeSeries %>% -# dplyr::filter(.data$cohortId == 1) %>% -# dplyr::filter(.data$seriesType == "T1") %>% -# dplyr::filter(.data$calendarInterval == "m") -# -# # there should be 8 records in this data frame, representing 8 months for the one subject in the cohort id = 1 -# testthat::expect_equal( -# object = nrow(timeSeriesCohort), -# expected = 8 -# ) -# -# # there should be 2 records in this data frame, representing the 2 starts for the one subject in the cohort id = 1 -# testthat::expect_equal( -# object = nrow(timeSeriesCohort %>% dplyr::filter(.data$recordsStart == 1)), -# expected = 2 -# ) -# -# # there should be 1 records in this data frame, representing the 1 incident start for the one subject in the cohort id = 1 -# testthat::expect_equal( -# object = nrow(timeSeriesCohort %>% dplyr::filter(.data$subjectsStartIn == 1)), -# expected = 1 -# ) -# }) -# }) -# } -# -# test_that("Testing cohort time series execution, incremental = FALSE", { -# testServer <- "sqlite" -# skip_if_not(testServer %in% names(testServers)) -# server <- testServers[[testServer]] -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) -# recordKeepingFile <- file.path(exportFolder, "record.csv") -# incremental <- FALSE -# -# with_dbc_connection(con, { -# cohort <- dplyr::tibble( -# cohortDefinitionId = c(1, 1, 2, 2), -# subjectId = c(1, 1, 1, 2), -# cohortStartDate = c( -# as.Date("2005-01-15"), -# as.Date("2005-07-15"), -# as.Date("2005-01-15"), -# as.Date("2005-07-15") -# ), -# cohortEndDate = c( -# as.Date("2005-05-15"), -# as.Date("2005-09-15"), -# as.Date("2005-05-15"), -# as.Date("2005-09-15") -# ) -# ) -# -# cohort <- dplyr::bind_rows( -# cohort, -# cohort %>% -# dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) -# ) -# -# cohortDefinitionSet <- -# cohort %>% -# 
dplyr::select(cohortDefinitionId) %>% -# dplyr::distinct() %>% -# dplyr::rename("cohortId" = "cohortDefinitionId") %>% -# dplyr::rowwise() %>% -# dplyr::mutate(json = RJSONIO::toJSON(list( -# cohortId = cohortId, -# randomString = c( -# sample(x = LETTERS, 5, replace = TRUE), -# sample(x = LETTERS, 4, replace = TRUE), -# sample(LETTERS, 1, replace = TRUE) -# ) -# ))) %>% -# dplyr::ungroup() %>% -# dplyr::mutate( -# sql = json, -# checksum = as.character(CohortDiagnostics:::computeChecksum(json)) -# ) %>% -# dplyr::ungroup() -# -# unlink( -# x = exportFolder, -# recursive = TRUE, -# force = TRUE -# ) -# dir.create( -# path = exportFolder, -# showWarnings = FALSE, -# recursive = TRUE -# ) -# -# cohortTable <- -# paste0( -# "ct_", -# format(Sys.time(), "%s"), -# sample(1:100, 1) -# ) -# -# DatabaseConnector::insertTable( -# connection = con, -# databaseSchema = server$cohortDatabaseSchema, -# tableName = cohortTable, -# data = cohort, -# dropTableIfExists = TRUE, -# createTable = TRUE, -# tempTable = FALSE, -# camelCaseToSnakeCase = TRUE, -# progressBar = FALSE -# ) -# -# CohortDiagnostics::runTimeSeries( -# connection = con, -# tempEmulationSchema = server$tempEmulationSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = cohortTable, -# cohortDefinitionSet = cohortDefinitionSet %>% -# dplyr::filter(cohortId %in% c(1, 2)), -# runCohortTimeSeries = TRUE, -# runDataSourceTimeSeries = FALSE, -# databaseId = "testDatabaseId", -# exportFolder = exportFolder, -# minCellCount = 0, -# instantiatedCohorts = cohort$cohortDefinitionId, -# incremental = incremental, -# recordKeepingFile = recordKeepingFile, -# observationPeriodDateRange = dplyr::tibble( -# observationPeriodMinDate = as.Date("2004-01-01"), -# observationPeriodMaxDate = as.Date("2007-12-31") -# ), -# batchSize = 1 -# ) -# -# # result -# timeSeriesResults <- -# readr::read_csv( -# file = file.path(exportFolder, "time_series.csv"), -# 
col_types = readr::cols() -# ) -# print(timeSeriesResults) -# -# testthat::expect_equal( -# object = timeSeriesResults$cohort_id %>% unique() %>% sort(), -# expected = c(1, 2) -# ) -# -# subset <- CohortDiagnostics:::subsetToRequiredCohorts( -# cohorts = cohortDefinitionSet, -# task = "runCohortTimeSeries", -# incremental = incremental -# ) %>% -# dplyr::arrange(cohortId) -# -# testthat::expect_equal( -# object = subset$cohortId, -# expected = c(1, 2, 1000, 2000) -# ) -# }) -# }) -# -# test_that("Testing cohort time series execution, incremental = TRUE", { -# testServer <- "sqlite" -# skip_if_not(testServer %in% names(testServers)) -# server <- testServers[[testServer]] -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) -# recordKeepingFile <- file.path(exportFolder, "record.csv") -# incremental <- TRUE -# -# with_dbc_connection(con, { -# cohort <- dplyr::tibble( -# cohortDefinitionId = c(1, 1, 2, 2), -# subjectId = c(1, 1, 1, 2), -# cohortStartDate = c( -# as.Date("2005-01-15"), -# as.Date("2005-07-15"), -# as.Date("2005-01-15"), -# as.Date("2005-07-15") -# ), -# cohortEndDate = c( -# as.Date("2005-05-15"), -# as.Date("2005-09-15"), -# as.Date("2005-05-15"), -# as.Date("2005-09-15") -# ) -# ) -# -# cohort <- dplyr::bind_rows( -# cohort, -# cohort %>% -# dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) -# ) -# -# cohortDefinitionSet <- -# cohort %>% -# dplyr::select(cohortDefinitionId) %>% -# dplyr::distinct() %>% -# dplyr::rename("cohortId" = "cohortDefinitionId") %>% -# dplyr::rowwise() %>% -# dplyr::mutate(json = RJSONIO::toJSON(list( -# cohortId = cohortId, -# randomString = c( -# sample(x = LETTERS, 5, replace = TRUE), -# sample(x = LETTERS, 4, replace = TRUE), -# sample(LETTERS, 1, replace = TRUE) -# ) -# ))) %>% -# dplyr::ungroup() %>% -# dplyr::mutate( -# sql = json, -# checksum = as.character(CohortDiagnostics:::computeChecksum(json)) -# ) %>% -# dplyr::ungroup() -# 
-# unlink( -# x = exportFolder, -# recursive = TRUE, -# force = TRUE -# ) -# dir.create( -# path = exportFolder, -# showWarnings = FALSE, -# recursive = TRUE -# ) -# -# cohortTable <- -# paste0( -# "ct_", -# format(Sys.time(), "%s"), -# sample(1:100, 1) -# ) -# -# DatabaseConnector::insertTable( -# connection = con, -# databaseSchema = server$cohortDatabaseSchema, -# tableName = cohortTable, -# data = cohort, -# dropTableIfExists = TRUE, -# createTable = TRUE, -# tempTable = FALSE, -# camelCaseToSnakeCase = TRUE, -# progressBar = FALSE -# ) -# -# CohortDiagnostics::runTimeSeries( -# connection = con, -# tempEmulationSchema = server$tempEmulationSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = cohortTable, -# cohortDefinitionSet = cohortDefinitionSet %>% -# dplyr::filter(cohortId %in% c(1, 2)), -# runCohortTimeSeries = TRUE, -# runDataSourceTimeSeries = FALSE, -# databaseId = "testDatabaseId", -# exportFolder = exportFolder, -# minCellCount = 0, -# instantiatedCohorts = cohort$cohortDefinitionId, -# incremental = incremental, -# recordKeepingFile = recordKeepingFile, -# observationPeriodDateRange = dplyr::tibble( -# observationPeriodMinDate = as.Date("2004-01-01"), -# observationPeriodMaxDate = as.Date("2007-12-31") -# ), -# batchSize = 1 -# ) -# -# recordKeepingFileData <- -# readr::read_csv( -# file = recordKeepingFile, -# col_types = readr::cols() -# ) -# -# # testing if check sum is written -# testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) -# -# # result -# timeSeriesResults1 <- -# readr::read_csv( -# file = file.path(exportFolder, "time_series.csv"), -# col_types = readr::cols() -# ) -# -# subset <- CohortDiagnostics:::subsetToRequiredCohorts( -# cohorts = cohortDefinitionSet, -# task = "runCohortTimeSeries", -# incremental = incremental, -# recordKeepingFile = recordKeepingFile -# ) %>% -# dplyr::arrange(cohortId) -# -# testthat::expect_equal( -# object = 
subset$cohortId, -# expected = c(1000, 2000) -# ) -# -# # delete the previously written results file. To see if the previously executed cohorts will have results after deletion -# unlink( -# x = file.path(exportFolder, "time_series.csv"), -# recursive = TRUE, -# force = TRUE -# ) -# -# CohortDiagnostics::runTimeSeries( -# connection = con, -# tempEmulationSchema = server$tempEmulationSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = cohortTable, -# cohortDefinitionSet = cohortDefinitionSet, -# runCohortTimeSeries = TRUE, -# runDataSourceTimeSeries = FALSE, -# databaseId = "testDatabaseId", -# exportFolder = exportFolder, -# minCellCount = 0, -# instantiatedCohorts = cohort$cohortDefinitionId, -# incremental = incremental, -# recordKeepingFile = recordKeepingFile, -# observationPeriodDateRange = dplyr::tibble( -# observationPeriodMinDate = as.Date("2004-01-01"), -# observationPeriodMaxDate = as.Date("2007-12-31") -# ), -# batchSize = 100 -# ) -# resultsNew <- -# readr::read_csv( -# file = file.path(exportFolder, "time_series.csv"), -# col_types = readr::cols() -# ) -# -# testthat::expect_equal( -# object = resultsNew$cohort_id %>% unique() %>% sort(), -# expected = c(1000, 2000) -# ) -# }) -# }) -# -# test_that("Testing Data source time series execution, incremental = FALSE ", { -# testServer <- "sqlite" -# skip_if_not(testServer %in% names(testServers)) -# server <- testServers[[testServer]] -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) -# recordKeepingFile <- file.path(exportFolder, "record.csv") -# incremental <- FALSE -# -# with_dbc_connection(con, { -# cohortDefinitionSet <- dplyr::tibble( -# cohortId = -44819062, -# # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 -# checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") 
-# ) -# -# unlink( -# x = exportFolder, -# recursive = TRUE, -# force = TRUE -# ) -# dir.create( -# path = exportFolder, -# showWarnings = FALSE, -# recursive = TRUE -# ) -# -# CohortDiagnostics::runTimeSeries( -# connection = con, -# tempEmulationSchema = server$tempEmulationSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortDefinitionSet = data.frame(), -# runCohortTimeSeries = FALSE, -# runDataSourceTimeSeries = TRUE, -# databaseId = "testDatabaseId", -# exportFolder = exportFolder, -# minCellCount = 0, -# incremental = incremental, -# observationPeriodDateRange = dplyr::tibble( -# observationPeriodMinDate = as.Date("2004-01-01"), -# observationPeriodMaxDate = as.Date("2007-12-31") -# ) -# ) -# -# # result -# dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), -# col_types = readr::cols()) -# -# subset <- subsetToRequiredCohorts( -# cohorts = cohortDefinitionSet, -# task = "runDataSourceTimeSeries", -# incremental = incremental -# ) %>% -# dplyr::arrange(cohortId) -# -# testthat::expect_equal( -# object = nrow(subset), -# expected = 1 -# ) -# }) -# }) -# -# test_that("Testing Data source time series execution, incremental = TRUE ", { -# testServer <- "sqlite" -# skip_if_not(testServer %in% names(testServers)) -# server <- testServers[[testServer]] -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) -# recordKeepingFile <- file.path(exportFolder, "record.csv") -# incremental <- TRUE -# -# with_dbc_connection(con, { -# cohortDefinitionSet <- dplyr::tibble( -# cohortId = -44819062, -# # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 -# checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") -# ) -# -# unlink( -# x = exportFolder, -# recursive = TRUE, -# force = TRUE -# ) -# dir.create( -# path = 
exportFolder, -# showWarnings = FALSE, -# recursive = TRUE -# ) -# -# CohortDiagnostics::runTimeSeries( -# connection = con, -# tempEmulationSchema = server$tempEmulationSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortDefinitionSet = data.frame(), -# runCohortTimeSeries = FALSE, -# runDataSourceTimeSeries = TRUE, -# databaseId = "testDatabaseId", -# exportFolder = exportFolder, -# minCellCount = 0, -# incremental = incremental, -# recordKeepingFile = recordKeepingFile, -# observationPeriodDateRange = dplyr::tibble( -# observationPeriodMinDate = as.Date("2004-01-01"), -# observationPeriodMaxDate = as.Date("2007-12-31") -# ) -# ) -# -# recordKeepingFileData <- readr::read_csv(file = recordKeepingFile, -# col_types = readr::cols()) -# -# # testing if check sum is written -# testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) -# testthat::expect_equal(object = recordKeepingFileData$cohortId, expected = -44819062) -# -# # result -# dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), -# col_types = readr::cols()) -# -# subset <- subsetToRequiredCohorts( -# cohorts = cohortDefinitionSet, -# task = "runDataSourceTimeSeries", -# incremental = incremental, -# recordKeepingFile = recordKeepingFile -# ) %>% -# dplyr::arrange(cohortId) -# -# testthat::expect_equal( -# object = nrow(subset), -# expected = 0 -# ) -# }) -# }) +# Test getTimeSeries on all testServers +for (nm in names(testServers)) { + + server <- testServers[[nm]] + con <- connect(server$connectionDetails) + exportFolder <- file.path(tempdir(), paste0(nm, "exp")) + recordKeepingFile <- file.path(exportFolder, "record.csv") + + test_that("Testing time series logic", { + skip_if(skipCdmTests, "cdm settings not configured") + + # to do - with incremental = FALSE + with_dbc_connection(con, { + # manually create cohort table and load to table + # Cohort table has a total of four 
records, with each cohort id having two each + # cohort 1 has one subject with two different cohort entries + # cohort 2 has two subject with two different cohort entries + cohort <- dplyr::tibble( + cohortDefinitionId = c(1, 1, 2, 2), + subjectId = c(1, 1, 1, 2), + cohortStartDate = c(as.Date("2005-01-15"), as.Date("2005-07-15"), as.Date("2005-01-15"), as.Date("2005-07-15")), + cohortEndDate = c(as.Date("2005-05-15"), as.Date("2005-09-15"), as.Date("2005-05-15"), as.Date("2005-09-15")) + ) + + cohortTable <- + paste0("ct_", Sys.getpid(), format(Sys.time(), "%s"), sample(1:100, 1)) + + DatabaseConnector::insertTable( + connection = con, + databaseSchema = server$cohortDatabaseSchema, + tableName = cohortTable, + data = cohort, + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = FALSE, + camelCaseToSnakeCase = TRUE, + progressBar = FALSE + ) + + timeSeries <- CohortDiagnostics:::getTimeSeries( + connection = con, + tempEmulationSchema = server$tempEmulationSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = cohortTable, + runCohortTimeSeries = TRUE, + runDataSourceTimeSeries = FALSE, # cannot test data source time series because we are using simulated cohort table + timeSeriesMinDate = as.Date("2004-01-01"), + timeSeriesMaxDate = as.Date("2006-12-31"), + cohortIds = c(1, 2), + stratifyByGender = FALSE, # cannot test stratification because it will require cohort table to be built from cdm + stratifyByAgeGroup = FALSE # this test is using simulated cohort table + ) + + # testing if values returned for cohort 1 is as expected + timeSeriesCohort <- timeSeries %>% + dplyr::filter(.data$cohortId == 1) %>% + dplyr::filter(.data$seriesType == "T1") %>% + dplyr::filter(.data$calendarInterval == "m") + + # there should be 8 records in this data frame, representing 8 months for the one subject in the cohort id = 1 + testthat::expect_equal( + object = nrow(timeSeriesCohort), + expected = 8 + ) 
+ + # there should be 2 records in this data frame, representing the 2 starts for the one subject in the cohort id = 1 + testthat::expect_equal( + object = nrow(timeSeriesCohort %>% dplyr::filter(.data$recordsStart == 1)), + expected = 2 + ) + + # there should be 1 records in this data frame, representing the 1 incident start for the one subject in the cohort id = 1 + testthat::expect_equal( + object = nrow(timeSeriesCohort %>% dplyr::filter(.data$subjectsStartIn == 1)), + expected = 1 + ) + }) + }) +} + +test_that("Testing cohort time series execution, incremental = FALSE", { + testServer <- "sqlite" + skip_if_not(testServer %in% names(testServers)) + server <- testServers[[testServer]] + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) + recordKeepingFile <- file.path(exportFolder, "record.csv") + incremental <- FALSE + + with_dbc_connection(con, { + cohort <- dplyr::tibble( + cohortDefinitionId = c(1, 1, 2, 2), + subjectId = c(1, 1, 1, 2), + cohortStartDate = c( + as.Date("2005-01-15"), + as.Date("2005-07-15"), + as.Date("2005-01-15"), + as.Date("2005-07-15") + ), + cohortEndDate = c( + as.Date("2005-05-15"), + as.Date("2005-09-15"), + as.Date("2005-05-15"), + as.Date("2005-09-15") + ) + ) + + cohort <- dplyr::bind_rows( + cohort, + cohort %>% + dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) + ) + + cohortDefinitionSet <- + cohort %>% + dplyr::select(cohortDefinitionId) %>% + dplyr::distinct() %>% + dplyr::rename("cohortId" = "cohortDefinitionId") %>% + dplyr::rowwise() %>% + dplyr::mutate(json = RJSONIO::toJSON(list( + cohortId = cohortId, + randomString = c( + sample(x = LETTERS, 5, replace = TRUE), + sample(x = LETTERS, 4, replace = TRUE), + sample(LETTERS, 1, replace = TRUE) + ) + ))) %>% + dplyr::ungroup() %>% + dplyr::mutate( + sql = json, + checksum = as.character(CohortDiagnostics:::computeChecksum(json)) + ) %>% + dplyr::ungroup() + + unlink( + x = exportFolder, 
+ recursive = TRUE, + force = TRUE + ) + dir.create( + path = exportFolder, + showWarnings = FALSE, + recursive = TRUE + ) + + cohortTable <- + paste0( + "ct_", + format(Sys.time(), "%s"), + sample(1:100, 1) + ) + + DatabaseConnector::insertTable( + connection = con, + databaseSchema = server$cohortDatabaseSchema, + tableName = cohortTable, + data = cohort, + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = FALSE, + camelCaseToSnakeCase = TRUE, + progressBar = FALSE + ) + + CohortDiagnostics::runTimeSeries( + connection = con, + tempEmulationSchema = server$tempEmulationSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = cohortTable, + cohortDefinitionSet = cohortDefinitionSet %>% + dplyr::filter(cohortId %in% c(1, 2)), + runCohortTimeSeries = TRUE, + runDataSourceTimeSeries = FALSE, + databaseId = "testDatabaseId", + exportFolder = exportFolder, + minCellCount = 0, + instantiatedCohorts = cohort$cohortDefinitionId, + incremental = incremental, + recordKeepingFile = recordKeepingFile, + observationPeriodDateRange = dplyr::tibble( + observationPeriodMinDate = as.Date("2004-01-01"), + observationPeriodMaxDate = as.Date("2007-12-31") + ), + batchSize = 1 + ) + + # result + timeSeriesResults <- + readr::read_csv( + file = file.path(exportFolder, "time_series.csv"), + col_types = readr::cols() + ) + print(timeSeriesResults) + + testthat::expect_equal( + object = timeSeriesResults$cohort_id %>% unique() %>% sort(), + expected = c(1, 2) + ) + + subset <- CohortDiagnostics:::subsetToRequiredCohorts( + cohorts = cohortDefinitionSet, + task = "runCohortTimeSeries", + incremental = incremental + ) %>% + dplyr::arrange(cohortId) + + testthat::expect_equal( + object = subset$cohortId, + expected = c(1, 2, 1000, 2000) + ) + }) +}) + +test_that("Testing cohort time series execution, incremental = TRUE", { + testServer <- "sqlite" + skip_if_not(testServer %in% names(testServers)) + server <- 
testServers[[testServer]] + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) + recordKeepingFile <- file.path(exportFolder, "record.csv") + incremental <- TRUE + + with_dbc_connection(con, { + cohort <- dplyr::tibble( + cohortDefinitionId = c(1, 1, 2, 2), + subjectId = c(1, 1, 1, 2), + cohortStartDate = c( + as.Date("2005-01-15"), + as.Date("2005-07-15"), + as.Date("2005-01-15"), + as.Date("2005-07-15") + ), + cohortEndDate = c( + as.Date("2005-05-15"), + as.Date("2005-09-15"), + as.Date("2005-05-15"), + as.Date("2005-09-15") + ) + ) + + cohort <- dplyr::bind_rows( + cohort, + cohort %>% + dplyr::mutate(cohortDefinitionId = cohortDefinitionId * 1000) + ) + + cohortDefinitionSet <- + cohort %>% + dplyr::select(cohortDefinitionId) %>% + dplyr::distinct() %>% + dplyr::rename("cohortId" = "cohortDefinitionId") %>% + dplyr::rowwise() %>% + dplyr::mutate(json = RJSONIO::toJSON(list( + cohortId = cohortId, + randomString = c( + sample(x = LETTERS, 5, replace = TRUE), + sample(x = LETTERS, 4, replace = TRUE), + sample(LETTERS, 1, replace = TRUE) + ) + ))) %>% + dplyr::ungroup() %>% + dplyr::mutate( + sql = json, + checksum = as.character(CohortDiagnostics:::computeChecksum(json)) + ) %>% + dplyr::ungroup() + + unlink( + x = exportFolder, + recursive = TRUE, + force = TRUE + ) + dir.create( + path = exportFolder, + showWarnings = FALSE, + recursive = TRUE + ) + + cohortTable <- + paste0( + "ct_", + format(Sys.time(), "%s"), + sample(1:100, 1) + ) + + DatabaseConnector::insertTable( + connection = con, + databaseSchema = server$cohortDatabaseSchema, + tableName = cohortTable, + data = cohort, + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = FALSE, + camelCaseToSnakeCase = TRUE, + progressBar = FALSE + ) + + CohortDiagnostics::runTimeSeries( + connection = con, + tempEmulationSchema = server$tempEmulationSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = 
server$cohortDatabaseSchema, + cohortTable = cohortTable, + cohortDefinitionSet = cohortDefinitionSet %>% + dplyr::filter(cohortId %in% c(1, 2)), + runCohortTimeSeries = TRUE, + runDataSourceTimeSeries = FALSE, + databaseId = "testDatabaseId", + exportFolder = exportFolder, + minCellCount = 0, + instantiatedCohorts = cohort$cohortDefinitionId, + incremental = incremental, + recordKeepingFile = recordKeepingFile, + observationPeriodDateRange = dplyr::tibble( + observationPeriodMinDate = as.Date("2004-01-01"), + observationPeriodMaxDate = as.Date("2007-12-31") + ), + batchSize = 1 + ) + + recordKeepingFileData <- + readr::read_csv( + file = recordKeepingFile, + col_types = readr::cols() + ) + + # testing if check sum is written + testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) + + # result + timeSeriesResults1 <- + readr::read_csv( + file = file.path(exportFolder, "time_series.csv"), + col_types = readr::cols() + ) + + subset <- CohortDiagnostics:::subsetToRequiredCohorts( + cohorts = cohortDefinitionSet, + task = "runCohortTimeSeries", + incremental = incremental, + recordKeepingFile = recordKeepingFile + ) %>% + dplyr::arrange(cohortId) + + testthat::expect_equal( + object = subset$cohortId, + expected = c(1000, 2000) + ) + + # delete the previously written results file. 
To see if the previously executed cohorts will have results after deletion + unlink( + x = file.path(exportFolder, "time_series.csv"), + recursive = TRUE, + force = TRUE + ) + + CohortDiagnostics::runTimeSeries( + connection = con, + tempEmulationSchema = server$tempEmulationSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = cohortTable, + cohortDefinitionSet = cohortDefinitionSet, + runCohortTimeSeries = TRUE, + runDataSourceTimeSeries = FALSE, + databaseId = "testDatabaseId", + exportFolder = exportFolder, + minCellCount = 0, + instantiatedCohorts = cohort$cohortDefinitionId, + incremental = incremental, + recordKeepingFile = recordKeepingFile, + observationPeriodDateRange = dplyr::tibble( + observationPeriodMinDate = as.Date("2004-01-01"), + observationPeriodMaxDate = as.Date("2007-12-31") + ), + batchSize = 100 + ) + resultsNew <- + readr::read_csv( + file = file.path(exportFolder, "time_series.csv"), + col_types = readr::cols() + ) + + testthat::expect_equal( + object = resultsNew$cohort_id %>% unique() %>% sort(), + expected = c(1000, 2000) + ) + }) +}) + +test_that("Testing Data source time series execution, incremental = FALSE ", { + testServer <- "sqlite" + skip_if_not(testServer %in% names(testServers)) + server <- testServers[[testServer]] + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) + recordKeepingFile <- file.path(exportFolder, "record.csv") + incremental <- FALSE + + with_dbc_connection(con, { + cohortDefinitionSet <- dplyr::tibble( + cohortId = -44819062, + # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 + checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") + ) + + unlink( + x = exportFolder, + recursive = TRUE, + force = TRUE + ) + dir.create( + path = exportFolder, + showWarnings = FALSE, + recursive = TRUE + ) 
+ + CohortDiagnostics::runTimeSeries( + connection = con, + tempEmulationSchema = server$tempEmulationSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortDefinitionSet = data.frame(), + runCohortTimeSeries = FALSE, + runDataSourceTimeSeries = TRUE, + databaseId = "testDatabaseId", + exportFolder = exportFolder, + minCellCount = 0, + incremental = incremental, + observationPeriodDateRange = dplyr::tibble( + observationPeriodMinDate = as.Date("2004-01-01"), + observationPeriodMaxDate = as.Date("2007-12-31") + ) + ) + + # result + dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), + col_types = readr::cols()) + + subset <- subsetToRequiredCohorts( + cohorts = cohortDefinitionSet, + task = "runDataSourceTimeSeries", + incremental = incremental + ) %>% + dplyr::arrange(cohortId) + + testthat::expect_equal( + object = nrow(subset), + expected = 1 + ) + }) +}) + +test_that("Testing Data source time series execution, incremental = TRUE ", { + testServer <- "sqlite" + skip_if_not(testServer %in% names(testServers)) + server <- testServers[[testServer]] + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- file.path(tempdir(), paste0(testServer, "exp")) + recordKeepingFile <- file.path(exportFolder, "record.csv") + incremental <- TRUE + + with_dbc_connection(con, { + cohortDefinitionSet <- dplyr::tibble( + cohortId = -44819062, + # cohort id is identified by an omop concept id https://athena.ohdsi.org/search-terms/terms/44819062 + checksum = CohortDiagnostics:::computeChecksum(column = "data source time series") + ) + + unlink( + x = exportFolder, + recursive = TRUE, + force = TRUE + ) + dir.create( + path = exportFolder, + showWarnings = FALSE, + recursive = TRUE + ) + + CohortDiagnostics::runTimeSeries( + connection = con, + tempEmulationSchema = server$tempEmulationSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + 
cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortDefinitionSet = data.frame(), + runCohortTimeSeries = FALSE, + runDataSourceTimeSeries = TRUE, + databaseId = "testDatabaseId", + exportFolder = exportFolder, + minCellCount = 0, + incremental = incremental, + recordKeepingFile = recordKeepingFile, + observationPeriodDateRange = dplyr::tibble( + observationPeriodMinDate = as.Date("2004-01-01"), + observationPeriodMaxDate = as.Date("2007-12-31") + ) + ) + + recordKeepingFileData <- readr::read_csv(file = recordKeepingFile, + col_types = readr::cols()) + + # testing if check sum is written + testthat::expect_true("checksum" %in% colnames(recordKeepingFileData)) + testthat::expect_equal(object = recordKeepingFileData$cohortId, expected = -44819062) + + # result + dataSourceTimeSeriesResult <- readr::read_csv(file = file.path(exportFolder, "time_series.csv"), + col_types = readr::cols()) + + subset <- subsetToRequiredCohorts( + cohorts = cohortDefinitionSet, + task = "runDataSourceTimeSeries", + incremental = incremental, + recordKeepingFile = recordKeepingFile + ) %>% + dplyr::arrange(cohortId) + + testthat::expect_equal( + object = nrow(subset), + expected = 0 + ) + }) +}) diff --git a/tests/testthat/test-runVisitContext.R b/tests/testthat/test-runVisitContext.R index 39e78990d..d0d0948c7 100644 --- a/tests/testthat/test-runVisitContext.R +++ b/tests/testthat/test-runVisitContext.R @@ -1,565 +1,565 @@ -# library(SqlRender) -# library(readxl) -# library(dplyr) -# library(readr) -# -# -# for (nm in names(testServers)) { -# -# server <- testServers[[nm]] -# -# con <- DatabaseConnector::connect(server$connectionDetails) -# -# exportFolder <- tempfile() -# -# dir.create(exportFolder) -# -# test_that(paste("test temporary table #concept_ids creation"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# -# getVisitContext(connection = con, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# 
tempEmulationSchema = server$tempEmulationSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = server$cohortTable, -# cohortIds = server$cohortIds, -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# expect_true(tempTableExists(con, "concept_ids")) -# -# }) -# -# -# test_that(paste("test no duplicates in concept_ids table for getVisitContext function"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# -# sql <- "SELECT * FROM #concept_ids" -# -# translatedSql <- translate(sql, targetDialect = server$connectionDetails$dbms) -# -# firstTime <- system.time( -# -# visitContextResult <- getVisitContext(connection = con, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# tempEmulationSchema = server$tempEmulationSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = server$cohortTable, -# cohortIds = server$cohortIds, -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# ) -# -# firstResult <- querySql(con, translatedSql) -# -# secondTime <- system.time( -# -# visitContextResult <- getVisitContext(connection = con, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# tempEmulationSchema = server$tempEmulationSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTable = server$cohortTable, -# cohortIds = server$cohortIds, -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# ) -# -# secondResult <- querySql(con, translatedSql) -# -# expect_equal(firstResult, secondResult) -# -# }) -# -# -# # For testing the runVisitContext, there is no need to run it on multiple database systems -# # since no sql other than the one included in the getVisitContext is executed. 
-# if (nm == "sqlite"){ -# -# test_that(paste("test that when incremental is FALSE the incremental file is not generated"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# -# expect_false(file.exists(file.path(exportFolder,"incremental"))) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = server$cohortDefinitionSet, -# exportFolder = exportFolder, -# databaseId = nm , -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = FALSE -# ) -# -# expect_false(file.exists(file.path(exportFolder,"incremental"))) -# }) -# -# test_that(paste("test that when incremental is TRUE the incremental file is generated when it doesn't exist"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# expect_false(file.exists(file.path(exportFolder, "incremental"))) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = server$cohortDefinitionSet, -# exportFolder = exportFolder, -# databaseId = nm , -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = TRUE -# ) -# -# expect_true(file.exists(file.path(exportFolder, "incremental"))) -# -# }) -# -# -# test_that(paste("test that the output file visit_context.csv is generated and is identical with the output of getVisitContext()"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# getVisitContextResult <- getVisitContext(connection = con, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# tempEmulationSchema = server$tempEmulationSchema, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# 
cohortTable = server$cohortTable, -# cohortIds = server$cohortIds, -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# getVisitContextResult <- unname(getVisitContextResult) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = server$cohortDefinitionSet, -# exportFolder = exportFolder, -# databaseId = nm, -# cohortTable = server$cohortTable, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = FALSE -# ) -# -# resultCsv <- file.path(exportFolder, "visit_context.csv") -# -# expect_true(file.exists(resultCsv)) -# -# runVisitContextResult <- read.csv(resultCsv, header = TRUE, sep = ",") -# runVisitContextResult$database_id <- NULL -# runVisitContextResult <- unname(runVisitContextResult) -# -# expect_equal(getVisitContextResult, runVisitContextResult) -# -# }) -# -# -# test_that(paste("test that incremental logic is correct: incremental run for the first time"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# cohortIds <- c(17492) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), -# exportFolder = exportFolder, -# databaseId = nm, -# cohortTable = server$cohortTable, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = TRUE -# ) -# -# resultCsv <- file.path(exportFolder, "visit_context.csv") -# -# expect_true(file.exists(resultCsv)) -# -# results <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) -# -# # Csv should contain results only from the specified cohort -# expect_equal(unique(results$cohort_id), c(17492)) -# -# }) -# -# test_that(paste("test that incremental logic is correct: no new cohorts"), { -# -# DatabaseConnector::disconnect(con) -# con <- 
DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# cohortIds <- c(17492) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), -# exportFolder = exportFolder, -# databaseId = nm, -# cohortTable = server$cohortTable, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = TRUE -# ) -# -# resultCsv <- file.path(exportFolder, "visit_context.csv") -# -# expect_true(file.exists(resultCsv)) -# -# results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), -# exportFolder = exportFolder, -# databaseId = nm, -# cohortTable = server$cohortTable, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = TRUE -# ) -# -# resultCsv <- file.path(exportFolder, "visit_context.csv") -# -# expect_true(file.exists(resultCsv)) -# -# results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) -# -# # Csv should contain the same result after the first run and the second run as no new cohorts were added -# expect_equal(results1, results2) -# -# }) -# -# test_that(paste("test that incremental logic is correct: output visit_context.csv must contain results for new cohorts"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# dir.create(exportFolder) -# -# cohortIds <- c(17492) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), -# exportFolder = exportFolder, -# databaseId = nm, -# cohortTable = server$cohortTable, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# 
cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = TRUE -# ) -# -# resultCsv <- file.path(exportFolder, "visit_context.csv") -# -# expect_true(file.exists(resultCsv)) -# -# results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) -# -# # Csv should contain results only from the specified cohort -# expect_equal(unique(results1$cohort_id), c(17492)) -# -# cohortIds <- c(17492, 17493) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), -# exportFolder = exportFolder, -# databaseId = nm, -# cohortTable = server$cohortTable, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = TRUE -# ) -# -# resultCsv <- file.path(exportFolder, "visit_context.csv") -# -# expect_true(file.exists(resultCsv)) -# -# results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) -# -# # Csv should contain results from both runs, hence both cohorts -# expect_equal(unique(results2$cohort_id), c(17492, 17493)) -# -# }) -# -# test_that(paste("test that the export folder is created if is not already there"), { -# -# DatabaseConnector::disconnect(con) -# con <- DatabaseConnector::connect(server$connectionDetails) -# exportFolder <- tempfile() -# -# expect_false(dir.exists(exportFolder)) -# -# cohortIds <- c(17492) -# -# runVisitContext(connection = con, -# cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), -# exportFolder = exportFolder, -# databaseId = nm, -# cohortTable = server$cohortTable, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cdmDatabaseSchema = server$cdmDatabaseSchema, -# minCellCount = 0, -# incremental = TRUE -# ) -# -# expect_true(dir.exists(exportFolder)) -# -# }) -# -# } -# } -# -# -# ##### Test cases with custom data ##### -# -# test_that(paste("test that the subject counts per cohort, visit concept and visit context 
are correct"), { -# -# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") -# -# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" -# -# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) -# -# -# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) -# -# addCohortTable(connection, cohortDataFilePath) -# -# -# visitContextResult <- getVisitContext(connection = connection, -# cdmDatabaseSchema = "main", -# tempEmulationSchema = "main", -# cohortDatabaseSchema = "main", -# cohortTable = "cohort", -# cohortIds = list(1,2), -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# resultPath <- system.file("test_cases/runVisitContext/testSubjectCounts/expectedResult.xlsx", package = "CohortDiagnostics") -# -# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", -# "numeric")) -# -# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] -# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) -# -# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] -# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) -# -# are_equal <- identical(visitContextResult, resultData) -# -# expect_true(are_equal) -# -# }) -# -# test_that(paste("test that only the new visit_concept_id are inserted into the #concept_ids table"), { -# -# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") -# -# patientDataFilePath <- 
"test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" -# -# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) -# -# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) -# -# addCohortTable(connection, cohortDataFilePath) -# -# getVisitContext(connection = connection, -# cdmDatabaseSchema = "main", -# tempEmulationSchema = "main", -# cohortDatabaseSchema = "main", -# cohortTable = "cohort", -# cohortIds = list(1,2), -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# sql <- "select * from #concept_ids" -# -# translatedSQL <- translate(sql, targetDialect = "sqlite") -# -# res1 <- querySql(connection = connection, sql = translatedSQL) -# -# -# are_equal <- all(sort(unlist(list(262, 9201))) == sort(unlist(res1$CONCEPT_ID))) -# -# expect_true(are_equal) -# -# new_row <- data.frame( -# visit_occurrence_id = 5, -# person_id = 2, -# visit_concept_id = 261, -# visit_start_date = as.Date("2015-01-10"), -# visit_start_datetime = as.POSIXct("2015-01-10 08:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), -# visit_end_date = as.Date("2015-01-10"), -# visit_end_datetime = as.POSIXct("2015-01-10 18:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), -# visit_type_concept_id = 32817, -# provider_id = 1, -# care_site_id = 1, -# visit_source_value = 0, -# visit_source_concept_id = 0, -# admitting_source_concept_id = 8870, -# admitting_source_value = "TRANSFER FROM HOSPITAL", -# discharge_to_concept_id = 581476, -# discharge_to_source_value = "HOME HEALTH CARE", -# preceding_visit_occurrence_id = 0 -# ) -# -# DBI::dbAppendTable(connection, "visit_occurrence", new_row) -# -# getVisitContext(connection = connection, -# cdmDatabaseSchema = "main", -# tempEmulationSchema = "main", -# cohortDatabaseSchema = "main", -# cohortTable = "cohort", -# cohortIds = list(1,2), -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# sql <- "select * from #concept_ids" -# -# translatedSQL <- 
translate(sql, targetDialect = "sqlite") -# -# res2 <- querySql(connection = connection, sql = translatedSQL) -# -# are_equal <- all(sort(unlist(list(262, 9201, 261))) == sort(unlist(res2$CONCEPT_ID))) -# -# expect_true(are_equal) -# }) -# -# -# -# test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after cohort creation are considered"), { -# -# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") -# -# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" -# -# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) -# -# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) -# -# addCohortTable(connection, cohortDataFilePath) -# -# visitContextResult <- getVisitContext(connection = connection, -# cdmDatabaseSchema = "main", -# tempEmulationSchema = "main", -# cohortDatabaseSchema = "main", -# cohortTable = "cohort", -# cohortIds = list(1,2), -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") -# -# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", -# "numeric")) -# -# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] -# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) -# -# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] -# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) -# -# are_equal <- 
identical(visitContextResult, resultData) -# -# expect_true(are_equal) -# -# }) -# -# test_that(paste("test that no other cohorts than the ones specified in cohortIds are included in the output"), { -# -# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") -# -# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" -# -# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) -# -# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) -# -# addCohortTable(connection, cohortDataFilePath) -# -# visitContextResult <- getVisitContext(connection = connection, -# cdmDatabaseSchema = "main", -# tempEmulationSchema = "main", -# cohortDatabaseSchema = "main", -# cohortTable = "cohort", -# cohortIds = list(1), -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# print(visitContextResult) -# expect_true(identical(unique(visitContextResult$cohortId), c(1))) -# -# }) -# -# test_that(paste("test that when the subjects in the cohort have no visits an empty data frame is returned"), { -# -# cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") -# -# patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_patientData.json" -# -# connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) -# -# connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) -# -# addCohortTable(connection, cohortDataFilePath) -# -# sql <- "delete from visit_occurrence;" -# -# translatedSQL <- translate(sql = sql, targetDialect = "sqlite") -# -# executeSql(connection = connection, sql = translatedSQL) -# -# visitContextResult <- getVisitContext(connection = connection, -# cdmDatabaseSchema = "main", -# tempEmulationSchema = 
"main", -# cohortDatabaseSchema = "main", -# cohortTable = "cohort", -# cohortIds = list(1,2), -# conceptIdTable = "#concept_ids", -# cdmVersion = 5 -# ) -# -# resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/expectedResult.xlsx", package = "CohortDiagnostics") -# -# resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", -# "numeric")) -# -# visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] -# visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) -# -# resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] -# resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) -# -# are_equal <- identical(visitContextResult, resultData) -# -# expect_true(are_equal) -# }) +library(SqlRender) +library(readxl) +library(dplyr) +library(readr) + + +for (nm in names(testServers)) { + + server <- testServers[[nm]] + + con <- DatabaseConnector::connect(server$connectionDetails) + + exportFolder <- tempfile() + + dir.create(exportFolder) + + test_that(paste("test temporary table #concept_ids creation"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + + getVisitContext(connection = con, + cdmDatabaseSchema = server$cdmDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + cohortIds = server$cohortIds, + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + expect_true(tempTableExists(con, "concept_ids")) + + }) + + + test_that(paste("test no duplicates in concept_ids table for getVisitContext function"), { + + DatabaseConnector::disconnect(con) + con <- 
DatabaseConnector::connect(server$connectionDetails) + + sql <- "SELECT * FROM #concept_ids" + + translatedSql <- translate(sql, targetDialect = server$connectionDetails$dbms) + + firstTime <- system.time( + + visitContextResult <- getVisitContext(connection = con, + cdmDatabaseSchema = server$cdmDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + cohortIds = server$cohortIds, + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + ) + + firstResult <- querySql(con, translatedSql) + + secondTime <- system.time( + + visitContextResult <- getVisitContext(connection = con, + cdmDatabaseSchema = server$cdmDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + cohortIds = server$cohortIds, + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + ) + + secondResult <- querySql(con, translatedSql) + + expect_equal(firstResult, secondResult) + + }) + + + # For testing the runVisitContext, there is no need to run it on multiple database systems + # since no sql other than the one included in the getVisitContext is executed. 
+ if (nm == "sqlite"){ + + test_that(paste("test that when incremental is FALSE the incremental file is not generated"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + dir.create(exportFolder) + + + expect_false(file.exists(file.path(exportFolder,"incremental"))) + + runVisitContext(connection = con, + cohortDefinitionSet = server$cohortDefinitionSet, + exportFolder = exportFolder, + databaseId = nm , + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = FALSE + ) + + expect_false(file.exists(file.path(exportFolder,"incremental"))) + }) + + test_that(paste("test that when incremental is TRUE the incremental file is generated when it doesn't exist"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + dir.create(exportFolder) + + expect_false(file.exists(file.path(exportFolder, "incremental"))) + + runVisitContext(connection = con, + cohortDefinitionSet = server$cohortDefinitionSet, + exportFolder = exportFolder, + databaseId = nm , + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = TRUE + ) + + expect_true(file.exists(file.path(exportFolder, "incremental"))) + + }) + + + test_that(paste("test that the output file visit_context.csv is generated and is identical with the output of getVisitContext()"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + dir.create(exportFolder) + + getVisitContextResult <- getVisitContext(connection = con, + cdmDatabaseSchema = server$cdmDatabaseSchema, + tempEmulationSchema = server$tempEmulationSchema, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTable = server$cohortTable, + cohortIds = 
server$cohortIds, + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + getVisitContextResult <- unname(getVisitContextResult) + + runVisitContext(connection = con, + cohortDefinitionSet = server$cohortDefinitionSet, + exportFolder = exportFolder, + databaseId = nm, + cohortTable = server$cohortTable, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = FALSE + ) + + resultCsv <- file.path(exportFolder, "visit_context.csv") + + expect_true(file.exists(resultCsv)) + + runVisitContextResult <- read.csv(resultCsv, header = TRUE, sep = ",") + runVisitContextResult$database_id <- NULL + runVisitContextResult <- unname(runVisitContextResult) + + expect_equal(getVisitContextResult, runVisitContextResult) + + }) + + + test_that(paste("test that incremental logic is correct: incremental run for the first time"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + dir.create(exportFolder) + + cohortIds <- c(17492) + + runVisitContext(connection = con, + cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), + exportFolder = exportFolder, + databaseId = nm, + cohortTable = server$cohortTable, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = TRUE + ) + + resultCsv <- file.path(exportFolder, "visit_context.csv") + + expect_true(file.exists(resultCsv)) + + results <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) + + # Csv should contain results only from the specified cohort + expect_equal(unique(results$cohort_id), c(17492)) + + }) + + test_that(paste("test that incremental logic is correct: no new cohorts"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + dir.create(exportFolder) + + cohortIds 
<- c(17492) + + runVisitContext(connection = con, + cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), + exportFolder = exportFolder, + databaseId = nm, + cohortTable = server$cohortTable, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = TRUE + ) + + resultCsv <- file.path(exportFolder, "visit_context.csv") + + expect_true(file.exists(resultCsv)) + + results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) + + runVisitContext(connection = con, + cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), + exportFolder = exportFolder, + databaseId = nm, + cohortTable = server$cohortTable, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = TRUE + ) + + resultCsv <- file.path(exportFolder, "visit_context.csv") + + expect_true(file.exists(resultCsv)) + + results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) + + # Csv should contain the same result after the first run and the second run as no new cohorts were added + expect_equal(results1, results2) + + }) + + test_that(paste("test that incremental logic is correct: output visit_context.csv must contain results for new cohorts"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + dir.create(exportFolder) + + cohortIds <- c(17492) + + runVisitContext(connection = con, + cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), + exportFolder = exportFolder, + databaseId = nm, + cohortTable = server$cohortTable, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = TRUE + ) + + resultCsv <- file.path(exportFolder, "visit_context.csv") + + expect_true(file.exists(resultCsv)) 
+ + results1 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) + + # Csv should contain results only from the specified cohort + expect_equal(unique(results1$cohort_id), c(17492)) + + cohortIds <- c(17492, 17493) + + runVisitContext(connection = con, + cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), + exportFolder = exportFolder, + databaseId = nm, + cohortTable = server$cohortTable, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = TRUE + ) + + resultCsv <- file.path(exportFolder, "visit_context.csv") + + expect_true(file.exists(resultCsv)) + + results2 <- read.csv(resultCsv, header = TRUE, stringsAsFactors = FALSE) + + # Csv should contain results from both runs, hence both cohorts + expect_equal(unique(results2$cohort_id), c(17492, 17493)) + + }) + + test_that(paste("test that the export folder is created if is not already there"), { + + DatabaseConnector::disconnect(con) + con <- DatabaseConnector::connect(server$connectionDetails) + exportFolder <- tempfile() + + expect_false(dir.exists(exportFolder)) + + cohortIds <- c(17492) + + runVisitContext(connection = con, + cohortDefinitionSet = loadTestCohortDefinitionSet(cohortIds, useSubsets = FALSE), + exportFolder = exportFolder, + databaseId = nm, + cohortTable = server$cohortTable, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cdmDatabaseSchema = server$cdmDatabaseSchema, + minCellCount = 0, + incremental = TRUE + ) + + expect_true(dir.exists(exportFolder)) + + }) + + } +} + + +##### Test cases with custom data ##### + +test_that(paste("test that the subject counts per cohort, visit concept and visit context are correct"), { + + cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") + + patientDataFilePath <- 
"test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" + + connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) + + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) + + addCohortTable(connection, cohortDataFilePath) + + + visitContextResult <- getVisitContext(connection = connection, + cdmDatabaseSchema = "main", + tempEmulationSchema = "main", + cohortDatabaseSchema = "main", + cohortTable = "cohort", + cohortIds = list(1,2), + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + resultPath <- system.file("test_cases/runVisitContext/testSubjectCounts/expectedResult.xlsx", package = "CohortDiagnostics") + + resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", + "numeric")) + + visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] + visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) + + resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] + resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) + + are_equal <- identical(visitContextResult, resultData) + + expect_true(are_equal) + +}) + +test_that(paste("test that only the new visit_concept_id are inserted into the #concept_ids table"), { + + cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") + + patientDataFilePath <- "test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" + + connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) + + addCohortTable(connection, cohortDataFilePath) + + 
getVisitContext(connection = connection, + cdmDatabaseSchema = "main", + tempEmulationSchema = "main", + cohortDatabaseSchema = "main", + cohortTable = "cohort", + cohortIds = list(1,2), + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + sql <- "select * from #concept_ids" + + translatedSQL <- translate(sql, targetDialect = "sqlite") + + res1 <- querySql(connection = connection, sql = translatedSQL) + + + are_equal <- all(sort(unlist(list(262, 9201))) == sort(unlist(res1$CONCEPT_ID))) + + expect_true(are_equal) + + new_row <- data.frame( + visit_occurrence_id = 5, + person_id = 2, + visit_concept_id = 261, + visit_start_date = as.Date("2015-01-10"), + visit_start_datetime = as.POSIXct("2015-01-10 08:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), + visit_end_date = as.Date("2015-01-10"), + visit_end_datetime = as.POSIXct("2015-01-10 18:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"), + visit_type_concept_id = 32817, + provider_id = 1, + care_site_id = 1, + visit_source_value = 0, + visit_source_concept_id = 0, + admitting_source_concept_id = 8870, + admitting_source_value = "TRANSFER FROM HOSPITAL", + discharge_to_concept_id = 581476, + discharge_to_source_value = "HOME HEALTH CARE", + preceding_visit_occurrence_id = 0 + ) + + DBI::dbAppendTable(connection, "visit_occurrence", new_row) + + getVisitContext(connection = connection, + cdmDatabaseSchema = "main", + tempEmulationSchema = "main", + cohortDatabaseSchema = "main", + cohortTable = "cohort", + cohortIds = list(1,2), + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + sql <- "select * from #concept_ids" + + translatedSQL <- translate(sql, targetDialect = "sqlite") + + res2 <- querySql(connection = connection, sql = translatedSQL) + + are_equal <- all(sort(unlist(list(262, 9201, 261))) == sort(unlist(res2$CONCEPT_ID))) + + expect_true(are_equal) +}) + + + +test_that(paste("test that to infer subject counts per cohort, visit concept, and visit context, visits within 30 days before or after 
cohort creation are considered"), { + + cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") + + patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsDates/test_getVisitContext_patientData.json" + + connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) + + addCohortTable(connection, cohortDataFilePath) + + visitContextResult <- getVisitContext(connection = connection, + cdmDatabaseSchema = "main", + tempEmulationSchema = "main", + cohortDatabaseSchema = "main", + cohortTable = "cohort", + cohortIds = list(1,2), + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsDates/expectedResult.xlsx", package = "CohortDiagnostics") + + resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", + "numeric")) + + visitContextResult <- visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] + visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) + + resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] + resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) + + are_equal <- identical(visitContextResult, resultData) + + expect_true(are_equal) + +}) + +test_that(paste("test that no other cohorts than the ones specified in cohortIds are included in the output"), { + + cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") + + patientDataFilePath <- 
"test_cases/runVisitContext/testSubjectCounts/test_getVisitContext_patientData.json" + + connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) + + addCohortTable(connection, cohortDataFilePath) + + visitContextResult <- getVisitContext(connection = connection, + cdmDatabaseSchema = "main", + tempEmulationSchema = "main", + cohortDatabaseSchema = "main", + cohortTable = "cohort", + cohortIds = list(1), + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + print(visitContextResult) + expect_true(identical(unique(visitContextResult$cohortId), c(1))) + +}) + +test_that(paste("test that when the subjects in the cohort have no visits an empty data frame is returned"), { + + cohortDataFilePath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_cohort.xlsx", package = "CohortDiagnostics") + + patientDataFilePath <- "test_cases/runVisitContext/testSubjectCountsNoVisits/test_getVisitContext_patientData.json" + + connectionDetailsCustomCDM <- createCustomCdm(patientDataFilePath) + + connection <- DatabaseConnector::connect(connectionDetails = connectionDetailsCustomCDM) + + addCohortTable(connection, cohortDataFilePath) + + sql <- "delete from visit_occurrence;" + + translatedSQL <- translate(sql = sql, targetDialect = "sqlite") + + executeSql(connection = connection, sql = translatedSQL) + + visitContextResult <- getVisitContext(connection = connection, + cdmDatabaseSchema = "main", + tempEmulationSchema = "main", + cohortDatabaseSchema = "main", + cohortTable = "cohort", + cohortIds = list(1,2), + conceptIdTable = "#concept_ids", + cdmVersion = 5 + ) + + resultPath <- system.file("test_cases/runVisitContext/testSubjectCountsNoVisits/expectedResult.xlsx", package = "CohortDiagnostics") + + resultData <- readxl::read_excel(resultPath, col_types = c("numeric", "numeric", "text", + "numeric")) + + visitContextResult <- 
visitContextResult[order(visitContextResult$cohortId, visitContextResult$visitConceptId, visitContextResult$visitContext, visitContextResult$subjects), ] + visitContextResult <- as.data.frame(lapply(visitContextResult, as.character), stringsAsFactors = FALSE) + + resultData <- resultData[order(resultData$cohortId, resultData$visitConceptId, resultData$visitContext, resultData$subjects), ] + resultData <- as.data.frame(lapply(resultData, as.character), stringsAsFactors = FALSE) + + are_equal <- identical(visitContextResult, resultData) + + expect_true(are_equal) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 185d60499..be4d9af64 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,217 +1,217 @@ -# library(testthat) -# -# # check makeDataExportable function -# test_that("Check function makeDataExportable", { -# cohortCountTableCorrect <- dplyr::tibble( -# cohortId = 3423, -# cohortEntries = 432432, -# cohortSubjects = 34234, -# databaseId = "ccae" -# ) -# cohortCountTableCorrect <- -# CohortDiagnostics:::makeDataExportable( -# x = cohortCountTableCorrect, -# tableName = "cohort_count" -# ) -# cohortCountTableCorrectNames <- -# SqlRender::camelCaseToSnakeCase(names(cohortCountTableCorrect)) %>% sort() -# -# resultsDataModel <- getResultsDataModelSpecifications() %>% -# dplyr::filter(tableName == "cohort_count") %>% -# dplyr::select(columnName) %>% -# dplyr::pull() %>% -# sort() -# -# expect_true(identical(cohortCountTableCorrectNames, resultsDataModel)) -# -# cohortCountTableInCorrect <- dplyr::tibble( -# cohortIdXXX = 3423, -# cohortEntryXXX = 432432, -# cohortSubjectsXXX = 34234 -# ) -# -# expect_error( -# CohortDiagnostics:::makeDataExportable( -# x = cohortCountTableInCorrect, -# tableName = "cohort_count" -# ) -# ) -# -# cohortCountTableCorrectDuplicated <- -# dplyr::bind_rows( -# cohortCountTableCorrect, -# cohortCountTableCorrect -# ) -# expect_error( -# CohortDiagnostics:::makeDataExportable( -# x = 
cohortCountTableCorrectDuplicated, -# tableName = "cohort_count" -# ) -# ) -# }) -# -# test_that("timeExecutions function", { -# readr::local_edition(1) -# temp <- tempfile() -# on.exit(unlink(temp, force = TRUE, recursive = TRUE)) -# dir.create(temp) -# -# # Basic test -# timeExecution( -# exportFolder = temp, -# taskName = "test_task1", -# cohortIds = c(1, 2, 3, 4), -# expr = { -# Sys.sleep(0.001) -# } -# ) -# expectedFilePath <- file.path(temp, "executionTimes.csv") -# checkmate::expect_file_exists(expectedFilePath) -# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) -# checkmate::expect_data_frame(result, nrows = 1, ncols = 5) -# -# expect_false(all(is.na(result$startTime))) -# expect_false(all(is.na(result$executionTime))) -# -# # Test append -# timeExecution( -# exportFolder = temp, -# taskName = "test_task2", -# cohortIds = NULL, -# expr = { -# Sys.sleep(0.001) -# } -# ) -# -# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) -# checkmate::expect_data_frame(result, nrows = 2, ncols = 5) -# -# # Parent string -# timeExecution( -# exportFolder = temp, -# taskName = "test_task3", -# parent = "testthat", -# cohortIds = NULL, -# expr = { -# Sys.sleep(0.001) -# } -# ) -# -# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) -# checkmate::expect_data_frame(result, nrows = 3, ncols = 5) -# -# # custom start/end times -# timeExecution( -# exportFolder = temp, -# taskName = "test_task4", -# parent = "testthat", -# cohortIds = NULL, -# start = "foo", -# execTime = "Foo" -# ) -# -# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) -# checkmate::expect_data_frame(result, nrows = 4, ncols = 5) -# -# timeExecution( -# exportFolder = temp, -# taskName = "test_task5", -# parent = "testthat", -# cohortIds = NULL, -# start = Sys.time() -# ) -# -# result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) -# checkmate::expect_data_frame(result, nrows = 5, ncols = 5) -# 
expect_false(all(is.na(result$startTime))) -# }) -# -# test_that("enforceMinCellValue replaces values below minimum with negative of minimum", { -# data <- data.frame(a = c(1, 2, 3, 4, 5)) -# minValues <- 3 -# result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) -# -# expect_equal(result$a, c(-3, -3, 3, 4, 5)) -# }) -# -# test_that("enforceMinCellValue does not replace NA values", { -# data <- data.frame(a = c(1, 2, NA, 4, 5)) -# minValues <- 3 -# result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) -# -# expect_equal(result$a, c(-3, -3, NA, 4, 5)) -# }) -# -# test_that("enforceMinCellValue does not replace zero values", { -# data <- data.frame(a = c(0, 2, 3, 4, 5)) -# minValues <- 3 -# result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) -# -# expect_equal(result$a, c(0, -3, 3, 4, 5)) -# }) -# -# test_that("enforceMinCellValue works with vector of minimum values", { -# data <- data.frame(a = c(1, 2, 3, 4, 5)) -# minValues <- c(1, 2, 3, 4, 5) -# result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) -# -# expect_equal(result$a, c(1, 2, 3, 4, 5)) -# }) -# -# test_that("timeExecution uses minutes as unit", { -# exportFolder <- tempfile() -# dir.create(exportFolder) -# timeExecution(exportFolder, -# taskName = "test 1 second", -# expr = Sys.sleep(1)) -# -# start <- as.POSIXct("2024-10-09 03:37:46") -# oneMinute <- start - as.POSIXct("2024-10-09 03:36:46") -# timeExecution(exportFolder, -# taskName = "test 1 minute", -# start = start, -# execTime = oneMinute) -# -# start <- as.POSIXct("2024-10-09 03:37:46") -# oneHour <- start - as.POSIXct("2024-10-09 02:37:46") -# timeExecution(exportFolder, -# taskName = "test 1 hour", -# start = start, -# execTime = oneHour) -# -# list.files(exportFolder) -# df <- readr::read_csv(file.path(exportFolder, "executionTimes.csv"), show_col_types = F) -# -# expect_equal(df$task, c("test 1 second", "test 1 minute", "test 1 hour")) -# expect_equal(df$executionTime, c(round(1/60, 4), 1, 
60)) -# }) -# -# for (server in testServers) { -# test_that(paste("tempTableExists works on ", server$connectionDetails$dbms), { -# con <- DatabaseConnector::connect(server$connectionDetails) -# DatabaseConnector::renderTranslateExecuteSql(con, "create table #tmp110010 (a int);", -# progressBar = F, -# reportOverallTime = F) -# expect_false(tempTableExists(con, "tmp98765")) -# expect_true(tempTableExists(con, "tmp110010")) -# DatabaseConnector::renderTranslateExecuteSql(con, "drop table #tmp110010;", -# progressBar = F, -# reportOverallTime = F) -# DatabaseConnector::disconnect(con) -# }) -# } -# -# test_that("assertCohortDefinitionSetContainsAllParents works", { -# cohorts <- loadTestCohortDefinitionSet() -# -# expect_no_error( -# CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents(cohorts) -# ) -# -# expect_error( -# CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents( -# dplyr::filter(cohorts, !(.data$cohortId %in% cohorts$subsetParent)) -# ) -# ) -# }) +library(testthat) + +# check makeDataExportable function +test_that("Check function makeDataExportable", { + cohortCountTableCorrect <- dplyr::tibble( + cohortId = 3423, + cohortEntries = 432432, + cohortSubjects = 34234, + databaseId = "ccae" + ) + cohortCountTableCorrect <- + CohortDiagnostics:::makeDataExportable( + x = cohortCountTableCorrect, + tableName = "cohort_count" + ) + cohortCountTableCorrectNames <- + SqlRender::camelCaseToSnakeCase(names(cohortCountTableCorrect)) %>% sort() + + resultsDataModel <- getResultsDataModelSpecifications() %>% + dplyr::filter(tableName == "cohort_count") %>% + dplyr::select(columnName) %>% + dplyr::pull() %>% + sort() + + expect_true(identical(cohortCountTableCorrectNames, resultsDataModel)) + + cohortCountTableInCorrect <- dplyr::tibble( + cohortIdXXX = 3423, + cohortEntryXXX = 432432, + cohortSubjectsXXX = 34234 + ) + + expect_error( + CohortDiagnostics:::makeDataExportable( + x = cohortCountTableInCorrect, + tableName = "cohort_count" + ) + ) + 
+ cohortCountTableCorrectDuplicated <- + dplyr::bind_rows( + cohortCountTableCorrect, + cohortCountTableCorrect + ) + expect_error( + CohortDiagnostics:::makeDataExportable( + x = cohortCountTableCorrectDuplicated, + tableName = "cohort_count" + ) + ) +}) + +test_that("timeExecutions function", { + readr::local_edition(1) + temp <- tempfile() + on.exit(unlink(temp, force = TRUE, recursive = TRUE)) + dir.create(temp) + + # Basic test + timeExecution( + exportFolder = temp, + taskName = "test_task1", + cohortIds = c(1, 2, 3, 4), + expr = { + Sys.sleep(0.001) + } + ) + expectedFilePath <- file.path(temp, "executionTimes.csv") + checkmate::expect_file_exists(expectedFilePath) + result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) + checkmate::expect_data_frame(result, nrows = 1, ncols = 5) + + expect_false(all(is.na(result$startTime))) + expect_false(all(is.na(result$executionTime))) + + # Test append + timeExecution( + exportFolder = temp, + taskName = "test_task2", + cohortIds = NULL, + expr = { + Sys.sleep(0.001) + } + ) + + result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) + checkmate::expect_data_frame(result, nrows = 2, ncols = 5) + + # Parent string + timeExecution( + exportFolder = temp, + taskName = "test_task3", + parent = "testthat", + cohortIds = NULL, + expr = { + Sys.sleep(0.001) + } + ) + + result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) + checkmate::expect_data_frame(result, nrows = 3, ncols = 5) + + # custom start/end times + timeExecution( + exportFolder = temp, + taskName = "test_task4", + parent = "testthat", + cohortIds = NULL, + start = "foo", + execTime = "Foo" + ) + + result <- readr::read_csv(expectedFilePath, col_types = readr::cols()) + checkmate::expect_data_frame(result, nrows = 4, ncols = 5) + + timeExecution( + exportFolder = temp, + taskName = "test_task5", + parent = "testthat", + cohortIds = NULL, + start = Sys.time() + ) + + result <- readr::read_csv(expectedFilePath, 
col_types = readr::cols()) + checkmate::expect_data_frame(result, nrows = 5, ncols = 5) + expect_false(all(is.na(result$startTime))) +}) + +test_that("enforceMinCellValue replaces values below minimum with negative of minimum", { + data <- data.frame(a = c(1, 2, 3, 4, 5)) + minValues <- 3 + result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) + + expect_equal(result$a, c(-3, -3, 3, 4, 5)) +}) + +test_that("enforceMinCellValue does not replace NA values", { + data <- data.frame(a = c(1, 2, NA, 4, 5)) + minValues <- 3 + result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) + + expect_equal(result$a, c(-3, -3, NA, 4, 5)) +}) + +test_that("enforceMinCellValue does not replace zero values", { + data <- data.frame(a = c(0, 2, 3, 4, 5)) + minValues <- 3 + result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) + + expect_equal(result$a, c(0, -3, 3, 4, 5)) +}) + +test_that("enforceMinCellValue works with vector of minimum values", { + data <- data.frame(a = c(1, 2, 3, 4, 5)) + minValues <- c(1, 2, 3, 4, 5) + result <- enforceMinCellValue(data, "a", minValues, silent = TRUE) + + expect_equal(result$a, c(1, 2, 3, 4, 5)) +}) + +test_that("timeExecution uses minutes as unit", { + exportFolder <- tempfile() + dir.create(exportFolder) + timeExecution(exportFolder, + taskName = "test 1 second", + expr = Sys.sleep(1)) + + start <- as.POSIXct("2024-10-09 03:37:46") + oneMinute <- start - as.POSIXct("2024-10-09 03:36:46") + timeExecution(exportFolder, + taskName = "test 1 minute", + start = start, + execTime = oneMinute) + + start <- as.POSIXct("2024-10-09 03:37:46") + oneHour <- start - as.POSIXct("2024-10-09 02:37:46") + timeExecution(exportFolder, + taskName = "test 1 hour", + start = start, + execTime = oneHour) + + list.files(exportFolder) + df <- readr::read_csv(file.path(exportFolder, "executionTimes.csv"), show_col_types = F) + + expect_equal(df$task, c("test 1 second", "test 1 minute", "test 1 hour")) + expect_equal(df$executionTime, 
c(round(1/60, 4), 1, 60)) +}) + +for (server in testServers) { + test_that(paste("tempTableExists works on ", server$connectionDetails$dbms), { + con <- DatabaseConnector::connect(server$connectionDetails) + DatabaseConnector::renderTranslateExecuteSql(con, "create table #tmp110010 (a int);", + progressBar = F, + reportOverallTime = F) + expect_false(tempTableExists(con, "tmp98765")) + expect_true(tempTableExists(con, "tmp110010")) + DatabaseConnector::renderTranslateExecuteSql(con, "drop table #tmp110010;", + progressBar = F, + reportOverallTime = F) + DatabaseConnector::disconnect(con) + }) +} + +test_that("assertCohortDefinitionSetContainsAllParents works", { + cohorts <- loadTestCohortDefinitionSet() + + expect_no_error( + CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents(cohorts) + ) + + expect_error( + CohortDiagnostics:::assertCohortDefinitionSetContainsAllParents( + dplyr::filter(cohorts, !(.data$cohortId %in% cohorts$subsetParent)) + ) + ) +}) From 95ef0acab937b9b7d4228d4ad5b47be27eb7e25e Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 14:04:21 +0200 Subject: [PATCH 17/18] doc --- tests/testthat/test-runInclusionStatistics.R | 137 +++++++++---------- 1 file changed, 61 insertions(+), 76 deletions(-) diff --git a/tests/testthat/test-runInclusionStatistics.R b/tests/testthat/test-runInclusionStatistics.R index f1eb3d09c..321a9d74c 100644 --- a/tests/testthat/test-runInclusionStatistics.R +++ b/tests/testthat/test-runInclusionStatistics.R @@ -1,76 +1,61 @@ -# # Copyright 2024 Observational Health Data Sciences and Informatics -# # -# # This file is part of CohortDiagnostics -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. 
-# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, software -# # distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# -# for (nm in names(testServers)) { -# -# server <- testServers[[nm]] -# con <- connect(server$connectionDetails) -# -# exportFolder <- file.path(tempdir(), paste0(nm, "exp")) -# databaseId <- "myDB" -# minCellCount <- 5 -# recordKeepingFile <- file.path(exportFolder, "record.csv") -# cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = server$cohortTable) -# -# test_that(paste("test run inclusion statistics output", nm), { -# -# dir.create(exportFolder) -# runInclusionStatistics(connection = con, -# exportFolder = exportFolder, -# databaseId = databaseId, -# cohortDefinitionSet = server$cohortDefinitionSet, -# cohortDatabaseSchema = server$cohortDatabaseSchema, -# cohortTableNames = cohortTableNames, -# incremental = TRUE, -# minCellCount = minCellCount, -# recordKeepingFile = recordKeepingFile) -# -# # Check cohort_inc_result -# expect_true(file.exists(file.path(exportFolder, "cohort_inc_result.csv"))) -# incResult <- read.csv(file.path(exportFolder, "cohort_inc_result.csv")) -# expect_equal(colnames(incResult), c("database_id", "cohort_id", "mode_id", "inclusion_rule_mask" , "person_count")) -# expect_equal(unique(incResult$database_id), databaseId) -# expect_true(all(incResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) -# -# # Check cohort_inc_stats -# expect_true(file.exists(file.path(exportFolder, "cohort_inc_stats.csv"))) -# incStatsResult <- read.csv(file.path(exportFolder, "cohort_inc_stats.csv")) -# expect_equal(colnames(incStatsResult), c("cohort_definition_id", "rule_sequence", "person_count", 
"gain_count" , "person_total", "mode_id")) -# -# # Check cohort_inclusion -# expect_true(file.exists(file.path(exportFolder, "cohort_inclusion.csv"))) -# inclusionResult <- read.csv(file.path(exportFolder, "cohort_inclusion.csv")) -# expect_equal(colnames(inclusionResult), c("database_id", "cohort_id", "rule_sequence", "name" , "description")) -# expect_equal(unique(inclusionResult$database_id), databaseId) -# expect_true(all(inclusionResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) -# -# # Check cohort_summary_stats -# expect_true(file.exists(file.path(exportFolder, "cohort_summary_stats.csv"))) -# sumStatsResult <- read.csv(file.path(exportFolder, "cohort_summary_stats.csv")) -# expect_equal(colnames(sumStatsResult), c("database_id", "cohort_id", "mode_id", "base_count" , "final_count")) -# expect_equal(unique(sumStatsResult$database_id), databaseId) -# expect_true(all(sumStatsResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) -# -# # Check recordKeepingFile -# expect_true(file.exists(recordKeepingFile)) -# recordKeeping <- read.csv(recordKeepingFile) -# expect_equal(colnames(recordKeeping), c("cohortId", "task", "checksum" , "timeStamp")) -# expect_equal(unique(recordKeeping$task), "runInclusionStatistics") -# expect_true(all(recordKeeping$cohortId %in% server$cohortDefinitionSet$cohortId)) -# -# unlink(exportFolder) -# }) -# } +# test runInclusionStatistics for all databases +for (nm in names(testServers)) { + + server <- testServers[[nm]] + con <- connect(server$connectionDetails) + + exportFolder <- file.path(tempdir(), paste0(nm, "exp")) + databaseId <- "myDB" + minCellCount <- 5 + recordKeepingFile <- file.path(exportFolder, "record.csv") + cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = server$cohortTable) + + test_that(paste("test run inclusion statistics output", nm), { + + dir.create(exportFolder) + runInclusionStatistics(connection = con, + exportFolder = exportFolder, + databaseId = databaseId, + 
cohortDefinitionSet = server$cohortDefinitionSet, + cohortDatabaseSchema = server$cohortDatabaseSchema, + cohortTableNames = cohortTableNames, + incremental = TRUE, + minCellCount = minCellCount, + recordKeepingFile = recordKeepingFile) + + # Check cohort_inc_result + expect_true(file.exists(file.path(exportFolder, "cohort_inc_result.csv"))) + incResult <- read.csv(file.path(exportFolder, "cohort_inc_result.csv")) + expect_equal(colnames(incResult), c("database_id", "cohort_id", "mode_id", "inclusion_rule_mask" , "person_count")) + expect_equal(unique(incResult$database_id), databaseId) + expect_true(all(incResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) + + # Check cohort_inc_stats + expect_true(file.exists(file.path(exportFolder, "cohort_inc_stats.csv"))) + incStatsResult <- read.csv(file.path(exportFolder, "cohort_inc_stats.csv")) + expect_equal(colnames(incStatsResult), c("cohort_definition_id", "rule_sequence", "person_count", "gain_count" , "person_total", "mode_id")) + + # Check cohort_inclusion + expect_true(file.exists(file.path(exportFolder, "cohort_inclusion.csv"))) + inclusionResult <- read.csv(file.path(exportFolder, "cohort_inclusion.csv")) + expect_equal(colnames(inclusionResult), c("database_id", "cohort_id", "rule_sequence", "name" , "description")) + expect_equal(unique(inclusionResult$database_id), databaseId) + expect_true(all(inclusionResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) + + # Check cohort_summary_stats + expect_true(file.exists(file.path(exportFolder, "cohort_summary_stats.csv"))) + sumStatsResult <- read.csv(file.path(exportFolder, "cohort_summary_stats.csv")) + expect_equal(colnames(sumStatsResult), c("database_id", "cohort_id", "mode_id", "base_count" , "final_count")) + expect_equal(unique(sumStatsResult$database_id), databaseId) + expect_true(all(sumStatsResult$cohort_id %in% server$cohortDefinitionSet$cohortId)) + + # Check recordKeepingFile + expect_true(file.exists(recordKeepingFile)) + recordKeeping 
<- read.csv(recordKeepingFile) + expect_equal(colnames(recordKeeping), c("cohortId", "task", "checksum" , "timeStamp")) + expect_equal(unique(recordKeeping$task), "runInclusionStatistics") + expect_true(all(recordKeeping$cohortId %in% server$cohortDefinitionSet$cohortId)) + + unlink(exportFolder) + }) +} From 05fee9fad6aa4d05f91efbd00f76f50c910ed7dc Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Fri, 11 Oct 2024 14:14:46 +0200 Subject: [PATCH 18/18] doc --- man/runCohortCharacterization.Rd | 44 +++++--------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/man/runCohortCharacterization.Rd b/man/runCohortCharacterization.Rd index 3652c15cf..8f9b2594f 100644 --- a/man/runCohortCharacterization.Rd +++ b/man/runCohortCharacterization.Rd @@ -1,36 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/runCohortCharacterization.R, -% R/runTemporalCohortCharacterization.R +% Please edit documentation in R/runCohortCharacterization.R \name{runCohortCharacterization} \alias{runCohortCharacterization} -\title{runTemporalCohortCharacterization} +\title{runCohortCharacterization} \usage{ -runCohortCharacterization( - connection, - databaseId, - exportFolder, - cdmDatabaseSchema, - cohortDatabaseSchema, - cohortTable, - covariateSettings, - tempEmulationSchema, - cdmVersion, - cohorts, - cohortCounts, - minCellCount, - instantiatedCohorts, - incremental, - recordKeepingFile, - covariateValueFileName = file.path(exportFolder, "temporal_covariate_value.csv"), - covariateValueContFileName = file.path(exportFolder, - "temporal_covariate_value_dist.csv"), - covariateRefFileName = file.path(exportFolder, "temporal_covariate_ref.csv"), - analysisRefFileName = file.path(exportFolder, "temporal_analysis_ref.csv"), - timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), - minCharacterizationMean = 0.001, - batchSize = getOption("CohortDiagnostics-FE-batch-size", default = 20) -) - runCohortCharacterization( 
connection, databaseId, @@ -118,19 +91,16 @@ on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent) of which cohort diagnostics has been executed.} } \value{ -None, it will write results to disk - None, it will write results to disk } \description{ -A short description... - This function takes cohorts as input and generates the covariates for these cohorts. The covariates are generated using FeatureExtraction. The output from this package is slightly modified before the output is written to disk. These are the files written to disk, if available: - * cohort_inc_result.csv - * cohort_inc_stats.csv - * cohort_inclusion.csv - * cohort_summary_stats.csv + * temporal_analysis_ref.csv + * temporal_covariate_ref.csv + * temporal_covariate_value.csv + * temporal_covariate_value_dist.csv + * temporal_time_ref.csv }