
Commit

Merge dcf79da into a503197
azimov authored Nov 16, 2023
2 parents a503197 + dcf79da commit a4bccf3
Showing 65 changed files with 843 additions and 1,940 deletions.
10 changes: 6 additions & 4 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: CohortDiagnostics
Type: Package
Title: Diagnostics for OHDSI Cohorts
Version: 3.2.4
Version: 3.3.0
Date: 2022-12-19
Authors@R: c(
person("Jamie", "Gilbert", email = "gilbert@ohdsi.org", role = c("aut", "cre")),
@@ -25,7 +25,7 @@ Depends:
R (>= 4.1.0)
Imports:
Andromeda (>= 0.6.0),
ResultModelManager,
ResultModelManager (>= 0.5.2),
checkmate,
clock,
digest,
@@ -50,14 +50,16 @@ Suggests:
zip,
knitr,
shiny,
OhdsiShinyModules
OhdsiShinyModules,
rsconnect,
yaml
Remotes:
ohdsi/Eunomia,
ohdsi/FeatureExtraction,
ohdsi/ResultModelManager,
ohdsi/ROhdsiWebApi,
ohdsi/CirceR,
ohdsi/CohortGenerator,
ohdsi/CohortGenerator@random_sample,
ohdsi/OhdsiShinyModules
License: Apache License
VignetteBuilder: knitr
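The DESCRIPTION now requires ResultModelManager >= 0.5.2 and points the CohortGenerator remote at the random_sample branch. For anyone installing this development version straight from GitHub, a minimal sketch follows; the repository and branch names simply mirror the Remotes field above, and remotes normally resolves those entries on its own when installing the package itself.

```r
# Install the development dependencies listed under Remotes, then the package.
# remotes::install_github() resolves a package's own Remotes field, so the two
# explicit dependency installs are shown for clarity rather than necessity.
install.packages("remotes")
remotes::install_github("ohdsi/CohortGenerator", ref = "random_sample")
remotes::install_github("ohdsi/ResultModelManager")  # needs >= 0.5.2
remotes::install_github("ohdsi/CohortDiagnostics")
```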
1 change: 1 addition & 0 deletions NAMESPACE
@@ -3,6 +3,7 @@
export(createDiagnosticsExplorerZip)
export(createMergedResultsFile)
export(createResultsDataModel)
export(deployPositConnectApp)
export(executeDiagnostics)
export(getCdmDataSourceInformation)
export(getCohortCounts)
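The NAMESPACE gains a single export, deployPositConnectApp, matching the NEWS entry below about easier Posit Connect deployment. Its signature is not part of this diff, so the sketch below only shows the kind of rsconnect call such a helper presumably wraps; the folder and app names are placeholder assumptions, not values from the package.

```r
# Sketch of what publishing the DiagnosticsExplorer shiny app to Posit Connect involves;
# deployPositConnectApp() presumably automates something along these lines.
# Server registration and API keys are configured separately with rsconnect::addServer()
# and rsconnect::connectApiUser().
library(rsconnect)

deployApp(
  appDir = "DiagnosticsExplorer",  # placeholder: folder containing the shiny app to publish
  appName = "cohort-diagnostics"   # placeholder: name the app is published under
)
```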
19 changes: 19 additions & 0 deletions NEWS.md
@@ -1,3 +1,22 @@
CohortDiagnostics 3.3.0
=======================

Changes:

1. Resolved issues that caused package build errors

2. Added a function to make deployment to Posit Connect servers easier

3. Added the ability to use CohortGenerator's sampling functionality in executeDiagnostics, which speeds up execution for very large cohort definitions (see the usage sketch after this file's diff)

Bug fixes:

1. Fixes to loading the shiny app in OhdsiShinyModules

2. Bug fixes to prevent null value issues with included source concept diagnostics

CohortDiagnostics 3.2.4
=======================
Bug Fix:
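NEWS item 3 refers to running diagnostics against sampled cohorts. The sampling argument itself is not visible in the hunks shown here, so the snippet below is only a hedged sketch of a diagnostics run: the argument names mirror the package's existing documented interface, the connection and schema values are placeholders, and the exact name of the sampling switch should be checked against ?executeDiagnostics in 3.3.0.

```r
library(CohortDiagnostics)

# Placeholder connection details -- replace with values for your own CDM.
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "postgresql",
  server = "localhost/ohdsi",
  user = "user",
  password = "secret"
)

# Load a cohort definition set from a study-package layout (CohortGenerator defaults).
cohortDefinitionSet <- CohortGenerator::getCohortDefinitionSet(
  settingsFileName = "Cohorts.csv",
  jsonFolder = "cohorts",
  sqlFolder = "sql/sql_server"
)

executeDiagnostics(
  cohortDefinitionSet = cohortDefinitionSet,
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = "cdm",
  cohortDatabaseSchema = "results",
  cohortTable = "cohort",
  exportFolder = "export",
  databaseId = "MyCdm"
  # The 3.3.0 sampling option added by this merge is not shown in these hunks;
  # see ?executeDiagnostics for its exact argument name before enabling it.
)
```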
34 changes: 17 additions & 17 deletions R/CohortCharacterizationDiagnostics.R
@@ -82,11 +82,11 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
covariates <- featureExtractionOutput$covariates %>%
dplyr::rename("cohortId" = "cohortDefinitionId") %>%
dplyr::left_join(populationSize, by = "cohortId", copy = TRUE) %>%
dplyr::mutate(p = sumValue / populationSize)
dplyr::mutate(p = .data$sumValue / populationSize)

if (nrow(covariates %>%
dplyr::filter(p > 1) %>%
dplyr::collect()) > 0) {
dplyr::filter(.data$p > 1) %>%
dplyr::collect()) > 0) {
stop(
paste0(
"During characterization, population size (denominator) was found to be smaller than features Value (numerator).",
@@ -96,10 +96,10 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
}

covariates <- covariates %>%
dplyr::mutate(sd = sqrt(p * (1 - p))) %>%
dplyr::select(-p) %>%
dplyr::mutate(sd = sqrt(.data$p * (1 - .data$p))) %>%
dplyr::select(-"p") %>%
dplyr::rename("mean" = "averageValue") %>%
dplyr::select(-populationSize)
dplyr::select(-"populationSize")

if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
covariates <- covariates %>%
@@ -113,13 +113,13 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
)

tidNaCount <- covariates %>%
dplyr::filter(is.na(timeId)) %>%
dplyr::filter(is.na(.data$timeId)) %>%
dplyr::count() %>%
dplyr::pull()

if (tidNaCount > 0) {
covariates <- covariates %>%
dplyr::mutate(timeId = if_else(is.na(.data$timeId), -1, .data$timeId))
dplyr::mutate(timeId = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId))

[Codecov / codecov/patch warning: added line R/CohortCharacterizationDiagnostics.R#L122 was not covered by tests]
}
} else {
covariates <- covariates %>%
@@ -144,9 +144,9 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) {
covariates <- featureExtractionOutput$covariatesContinuous %>%
dplyr::rename(
mean = averageValue,
sd = standardDeviation,
cohortId = cohortDefinitionId
mean = "averageValue",
sd = "standardDeviation",
cohortId = "cohortDefinitionId"
)
covariatesContinuous <- covariates
if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
@@ -162,13 +162,13 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
)

tidNaCount <- covariates %>%
dplyr::filter(is.na(timeId)) %>%
dplyr::filter(is.na(.data$timeId)) %>%
dplyr::count() %>%
dplyr::pull()

if (tidNaCount > 0) {
covariates <- covariates %>%
dplyr::mutate(timeId = if_else(is.na(.data$timeId), -1, .data$timeId))
dplyr::mutate(timeId = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId))
}
} else {
covariates <- covariates %>%
@@ -235,7 +235,7 @@ executeCohortCharacterization <- function(connection,
startCohortCharacterization <- Sys.time()
subset <- subsetToRequiredCohorts(
cohorts = cohorts %>%
dplyr::filter(cohortId %in% instantiatedCohorts),
dplyr::filter(.data$cohortId %in% instantiatedCohorts),
task = task,
incremental = incremental,
recordKeepingFile = recordKeepingFile
@@ -286,7 +286,7 @@ executeCohortCharacterization <- function(connection,
tempEmulationSchema = tempEmulationSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
cohortIds = subset[start:end,]$cohortId,
cohortIds = subset[start:end, ]$cohortId,
covariateSettings = covariateSettings,
cdmVersion = cdmVersion,
exportFolder = exportFolder
@@ -308,9 +308,9 @@ executeCohortCharacterization <- function(connection,
)

recordTasksDone(
cohortId = subset[start:end,]$cohortId,
cohortId = subset[start:end, ]$cohortId,
task = task,
checksum = subset[start:end,]$checksum,
checksum = subset[start:end, ]$checksum,
recordKeepingFile = recordKeepingFile,
incremental = incremental
)
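Nearly every change in this file follows one pattern: bare column references are replaced with the tidy-eval `.data$` pronoun, column names passed to select()/rename() are quoted, and if_else() is fully qualified as dplyr::if_else(). This is the usual way to clear R CMD check's "no visible binding for global variable" notes in package code. A minimal, standalone sketch of the pattern on toy data (not the package's own objects):

```r
library(dplyr)
# Inside a package you would also import the pronoun, e.g. @importFrom rlang .data

covariates <- tibble::tibble(
  sumValue       = c(5, 10, 2),
  populationSize = c(20, 20, 20),
  timeId         = c(1, NA, 2)
)

covariates %>%
  # .data$ tells R CMD check these are columns of the piped data frame,
  # not undefined global variables
  mutate(p = .data$sumValue / .data$populationSize) %>%
  mutate(sd = sqrt(.data$p * (1 - .data$p))) %>%
  # quoting the column name in select() avoids the same check note
  select(-"p") %>%
  # fully qualify if_else() so the code does not rely on dplyr being attached
  mutate(timeId = dplyr::if_else(is.na(.data$timeId), -1, .data$timeId))
```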
32 changes: 16 additions & 16 deletions R/CohortRelationship.R
@@ -85,7 +85,7 @@ runCohortRelationshipDiagnostics <-

timePeriods <- relationshipDays %>%
dplyr::distinct() %>%
dplyr::arrange(startDay, endDay) %>%
dplyr::arrange(.data$startDay, .data$endDay) %>%
dplyr::mutate(timeId = dplyr::row_number())

ParallelLogger::logTrace(" - Creating Andromeda object to collect results")
@@ -161,12 +161,12 @@ runCohortRelationshipDiagnostics <-
resultsInAndromeda$cohortRelationships <-
resultsInAndromeda$cohortRelationships %>%
dplyr::inner_join(resultsInAndromeda$timePeriods, by = "timeId") %>%
dplyr::select(-timeId) %>%
dplyr::select(-"timeId") %>%
dplyr::arrange(
cohortId,
comparatorCohortId,
startDay,
endDay
.data$cohortId,
.data$comparatorCohortId,
.data$startDay,
.data$endDay
)
resultsInAndromeda$timePeriods <- NULL

@@ -209,21 +209,21 @@ executeCohortRelationshipDiagnostics <- function(connection,
startCohortRelationship <- Sys.time()

allCohortIds <- cohortDefinitionSet %>%
dplyr::select(cohortId, checksum) %>%
dplyr::select("cohortId", "checksum") %>%
dplyr::rename(
targetCohortId = cohortId,
targetChecksum = checksum
targetCohortId = "cohortId",
targetChecksum = "checksum"
) %>%
dplyr::distinct()
combinationsOfPossibleCohortRelationships <- allCohortIds %>%
tidyr::crossing(allCohortIds %>%
dplyr::rename(
comparatorCohortId = targetCohortId,
comparatorChecksum = targetChecksum
comparatorCohortId = "targetCohortId",
comparatorChecksum = "targetChecksum"
)) %>%
dplyr::filter(targetCohortId != comparatorCohortId) %>%
dplyr::arrange(targetCohortId, comparatorCohortId) %>%
dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum))
dplyr::filter(.data$targetCohortId != .data$comparatorCohortId) %>%
dplyr::arrange(.data$targetCohortId, .data$comparatorCohortId) %>%
dplyr::mutate(checksum = paste0(.data$targetChecksum, .data$comparatorChecksum))

subset <- subsetToRequiredCombis(
combis = combinationsOfPossibleCohortRelationships,
@@ -247,15 +247,15 @@ executeCohortRelationshipDiagnostics <- function(connection,
(nrow(combinationsOfPossibleCohortRelationships) - (
nrow(
combinationsOfPossibleCohortRelationships %>%
dplyr::filter(targetCohortId %in% c(subset$targetCohortId))
dplyr::filter(.data$targetCohortId %in% c(subset$targetCohortId))
)
)) > 0) {
ParallelLogger::logInfo(
sprintf(
" - Skipping %s combinations in incremental mode because these were previously computed.",
nrow(combinationsOfPossibleCohortRelationships) - nrow(
combinationsOfPossibleCohortRelationships %>%
dplyr::filter(targetCohortId %in% c(subset$targetCohortId))
dplyr::filter(.data$targetCohortId %in% c(subset$targetCohortId))

[Codecov / codecov/patch warning: added line R/CohortRelationship.R#L258 was not covered by tests]
)
)
)
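The rewritten pairing step in executeCohortRelationshipDiagnostics builds every ordered target/comparator pair with tidyr::crossing, drops self-pairs, and concatenates the two checksums so incremental mode can detect a change to either cohort in a pair. A small standalone illustration with toy cohort IDs and checksums (not the package's record-keeping logic):

```r
library(dplyr)
library(tidyr)

allCohortIds <- tibble::tibble(
  targetCohortId = c(1, 2, 3),
  targetChecksum = c("a", "b", "c")
)

# Cross the table with itself, drop pairs where target == comparator,
# and combine the two checksums so incremental mode can tell when either
# cohort definition in the pair has changed.
combinations <- allCohortIds %>%
  crossing(allCohortIds %>%
    rename(
      comparatorCohortId = "targetCohortId",
      comparatorChecksum = "targetChecksum"
    )) %>%
  filter(.data$targetCohortId != .data$comparatorCohortId) %>%
  arrange(.data$targetCohortId, .data$comparatorCohortId) %>%
  mutate(checksum = paste0(.data$targetChecksum, .data$comparatorChecksum))

combinations  # 6 ordered pairs for 3 cohorts, i.e. n * (n - 1)
```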
59 changes: 40 additions & 19 deletions R/ConceptSets.R
@@ -20,6 +20,9 @@ extractConceptSetsSqlFromCohortSql <- function(cohortSql) {
}
sql <- gsub("with primary_events.*", "", cohortSql)

if (is.null(sql) || length(nchar(sql)) == 0 || is.na(nchar(sql)) || is.nan(nchar(sql))) {
return(tidyr::tibble())
}
# Find opening and closing parentheses:
starts <- stringr::str_locate_all(sql, "\\(")[[1]][, 1]
ends <- stringr::str_locate_all(sql, "\\)")[[1]][, 1]
@@ -71,8 +74,14 @@


extractConceptSetsJsonFromCohortJson <- function(cohortJson) {
cohortDefinition <-
RJSONIO::fromJSON(content = cohortJson, digits = 23)
cohortDefinition <- tryCatch(
{
RJSONIO::fromJSON(content = cohortJson, digits = 23)
},
error = function(msg) {
return(list())
}
)
if ("expression" %in% names(cohortDefinition)) {
expression <- cohortDefinition$expression
} else {
@@ -121,10 +130,10 @@ combineConceptSetsFromCohorts <- function(cohorts) {
checkmate::reportAssertions(errorMessage)
checkmate::assertDataFrame(
x = cohorts %>% dplyr::select(
cohortId,
sql,
json,
cohortName
"cohortId",
"sql",
"json",
"cohortName"
),
any.missing = FALSE,
min.cols = 4,
@@ -176,7 +185,7 @@ combineConceptSetsFromCohorts <- function(cohorts) {
}
}
if (length(conceptSets) == 0) {
return(NULL)
return(data.frame())
}
conceptSets <- dplyr::bind_rows(conceptSets) %>%
dplyr::arrange(.data$cohortId, .data$conceptSetId)
@@ -187,7 +196,10 @@
dplyr::distinct()

conceptSets <- conceptSets %>%
dplyr::inner_join(uniqueConceptSets, by = "conceptSetExpression") %>%
dplyr::inner_join(uniqueConceptSets,
by = "conceptSetExpression",
relationship = "many-to-many"
) %>%
dplyr::distinct() %>%
dplyr::relocate(
"uniqueConceptSetId",
@@ -321,7 +333,7 @@ getCodeSetIds <- function(criterionList) {
return(NULL)
} else {
return(dplyr::tibble(domain = names(criterionList), codeSetIds = codeSetIds)
%>% dplyr::filter(!is.na(codeSetIds)))
%>% dplyr::filter(!is.na(.data$codeSetIds)))
}
}

@@ -330,11 +342,17 @@ exportConceptSets <- function(cohortDefinitionSet, exportFolder, minCellCount, d
# We need to get concept sets from all cohorts in case subsets are present and
# Added incrementally after cohort generation
conceptSets <- combineConceptSetsFromCohorts(cohortDefinitionSet)

if (!hasData(conceptSets)) {
return(invisible(NULL))
}

conceptSets <- conceptSets %>%
dplyr::select(-"uniqueConceptSetId") %>%
dplyr::distinct()
# Save concept set metadata ---------------------------------------
conceptSetsExport <- makeDataExportable(
x = conceptSets %>%
dplyr::select(-uniqueConceptSetId) %>%
dplyr::distinct(),
x = conceptSets,
tableName = "concept_sets",
minCellCount = minCellCount,
databaseId = databaseId
@@ -404,7 +422,7 @@ runConceptSetDiagnostics <- function(connection,
subset <- dplyr::distinct(subset)

if (nrow(subset) == 0) {
return()
return(NULL)
}

# We need to get concept sets from all cohorts in case subsets are present and
@@ -514,7 +532,8 @@ runConceptSetDiagnostics <- function(connection,
"cohortId",
"conceptSetId"
) %>% dplyr::distinct(),
by = "uniqueConceptSetId"
by = "uniqueConceptSetId",
relationship = "many-to-many"
) %>%
dplyr::select(-"uniqueConceptSetId") %>%
dplyr::mutate(databaseId = !!databaseId) %>%
@@ -664,7 +683,7 @@ runConceptSetDiagnostics <- function(connection,
)
return(tidyr::tibble())
}
primaryCodesetIds <- primaryCodesetIds %>% dplyr::filter(domain %in%
primaryCodesetIds <- primaryCodesetIds %>% dplyr::filter(.data$domain %in%
c(domains$domain %>% unique()))
if (nrow(primaryCodesetIds) == 0) {
warning(
@@ -927,7 +946,8 @@ runConceptSetDiagnostics <- function(connection,
"cohortId",
"conceptSetId"
) %>% dplyr::distinct(),
by = "uniqueConceptSetId"
by = "uniqueConceptSetId",
relationship = "many-to-many"
) %>%
dplyr::select(-"uniqueConceptSetId") %>%
dplyr::select(
Expand All @@ -943,8 +963,8 @@ runConceptSetDiagnostics <- function(connection,
.data$conceptId
) %>%
dplyr::summarise(
conceptCount = max(conceptCount),
conceptSubjects = max(conceptSubjects)
conceptCount = max(.data$conceptCount),
conceptSubjects = max(.data$conceptSubjects)
) %>%
dplyr::ungroup()
data <- makeDataExportable(
@@ -1012,7 +1032,8 @@ runConceptSetDiagnostics <- function(connection,
dplyr::tibble() %>%
dplyr::rename("uniqueConceptSetId" = "codesetId") %>%
dplyr::inner_join(conceptSets %>% dplyr::distinct(),
by = "uniqueConceptSetId"
by = "uniqueConceptSetId",
relationship = "many-to-many"
) %>%
dplyr::select(
"cohortId",
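Several joins in this file gain relationship = "many-to-many". In dplyr >= 1.1.0 a mutate-join that duplicates rows on both sides of the key raises a warning unless the relationship is declared explicitly; here the fan-out is intentional, because the same de-duplicated concept set can be referenced by several cohorts. A small standalone illustration with toy tables, assuming dplyr 1.1.0 or later:

```r
library(dplyr)

# One row per concept resolved for each de-duplicated concept set
resolvedConcepts <- tibble::tibble(
  uniqueConceptSetId = c(1, 1, 2),
  conceptId = c(1001, 1002, 2001)
)

# The same unique concept set can be referenced by several cohorts
conceptSetUsage <- tibble::tibble(
  uniqueConceptSetId = c(1, 1, 2),
  cohortId = c(10, 20, 10),
  conceptSetId = c(0, 3, 1)
)

# Without relationship = "many-to-many", dplyr >= 1.1.0 warns because key values
# are duplicated on both sides; declaring it documents that the fan-out (every
# concept repeated for every cohort using the set) is intentional.
resolvedConcepts %>%
  inner_join(conceptSetUsage,
    by = "uniqueConceptSetId",
    relationship = "many-to-many"
  )
```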
