Merge pull request #102 from darwin-eu-dev/issue_96

Fix r-cmd check
darwin-eu-dev · Oct 29, 2024 · f653152 · f653152
2 parents 3800d7f + b5e940d
commit f653152
Show file tree

Hide file tree

Showing 60 changed files with 707 additions and 637 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -31,4 +31,4 @@ compare_versions
 ^Meta$
 inst/shiny/DiagnosticsExplorer/renv
 work/
-sql/
+^sql/$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -51,7 +51,8 @@ Suggests:
     shiny,
     OhdsiShinyModules,
     rsconnect,
-    yaml
+    yaml,
+    ggplot2
 Remotes:
   ohdsi/OhdsiShinyModules
 License: Apache License

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,7 @@ export(getCdmDataSourceInformation)
 export(getCohortCounts)
 export(getConceptCountsTableName)
 export(getDataMigrator)
+export(getDefaultCovariateSettings)
 export(getDefaultVocabularyTableNames)
 export(getResultsDataModelSpecifications)
 export(launchDiagnosticsExplorer)
@@ -22,6 +23,7 @@ export(runIncidenceRate)
 export(runIncludedSourceConcepts)
 export(runInclusionStatistics)
 export(runOrphanConcepts)
+export(runResolvedConceptSets)
 export(runTimeSeries)
 export(runVisitContext)
 export(uploadResults)

diff --git a/R/ConceptSetUtils.R b/R/ConceptSetUtils.R
@@ -301,7 +301,7 @@ exportConceptSets <- function(cohortDefinitionSet, exportFolder, minCellCount, d
     minCellCount = minCellCount,
     databaseId = databaseId,
     incremental = FALSE,
-    cohortId = conceptSetsExport$cohortId
+    cohortId = cohortDefinitionSet$cohortId
   )
 }
 

diff --git a/R/ResultsDataModel.R b/R/ResultsDataModel.R
@@ -141,19 +141,22 @@ uploadResults <- function(connectionDetails,
 
   ParallelLogger::logInfo("Unzipping ", zipFileName)
   zip::unzip(zipFileName, exdir = unzipFolder)
-
-  ResultModelManager::uploadResults(
-    connectionDetails = connectionDetails,
-    schema = schema,
-    resultsFolder = unzipFolder,
-    tablePrefix = tablePrefix,
-    forceOverWriteOfSpecifications = forceOverWriteOfSpecifications,
-    purgeSiteDataBeforeUploading = purgeSiteDataBeforeUploading,
-    runCheckAndFixCommands = TRUE,
-    databaseIdentifierFile = "database.csv",
-    specifications = getResultsDataModelSpecifications(),
-    warnOnMissingTable = FALSE,
-    ...
+
+  # suppressing warning for reserved keywords in SQL
+  suppressWarnings(
+    ResultModelManager::uploadResults(
+      connectionDetails = connectionDetails,
+      schema = schema,
+      resultsFolder = unzipFolder,
+      tablePrefix = tablePrefix,
+      forceOverWriteOfSpecifications = forceOverWriteOfSpecifications,
+      purgeSiteDataBeforeUploading = purgeSiteDataBeforeUploading,
+      runCheckAndFixCommands = TRUE,
+      databaseIdentifierFile = "database.csv",
+      specifications = getResultsDataModelSpecifications(),
+      warnOnMissingTable = FALSE,
+      ...
+    )
   )
 }
 

diff --git a/R/executeDiagnostics.R b/R/executeDiagnostics.R
@@ -17,6 +17,8 @@
 #' Get default covariate settings
 #' @description
 #' Default covariate settings for cohort diagnostics execution
+#' 
+#' @return Default covariate settings
 #' @export
 getDefaultCovariateSettings <- function() {
   FeatureExtraction::createTemporalCovariateSettings(
@@ -94,27 +96,27 @@ getDefaultCovariateSettings <- function() {
 #' using \code{RFeatureExtraction::createTemporalCovariateSettings}
 #' Alternatively, a covariate setting object may be created using the above as an example.
 #'
-#' @template Connection
+#' @template connectionDetails
 #' @template CdmDatabaseSchema
 #' @template VocabularyDatabaseSchema
 #' @template CohortDatabaseSchema
 #' @template TempEmulationSchema
 #' @template CohortTable
 #' @template CohortSetReference
 #' @template exportFolder              
-#' @template cohortIds
+#' @template CohortIds
 #' @template cohortDefinitionSet
 #' @template MinCellCount
 #' @template Incremental
 #' @template cdmVersion
 #' @template databaseId
 #' @template minCharacterizationMean     
 #' 
-#' @param cohortTableNames            Cohort Table names used by CohortGenerator package
+#' @param cohortTableNames            Cohort Table names used by CohortGenerator package.
 #' @param conceptCountsTable          Concepts count table name. The default is "#concept_counts" to create a temporal concept counts table.
-#'                                    If an external concept counts table is used, provide the name in character, e.g. "concept_counts" without a hash
-#' @param databaseName                The full name of the database. If NULL, defaults to value in cdm_source table
-#' @param databaseDescription         A short description (several sentences) of the database. If NULL, defaults to value in cdm_source table
+#'                                    If an external concept counts table is used, provide the name in character, e.g. "concept_counts" without a hash.
+#' @param databaseName                The full name of the database. If NULL, defaults to value in cdm_source table.
+#' @param databaseDescription         A short description (several sentences) of the database. If NULL, defaults to value in cdm_source table.
 #' @param runInclusionStatistics      Generate and export statistic on the cohort inclusion rules?
 #' @param runIncludedSourceConcepts   Generate and export the source concepts included in the cohorts?
 #' @param runOrphanConcepts           Generate and export potential orphan concepts?
@@ -130,9 +132,6 @@ getDefaultCovariateSettings <- function() {
 #'                                    the createTemporalCovariateSettings function in the FeatureExtraction package, or a list
 #'                                    of such objects.
 #' @param irWashoutPeriod             Number of days washout to include in calculation of incidence rates - default is 0
-#' @param incrementalFolder           If \code{incremental = TRUE}, specify a folder where records are kept
-#'                                    of which cohort diagnostics has been executed.
-#' @param useExternalConceptCountsTable If TRUE an external table for the cohort concept counts will be used.
 #' @param runFeatureExtractionOnSample Logical. If TRUE, the function will operate on a sample of the data.
 #'                                    Default is FALSE, meaning the function will operate on the full data set.
 #'
@@ -144,20 +143,6 @@ getDefaultCovariateSettings <- function() {
 #'
 #' @param seedArgs                    List. Additional arguments to pass to the sampling function.
 #'                                    This can be used to control aspects of the sampling process beyond the seed and sample size.
-#'
-#' @param sampleIdentifierExpression Character. An expression that generates unique identifiers for each sample.
-#'                                   This expression can use the variables 'cohortId' and 'seed'.
-#'                                   Default is "cohortId * 1000 + seed", which ensures unique identifiers
-#'                                   as long as there are fewer than 1000 cohorts.
-#'                                   
-#' @param useAchilles                Logical. Should the pre-computed Achilles analyses be used to get concept counts? TRUE or FALSE (default)
-#'
-#' @param achillesDatabaseSchema     Character. The name of the schema where the Achilles results tables are located. 
-#'                                   Require if `useAchilles` is TRUE and ignored otherwise.
-#'                                   
-#' @param workDatabaseSchema         Character. The name of a schema where the user has write access. Intermediate tables for concept counts 
-#'                                   and orphan concepts will be created in this schema if supplied. If NULL (default) intermediate tables will
-#'                                   be created as temporary tables.                        
 #' @examples
 #' \dontrun{
 #' # Load cohorts (assumes that they have already been instantiated)
@@ -364,19 +349,9 @@ executeDiagnostics <- function(cohortDefinitionSet,
   }
 
   # Create output and incremental folders. check that we have write access.
-  if (!file.exists(gsub("/$", "", exportFolder))) {
-    dir.create(name, recursive = TRUE)
-    ParallelLogger::logInfo("Created export folder", exportFolder)
-  }
-  checkmate::assertDirectory(exportFolder, access = "w", add = errorMessage)
-
-  if (incremental) {
-    if (!file.exists(gsub("/$", "", exportFolder))) {
-      dir.create(name, recursive = TRUE)
-      ParallelLogger::logInfo("Created incremental folder", incrementalFolder)
-    }
-    checkmate::assertDirectory(incrementalFolder, access = "w", add = errorMessage)
-  }
+  checkArg(exportFolder, add = errorMessage)
+  checkArg(incremental, add = errorMessage)
+  checkArg(incrementalFolder, add = errorMessage)
 
   if (is(temporalCovariateSettings, "covariateSettings")) {
     temporalCovariateSettings <- list(temporalCovariateSettings)
@@ -676,7 +651,7 @@ executeDiagnostics <- function(cohortDefinitionSet,
         minCellCount = minCellCount,
         databaseId = databaseId,
         incremental = FALSE,
-        cohortId = cohorts$cohortId
+        cohortId = cohortDefinitionSet$cohortId
       )
     }
   )
@@ -926,6 +901,8 @@ executeDiagnostics <- function(cohortDefinitionSet,
           )
 
           feCohortTable <- cohortTableNames$cohortSampleTable
+          # work around for cohortGenerator 0.11.1
+          cohortDefinitionSet$cohortIds <-  cohortDefinitionSet$cohortId
           feCohortDefinitionSet <-
             CohortGenerator::sampleCohortDefinitionSet(
               connection = connection,
@@ -941,15 +918,13 @@ executeDiagnostics <- function(cohortDefinitionSet,
               incrementalFolder = incrementalFolder
             )
 
-          feCohortCounts <- computeCohortCounts(
+          feCohortCounts <- CohortGenerator::getCohortCounts(
             connection = connection,
             cohortDatabaseSchema = cohortDatabaseSchema,
             cohortTable = cohortTableNames$cohortSampleTable,
-            cohorts = feCohortDefinitionSet,
-            exportFolder = exportFolder,
-            minCellCount = minCellCount,
-            databaseId = databaseId,
-            writeResult = FALSE
+            cohortDefinitionSet = feCohortDefinitionSet,
+            cohortIds = cohortDefinitionSet$cohortId,
+            databaseId = databaseId
           )
         }
 

diff --git a/R/getCdmDataSourceInformation.R b/R/getCdmDataSourceInformation.R
@@ -22,7 +22,7 @@
 #' Returns CDM source name, description, release date, CDM release date, version
 #' and vocabulary version, where available.
 #'
-#' @template Connection
+#' @template connectionDetails
 #'
 #' @template CdmDatabaseSchema
 #'

diff --git a/R/plotLogFile.R b/R/plotLogFile.R
@@ -1,5 +1,5 @@
 readLog <- function(path) {
-  df <- read.csv(path, sep = "\t", header = FALSE)
+  df <- utils::read.csv(path, sep = "\t", header = FALSE)
   names(df) <- c("time", "thread", "level", "package", "task", "message")
 
   df %>%

diff --git a/R/runCohortCharacterization.R b/R/runCohortCharacterization.R
@@ -311,7 +311,7 @@ getCohortCharacteristics <- function(connection = NULL,
   return(results)
 }
 
-#' runCohortCharacterization
+#' Generate and export the temporal cohort characterization
 #' 
 #' @description
 #' This function takes cohorts as input and generates the covariates for these cohorts.
@@ -324,32 +324,30 @@ getCohortCharacteristics <- function(connection = NULL,
 #'  * temporal_covariate_value_dist.csv
 #'  * temporal_time_ref.csv
 #' 
-#' @template connection 
+#' @template Connection 
 #' @template databaseId 
-#' @template exportFolder 
-#' @template cdmDatabaseSchema 
-#' @template cohortDatabaseSchema 
-#' @template cohortTable 
-#' @template tempEmulationSchema 
+#' @template ExportFolder 
+#' @template CdmDatabaseSchema 
+#' @template CohortDatabaseSchema 
+#' @template CohortTable 
+#' @template TempEmulationSchema 
 #' @template cdmVersion 
-#' @template minCellCount 
-#' @template instantiatedCohorts 
+#' @template MinCellCount 
+#' @template InstantiatedCohorts 
 #' @template Incremental
-#' @template batchSize 
+#' @template BatchSize 
 #'
-#' @param cohorts                    The cohorts for which the covariates need to be obtained
-#' @param cohortCounts               A dataframe with the cohort counts
+#' @param cohorts                    The cohorts for which the covariates need to be obtained.
+#' @param cohortCounts               A dataframe with the cohort counts.
 #' @param covariateSettings          Either an object of type \code{covariateSettings} as created using one of
 #'                                   the createTemporalCovariateSettings function in the FeatureExtraction package, or a list
 #'                                   of such objects.
 #' @param minCharacterizationMean    The minimum mean value for characterization output. Values below this will be cut off from output. This
 #'                                   will help reduce the file size of the characterization output, but will remove information
-#'                                   on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent)
+#'                                   on covariates that have very low values. The default is 0.001 (i.e. 0.1 percent).
 #'
-#' @return None, it will write results to disk
+#' @return None, it will write results to disk.
 #' @export
-#'
-#' @examples
 runCohortCharacterization <- function(connection,
                                       databaseId,
                                       exportFolder,

diff --git a/R/runCohortRelationship.R b/R/runCohortRelationship.R
@@ -174,11 +174,11 @@ getCohortRelationship <- function(
   }
 
 
-#' runCohortRelationship
+#' Generate and export the cohort temporal relationships
 #' 
 #' @description
-#' Generate and export the cohort relationship. Cohort relationship checks the temporal relationship between two or more cohorts
-#' and derives subject counts for cohorts with different temporal relationships.
+#' Generate and export the cohort relationship. Cohort relationship checks the temporal relationship
+#' between two or more cohorts and derives subject counts for cohorts with different temporal relationships.
 #' 
 #' @template Connection
 #' @template cohortDefinitionSet
@@ -192,7 +192,8 @@ getCohortRelationship <- function(
 #' @template MinCellCount 
 #' @template Incremental 
 #' @template BatchSize 
-#'
+#' 
+#' @return None, it will write the results to a csv file.
 #' @export
 runCohortRelationship <- function(
     connection,

diff --git a/R/runIncidenceRate.R b/R/runIncidenceRate.R
@@ -199,23 +199,23 @@ aggregateIr <- function(ratesSummary, aggregateList) {
 }
 
 #' Run the incidence rate cohort diagnostic
-#' 
+#' @description
 #' runIncidenceRate computes incidence rates for cohorts in the CDM population stratified
 #' by age, sex, and calendar year.
 #'
-#' @template connection 
-#' @template cohortDefinitionSet 
+#' @template Connection 
+#' @template CohortDefinitionSet 
 #' @param washoutPeriod Then minimum number of required observation days prior to 
-#'                      cohort index to be included in the numerator of the incidence rate
-#' @template tempEmulationSchema 
-#' @template cdmDatabaseSchema 
+#'                      cohort index to be included in the numerator of the incidence rate.
+#' @template TempEmulationSchema 
+#' @template CdmDatabaseSchema 
 #' @template CohortTable 
 #' @template databaseId 
-#' @template exportFolder 
-#' @template minCellCount 
+#' @template ExportFolder 
+#' @template MinCellCount 
 #' @template Incremental 
 #'
-#' @return
+#' @return None, it will write the results to a csv file.
 #' @export
 runIncidenceRate <- function(connection,
                              cohortDefinitionSet,
@@ -299,7 +299,10 @@ runIncidenceRate <- function(connection,
 
     data <- lapply(split(subset, subset$cohortId), runOneIncidenceRate)
     data <- dplyr::bind_rows(data)
-
+    data <- dplyr::mutate(data, databaseId = databaseId)
+
+    data <- data %>% dplyr::select("cohortCount", "personYears", "gender", "gender", "ageGroup",
+                                  "calendarYear", "incidenceRate", "cohortId", "databaseId")
     exportDataToCsv(
       data = data,
       tableName = "incidence_rate",
-Original file line number
+Diff line change
@@ Expand Up / @@ -31,4 +31,4 @@ compare_versions @@
     ^Meta$
     inst/shiny/DiagnosticsExplorer/renv
     work/
-    sql/
+    ^sql/$