Skip to content

Commit

Permalink
Merge pull request #44 from Big-Life-Lab/PBC-database
Browse files Browse the repository at this point in the history
Add PBC database as demonstration data
  • Loading branch information
yulric authored Dec 13, 2024
2 parents 404576e + 5a1da9b commit d9ef762
Show file tree
Hide file tree
Showing 28 changed files with 726 additions and 265 deletions.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
^CONTRIBUTING\.md$
^path$
^renv$
^renv\.lock$
^.*\.Rproj$
Expand All @@ -6,3 +8,5 @@
^docs$
^pkgdown$
^\.github$
^data-raw$
^path/to/venv/
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
# Mac
.DS_Store
inst/doc

*.code-workspace
12 changes: 7 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ Type: Package
Title: Contains functions to interface with variable details sheets, including recoding variables and converting them to PMML
Version: 0.1.0
Authors@R: c(
person(given = "Yulric", family = "Sequeira", role = c("aut"), email = "ysequeira@ohri.ca"),
person(given = "Luke",family = "Bailey", email = "lbailey@toh.ca", role = c("aut")),
person(given = "Rostyslav", family = "Vyuha", role = c("aut","cre"), email = "rvyuha@toh.ca"))
Maintainer: Rostyslav Vyuha <rvyuha@toh.ca>
person("Yulric", "Sequeira", email = "ysequeira@ohri.ca", role = c("aut", "cre")),
person("Luke", "Bailey", role = c("aut")),
person("Rostyslav", "Vyuha", role = c("aut"))
)
Maintainer: Yulric Sequeira <ysequeira@ohri.ca>
Description: Recode and harmonize data using variable and details sheets.
Depends:
R (>= 3.1.0)
Expand All @@ -24,12 +25,13 @@ URL: https://github.com/Big-Life-Lab/recodeflow
BugReports: https://github.com/Big-Life-Lab/recodeflow/issues
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Suggests:
DT,
kableExtra,
knitr,
rmarkdown,
readr,
testthat (>= 3.0.0)
Config/testthat/edition: 3
VignetteBuilder: knitr
88 changes: 88 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#' The pbc dataset
#'
#' @format A data frame with 418 observations and 20 variables.
#' \describe{
#' \item{id}{case number}
#' \item{time}{number of days between registration and the earlier of death, transplantation, or study analysis time}
#' \item{status}{status at endpoint, 0/1/2 for censored, transplant, dead}
#' \item{trt}{1/2/NA for D-penicillamine, placebo, or not randomized}
#' \item{age}{age in years}
#' \item{sex}{m/f}
#' \item{ascites}{presence of ascites}
#' \item{hepato}{presence of hepatomegaly or enlarged liver}
#' \item{spiders}{blood vessel malformations in the skin}
#' \item{edema}{0 no edema, 0.5 untreated or successfully treated, 1 edema despite diuretic therapy}
#' \item{bili}{serum bilirubin (mg/dl)}
#' \item{chol}{serum cholesterol (mg/dl)}
#' \item{albumin}{serum albumin (g/dl)}
#' \item{copper}{urine copper (ug/day)}
#' \item{alk.phos}{alkaline phosphatase (U/liter)}
#' \item{ast}{aspartate aminotransferase (U/ml)}
#' \item{trig}{triglycerides (mg/dl)}
#' \item{platelet}{platelet count}
#' \item{protime}{standardised blood clotting time}
#' \item{stage}{histologic stage of disease (1, 2, 3, or 4)}
#' }
#' @source \url{https://cran.r-project.org/web/packages/survival/survival.pdf}
"pbc"

#' Metadata for the pbc dataset using the DCMI standard
#'
#' @format A list containing DCMI metadata:
#' \describe{
#' \item{title}{title}
#' \item{creator}{creator}
#' \item{subject}{subject}
#' \item{description}{description}
#' \item{publisher}{publisher}
#' \item{date}{date}
#' \item{type}{type}
#' \item{format}{format}
#' \item{identifier}{identifier}
#' \item{source}{source}
#' \item{language}{language}
#' \item{rights}{rights}
#' \item{references}{references}
#' }
"pbc_metadata"

#' Variables sheet for the pbc dataset
#'
#' @format A data frame with 24 rows and 11 columns:
#' \describe{
#' \item{variable}{variable name}
#' \item{label}{variable label}
#' \item{labelLong}{variable label long}
#' \item{subject}{subject}
#' \item{section}{section}
#' \item{variableType}{variable type}
#' \item{databaseStart}{database start}
#' \item{units}{units}
#' \item{variableStart}{variable start}
#' \item{notes}{logical indicating presence of notes}
#' \item{description}{logical indicating presence of description}
#' }
"pbc_variables"

#' Variable details sheet for the pbc dataset
#'
#' @format A data frame with 69 rows and 16 columns:
#' \describe{
#' \item{variable}{variable name}
#' \item{dummyVariable}{dummy variable name}
#' \item{typeEnd}{end type}
#' \item{databaseStart}{database start}
#' \item{variableStart}{variable start}
#' \item{typeStart}{start type}
#' \item{recEnd}{record end}
#' \item{recStart}{record start}
#' \item{catLabel}{category label}
#' \item{catLabelLong}{category long label}
#' \item{numValidCat}{number of valid categories (numeric)}
#' \item{units}{logical indicating presence of units}
#' \item{notes}{logical indicating presence of notes}
#' \item{catStartLabel}{category start label}
#' \item{variableStartShortLabel}{variable start short label}
#' \item{variableStartLabel}{variable start label}
#' }
"pbc_variable_details"
2 changes: 1 addition & 1 deletion R/example_der_function.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' example_der_fun calculates chol*bili
#' @param chol the row value for chol
#' @param bili the row value for bili
#' @export
#' @keywords internal
example_der_fun <- function(chol, bili){
# as numeric is used to coerce in case categorical numeric variables are used.
# Warning either chol or bili being NA will result in NA return
Expand Down
11 changes: 8 additions & 3 deletions R/recode-with-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#'
#' is_equal(NA,NA)
#' # TRUE
#' @export
#' @keywords internal
is_equal <- function(v1, v2) {
same <- (v1 == v2) | (is.na(v1) & is.na(v2))
# anything compared to NA equals NA
Expand All @@ -44,7 +44,7 @@ is_equal <- function(v1, v2) {
#' Creates new variables by recoding variables in a dataset using the rules
#' specified in a variables details sheet
#'
#' The \href{https://github.com/Big-Life-Lab/recodeflow/blob/master/inst/extdata/PBC-variableDetails.csv}{variable_details}
#' The \href{https://github.com/Big-Life-Lab/recodeflow/blob/master/inst/extdata/pbc_variable_details.csv}{variable_details}
#' dataframe needs the following columns:
#' \describe{
#' \item{variable}{Name of the new variable created. The name of the new
Expand Down Expand Up @@ -523,6 +523,7 @@ recode_call <-
#' @param variable_being_checked the name of the recoded variable
#'
#' @return the data equivalent of variable_being_checked
#' @keywords internal
get_data_variable_name <-
function(data_name,
data,
Expand Down Expand Up @@ -586,6 +587,7 @@ get_data_variable_name <-
#' @param tables A list of reference tables
#'
#' @return Returns recoded and labeled data
#' @keywords internal
recode_columns <-
function(data,
variables_details_rows_to_process,
Expand Down Expand Up @@ -979,7 +981,7 @@ recode_non_derived_variables <- function(
)
if (length(else_value) > 0) {
extra_row <- nrow(log_table) + 1
log_table[extra_row , "value_to"] <- else_value
log_table[extra_row , "value_to"] <- as.character(else_value)
log_table[extra_row , "From"] <-
"else"
log_table[extra_row , "rows_recoded"] <-
Expand Down Expand Up @@ -1009,6 +1011,7 @@ recode_non_derived_variables <- function(
#'
#' @return a boolean vector containing true for rows where the
#' comparison is true
#' @keywords internal
compare_value_based_on_interval <-
function(left_boundary,
right_boundary,
Expand Down Expand Up @@ -1101,6 +1104,7 @@ update_variable_details_based_on_variable_sheet <-
#' @param var_type the toType of a variable
#'
#' @return an appropriately coded tagged NA
#' @keywords internal
format_recoded_value <- function(cell_value, var_type) {
recode_value <- NULL
if (grepl("NA", cell_value)) {
Expand Down Expand Up @@ -1370,6 +1374,7 @@ calculate_custom_function_row_value <-
#' @param variable_details_row A data frame with a single row which will be
#' checked
#' @return A boolean
#' @keywords internal
is_derived_var <- function(variable_details_row) {
derived_var_regex <- "DerivedVar::\\[(.+?)\\]|DerivedVar::\\[\\]"
return(length(grep(
Expand Down
Loading

0 comments on commit d9ef762

Please sign in to comment.