diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..e36443d --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,4 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^man/figures$ +^README\.Rmd$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..65caa3b --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.Rproj.user +mixcurelps.Rproj +README.Rmd \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..b5da71a --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,24 @@ +Package: mixcurelps +Type: Package +Title: Approximate Bayesian Inference in Mixture Cure Models +Version: 0.1.1 +Depends: R (>= 4.1) +Authors@R: person("Oswaldo","Gressani", + email="oswaldo_gressani@hotmail.fr", role=c("aut","cre")) +Maintainer: Oswaldo Gressani +Description: This package implements Laplace approximations and P-splines for + fast approximate Bayesian inference in mixture cure models. +License: file LICENSE +Encoding: UTF-8 +Imports: + Rcpp (>= 1.0.7), + survival (>= 3.2-11), + ggplot2 (>= 3.3.5), + progress (>= 1.2.2), + crayon (>= 1.4.1), + survminer (>= 0.4.3) +LazyData: true +RoxygenNote: 7.1.1 +LinkingTo: + RcppArmadillo, + Rcpp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1c4141b --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2021 +COPYRIGHT HOLDER: Oswaldo Gressani diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..e0573af --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,12 @@ +# Generated by roxygen2: do not edit by hand + +S3method(plot,simdatmixcure) +S3method(print,lpsmc) +export(curefit) +export(lpsmc) +export(postpendist) +export(simdatmixcure) +export(simlpsmc) +export(survcurve) +importFrom(Rcpp,sourceCpp) +useDynLib(mixcurelps, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..9a655a1 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,5 @@ +# Newspaper for the mixcurelps package # + +### Version 0.1.1 ### + +* **2021-09-09:** Release of unstable version on Github. Version name: "Keeping CPU at low temperature". diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..e28cc5b --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,11 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +Rcpp_Laplace <- function(lat0, v, K, PDcorrect, Dloglik, D2loglik, Qv) { + .Call(`_mixcurelps_Rcpp_Laplace`, lat0, v, K, PDcorrect, Dloglik, D2loglik, Qv) +} + +Rcpp_cubicBspline <- function(x, lower, upper, K) { + .Call(`_mixcurelps_Rcpp_cubicBspline`, x, lower, upper, K) +} + diff --git a/R/S3_lpsmc_print.R b/R/S3_lpsmc_print.R new file mode 100644 index 0000000..2fd55c8 --- /dev/null +++ b/R/S3_lpsmc_print.R @@ -0,0 +1,65 @@ +#' Print a lpsmc object. +#' +#' @description Print method for a \code{lpsmc} object. +#' +#' @param x An object of class \code{lpsmc}. +#' @param ... Further arguments to be passed to print routine. +#' +#' +#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +#' +#' @export + +print.lpsmc <- function(x,...){ + + tabcolnames <- c("Estimate","sd","CI90%.low","CI90.up%", + "CI95%.low","CI95%.up") + K <- x$K + + # Table for the incidence part + tabincidrow <- x$p + tabincidout <- as.data.frame(matrix(0, nrow = tabincidrow, ncol = 6)) + rownames(tabincidout) <- colnames(x$X) + colnames(tabincidout) <- tabcolnames + tabincidout[,1] <- x$betahat + tabincidout[,2] <- sqrt(diag(x$Covhat)[(K + 1):(K + tabincidrow)]) + tabincidout[,3] <- as.numeric(x$CI90[,1])[1:tabincidrow] + tabincidout[,4] <- x$CI90[,2][1:tabincidrow] + tabincidout[,5] <- x$CI95[,1][1:tabincidrow] + tabincidout[,6] <- x$CI95[,2][1:tabincidrow] + iscolnum <- sapply(tabincidout, is.numeric) + tabincidout[iscolnum] <- lapply(tabincidout[iscolnum], round, 3) + + # Table for the latency part + tablatencyrow <- x$q + dimlat <- x$K+x$p+x$q + tablatencyout <- as.data.frame(matrix(0, nrow = tablatencyrow, ncol = 6)) + rownames(tablatencyout) <- colnames(x$Z) + colnames(tablatencyout) <- tabcolnames + tablatencyout[,1] <- x$gammahat + tablatencyout[,2] <- sqrt(diag(x$Covhat)[(K + tabincidrow + 1):dimlat]) + tablatencyout[,3] <- as.numeric(x$CI90[,1])[(tabincidrow + 1):(dimlat-K)] + tablatencyout[,4] <- x$CI90[,2][(tabincidrow + 1):(dimlat-K)] + tablatencyout[,5] <- x$CI95[,1][(tabincidrow + 1):(dimlat-K)] + tablatencyout[,6] <- x$CI95[,2][(tabincidrow + 1):(dimlat-K)] + iscolnum <- sapply(tablatencyout, is.numeric) + tablatencyout[iscolnum] <- lapply(tablatencyout[iscolnum], round, 3) + + # Print output table + + cat("Fitting mixture cure model with Laplacian-P-splines \n") + cat(paste(rep("-",50),collapse = ""),"\n") + cat("Sample size: ", length(x$ftime), "\n") + cat("No. of B-splines: ", K, "\n") + cat(paste(rep("-",90),collapse = ""),"\n") + cat(" (Incidence) \n") + cat(paste(rep("-",90),collapse = ""),"\n") + print.table(as.matrix(tabincidout), digits = 3, justify = "left") + cat(paste(rep("-",90),collapse = ""),"\n") + cat(" (Latency) \n") + cat(paste(rep("-",90),collapse = ""),"\n") + print.table(as.matrix(tablatencyout), digits = 3, justify = "left") + cat(paste(rep("-",90),collapse = ""),"\n") + cat(paste0("'Real' elapsed time: ",x$timer, " seconds.\n")) + +} diff --git a/R/S3_simdatmixcure_plot.R b/R/S3_simdatmixcure_plot.R new file mode 100644 index 0000000..e3fd310 --- /dev/null +++ b/R/S3_simdatmixcure_plot.R @@ -0,0 +1,60 @@ +#' @method plot simdatmixcure +#' @export + +# Plot method for an object of class simdatmixcure +plot.simdatmixcure <- function(x, ...) { + + + # tdom <- seq(0, x$tup, length = 1000) + + # graphics::plot(x$fitKM, mark.time = TRUE, mark = "x", xlab = "t", + # ylab = expression(S[0](t)), main = "Baseline survival", + # cex.main = 0.9) + # graphics::abline(v = x$plateau, lty = 2, lwd = 2, col = "orange") + # graphics::lines(tdom, sapply(tdom, x$S0), type = "l", col = "blue") + # graphics::legend("topright", lty = c(1,1,2), + # col = c("black", "blue", "orange"), + # c("Kaplan-Meier", "Weibull baseline survival", + # "Start of plateau"), bty = "n", cex = 0.8) + + + # With survminer + tobs <- x$tobs + status <- x$delta + dataKapM <- data.frame(tobs, status) + fitKapM <- survival::survfit(survival::Surv(tobs, status) ~ 1, + data = dataKapM) + plotsurv <- survminer::ggsurvplot(fitKapM, + data = dataKapM, + censor.shape="x", + censor.size = 5.5, + size = 1, + palette = "#0089FF", + conf.int = TRUE, + font.tickslab = c(14,"darkblue"), + font.x =c(14,"black"), + font.y = c(14,"black"), + ggtheme = ggplot2::theme_light(), + risk.table = "percentage", + risk.table.col ="darkblue", + legend="none", + legend.title="", + tables.theme = survminer::theme_cleantable() + ) + plotsurv$table <- plotsurv$table + ggplot2::theme( + axis.text.y = ggplot2::element_blank(), + axis.text.x = ggplot2::element_text(size=14), + axis.title.x = ggplot2::element_text(size=14) + ) + + plotsurv $plot<- plotsurv$plot + ggplot2::geom_vline(xintercept = x$plateau, + linetype = "dashed", size = 1, + colour = "#15BA57") + + plotsurv + + + + + +} diff --git a/R/breastcancer.R b/R/breastcancer.R new file mode 100644 index 0000000..f38d52a --- /dev/null +++ b/R/breastcancer.R @@ -0,0 +1,23 @@ +#' Breast cancer data. +#' +#' @docType data +#' +#' @description Breast cancer data from the \code{breastCancerVDX} package. +#' +#' @usage data(breastcancer) +#' +#' @format A data frame with 286 rows and 4 columns. +#' \describe{ +#' \item{\code{tobs}}{Distant-metastasis-free survival (in days).} +#' \item{\code{delta}}{Event indicator \code{1}=death or relapse, \code{0}=censored.} +#' \item{\code{AGE}}{Age of patients.} +#' \item{\code{ER}}{Estrogen receptor \code{0}="<=10fmol", \code{1}=">10fmol".} +#' } +#' +#' +#' @source \url{https://doi.org/doi:10.18129/B9.bioc.breastCancerVDX} +#' +#' @references Schroeder M, Haibe-Kains B, Culhane A, Sotiriou C, Bontempi G, +#' Quackenbush J (2021). breastCancerVDX: Gene expression datasets published +#' by Wang et al. [2005] and Minn et al. [2007] (VDX). R package version 1.30.0. +"breastcancer" diff --git a/R/cpprelated.R b/R/cpprelated.R new file mode 100644 index 0000000..828cc16 --- /dev/null +++ b/R/cpprelated.R @@ -0,0 +1,5 @@ +## usethis namespace: start +#' @useDynLib mixcurelps, .registration = TRUE +#' @importFrom Rcpp sourceCpp +## usethis namespace: end +NULL diff --git a/R/curefit.R b/R/curefit.R new file mode 100644 index 0000000..7c6d317 --- /dev/null +++ b/R/curefit.R @@ -0,0 +1,68 @@ +#' Estimated cure proportion +#' +#' @description +#' Computes the estimated cure proportion based on a mixture cure model fit +#' with \code{lpsmc}. Both estimates and approximate 90% and 95% credible +#' intervals are shown. +#' +#' @param x A lpsmc object. +#' @param covarprofile The covariate profile on which to compute the +#' cure proportion. +#' +#' @return A table with the estimated cure proportion. +#' +#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +#' +#' @examples +#' ### Application on breast cancer data +#' rm(list=ls()) +#' data("breastcancer") +#' formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +#' fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20) +#' covarprofile <- matrix(c(1, 30, 1, 1, 40, 0), nrow = 2 , byrow = TRUE) +#' fitcure <- curefit(fitcancer,covarprofile) +#' fitcure$estimcure +#' +#' @export + + +curefit <- function(x, covarprofile){ + + betahat <- x$betahat + phat <- x$px(betahat, covarprofile) + p <- x$p + K <- x$K + nprofiles <- nrow(covarprofile) + + CIcure <- function(y, alpha){ + gbhat <- log(log(as.numeric(1 + exp(y %*% betahat)))) + Sigmabhat <- x$Covhat[(K + 1):(K + p), (K + 1):(K + p)] + gradbhat <- (x$px(betahat, y)/log(as.numeric(1 + exp(y %*% betahat)))) * y + qz_alpha <- stats::qnorm(alpha * 0.5, lower.tail = FALSE) + postsd <- sqrt(as.numeric(t(gradbhat) %*% Sigmabhat %*% gradbhat)) + CIcure_alpha <- c(gbhat - qz_alpha * postsd, gbhat + qz_alpha * postsd) + CIcure_original <- rev(exp(-exp(CIcure_alpha))) + return(CIcure_original) + } + + CIcuremat <- matrix(0, nrow = nprofiles , ncol = p + 5) + colnames(CIcuremat) <- c(as.character(colnames(x$X)),"1-p(x)", + "CI90.low","CI90.up","CI95.low","CI95.up") + rownames(CIcuremat) <- paste0("x.profile", seq(nprofiles)) + for(j in 1:nprofiles){ + CIcuremat[j, 1:p] <- covarprofile[j, ] + CIcuremat[j, p+1] <- 1-phat[j] + CIcuremat[j, (p+2):(p+3)] <- CIcure(covarprofile[j,], 0.10) + CIcuremat[j, (p+4):(p+5)] <- CIcure(covarprofile[j,], 0.05) + } + + # cat(paste(rep("-",90),collapse = ""),"\n") + # cat("Estimated cure proportion \n") + # cat(paste(rep("-",90),collapse = ""),"\n") + # print.table(CIcuremat, digits = 3, justify = "left") + # cat(paste(rep("-",90),collapse = ""),"\n") + + + outlist <- list(estimcure = CIcuremat) + +} diff --git a/R/ecog1684.R b/R/ecog1684.R new file mode 100644 index 0000000..b528d90 --- /dev/null +++ b/R/ecog1684.R @@ -0,0 +1,33 @@ +#' Phase III Melanoma clinical trial. +#' +#' @docType data +#' +#' @description Melanoma data from the phase III Eastern Cooperative +#' Oncology Group (ECOG) two-arm clinical trial studied in +#' Kirkwood et al. (1996) and obtained from the \code{smcure} package. +#' +#' @usage data(ecog1684) +#' +#' @format A data frame with 284 rows and 5 columns. +#' \describe{ +#' \item{\code{tobs}}{Relapse-free survival (in years).} +#' \item{\code{delta}}{\code{1}=death or relapse, \code{0}=censored.} +#' \item{\code{TRT}}{Treatment: \code{0}=control, +#' \code{1}=Interferon alpha-2b (IFN).} +#' \item{\code{AGE}}{Age centered to the mean.} +#' \item{\code{SEX}}{\code{0}=Male, \code{1}=Female.} +#' } +#' +#' +#' @source \url{https://CRAN.R-project.org/package=smcure} +#' +#' @references Kirkwood, J. M., Strawderman, M. H., Ernstoff, M. S., +#' Smith, T. J., Borden, E. C. and Blum, R. H. (1996). Interferon alfa-2b +#' adjuvant therapy of high-risk resected cutaneous melanoma: the Eastern +#' Cooperative Oncology Group Trial EST 1684. +#' \emph{Journal of clinical oncology} \strong{14}(1): 7-17. +#' @references Corbiere, F. and Joly, P. (2007). A SAS macro for parametric +#' and semiparametric mixture cure models. \emph{Computer methods and programs +#' in Biomedicine} \strong{85}(2): 173-180. +#' \url{https://doi.org/10.1016/j.cmpb.2006.10.008} +"ecog1684" diff --git a/R/lpsmc.R b/R/lpsmc.R new file mode 100644 index 0000000..4aded22 --- /dev/null +++ b/R/lpsmc.R @@ -0,0 +1,474 @@ +#' Fit a mixture cure model with Laplacian-P-splines +#' +#' This routine fits a mixture cure model with a logistic link function for +#' the incidence part and a flexible Cox proportional hazards model for the +#' latency part where the baseline survival is approximated with penalized +#' B-splines. +#' +#' @param formula A model formula of the form \code{Surv(tobs,delta)~ +#' inci()+late()}. +#' @param data A data frame (optional). +#' @param K The number of B-spline coefficients. +#' @param penorder The order of the penalty. +#' @param stepsize The stepsize taken to maximize the log posterior penalty. +#' +#' @return An object of class \code{lpsmc}. +#' +#' @seealso \link{lpsmc.object} +#' +#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +#' +#' @examples +#' ### Real data application ECOG e1684 clinical trial +#' data("ecog1684") +#' formula <- Surv(tobs,delta) ~ inci(SEX + TRT + AGE) + late(SEX + TRT + AGE) +#' fite1684 <- lpsmc(formula = formula, data = ecog1684) +#' fite1684 +#' +#' ### Application on breast cancer data +#' rm(list=ls()) +#' data("breastcancer") +#' formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +#' fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20) +#' fitcancer +#' +#' @export + +lpsmc <- function(formula, data, K = 15, penorder = 3, stepsize = 0.2){ + + #--- Start clock + tic <- proc.time() + + #--- Extracting dimensions + if(!inherits(formula, "formula")) + stop("Incorrect model formula") + # Reordering if needed + if (as.character(stats::terms(formula)[[3]][[2]])[1] == "late" && + as.character(stats::terms(formula)[[3]][[3]])[1] == "inci") { + formula <- stats::as.formula(paste( + paste0("Surv(", all.vars(formula)[1], ",", all.vars(formula)[2], ")~"), + attr(stats::terms(formula), "term.labels")[2], + "+", + attr(stats::terms(formula), "term.labels")[1], + sep = "")) + } + varnames <- all.vars(formula, unique = FALSE) + collect <- parse(text = paste0("list(", paste(varnames, collapse = ","), ")"), + keep.source = FALSE) + + if (missing(data)) { + mff <- data.frame(matrix(unlist(eval(collect)), + ncol = length(varnames), byrow = FALSE)) + } else{ + mff <- data.frame(matrix(unlist(eval(collect, envir = data)), + ncol = length(varnames), byrow = FALSE)) + } + colnames(mff) <- varnames + incidstruct <- attr(stats::terms(formula), "term.labels")[1] + ncovar.inci <- sum(strsplit(incidstruct, "+")[[1]] == "+") + 1 + latestruct <- attr(stats::terms(formula), "term.labels")[2] + ncovar.late <- sum(strsplit(latestruct, "+")[[1]] == "+") + 1 + + ftime <- mff[, 1] # Event times + event <- mff[, 2] # Event indicator + tu <- max(ftime) # Upper bound of follow-up + n <- nrow(mff) + X <- as.matrix(cbind(rep(1, n), mff[, (3:(2 + ncovar.inci))])) + colnames(X) <- c("(Intercept)", colnames(mff[(3:(2 + ncovar.inci))])) + if(any(is.infinite(X))) + stop("Data contains infinite covariates") + if(ncol(X) == 0) + stop("The model has no covariates for the incidence part") + Z <- as.matrix(mff[, (2 + ncovar.inci + 1) : ncol(mff)], ncol = ncovar.late) + if(any(is.infinite(Z))) + stop("Data contains infinite covariates") + if(ncol(Z) == 0) + stop("The model has no covariates for the latency part") + colnames(Z) <- colnames(mff[(2 + ncovar.inci + 1) : ncol(mff)]) + if (!is.vector(K, mode = "numeric") || length(K) > 1 || is.na(K)) + stop("K must be a numeric scalar") + if (K < 10) + stop("K must be at least 10") + p <- ncol(X) + q <- ncol(Z) + dimlat <- K + p + q + penorder <- floor(penorder) + if(penorder < 2 || penorder > 3) + stop("Penalty order must be either 2 or 3") + + #--- Cubic B-spline basis + Bt <- Rcpp_cubicBspline(ftime, lower = 0, upper = tu, K = K) + + #-- Bspline basis evaluated on grid to approximate S0 + J <- 300 # Number of bins + phij <- seq(0, tu, length = J + 1) # Grid points + Deltaj <- phij[2] - phij[1] # Interval width + sj <- phij[1:J] + Deltaj * 0.5 # Interval midpoint + Bsj <- Rcpp_cubicBspline(sj, lower = 0, upper = tu, K = K) # B-spline basis + + #--- Prior precision and penalty matrix + D <- diag(K) # Diagonal matrix + for (k in 1:penorder) D <- diff(D) # Difference matrix of dimension K-r by K + P <- t(D) %*% D # Penalty matrix of dimension K-1 by K-1 + P <- P + diag(1e-06,K) # Diagonal perturbation to make P full rank + prec_betagamma <- 1e-06 # Prior precision for the regression coeffs. + a_delta <- 1e-04 # Prior for delta + b_delta <- 1e-04 # Prior for delta + nu <- 3 # Prior for lambda + + # Precision matrix + Qv <- function(v) { + Qmat <- matrix(0, nrow = dimlat, ncol = dimlat) + Qmat[1:K, 1:K] <- exp(v) * P + Qmat[(K+1):dimlat, (K+1):dimlat] <- diag(prec_betagamma, p + q) + return(Qmat) + } + + BKL <- list() + for (k in 1:K) BKL[[k]] <- Bsj * matrix(rep(Bsj[, k], K), + ncol = K, byrow = FALSE) + + cumult <- function(t, theta,k,l){ + bin_index <- as.integer(t / Deltaj) + 1 + bin_index[which(bin_index == (J + 1))] <- J + h0sj <- exp(colSums(theta * t(Bsj))) + + if(k==0 && l==0){ + output <- (cumsum(h0sj)[bin_index]) * Deltaj + } else if (k==1 && l==0){ + h0mat <- matrix(rep(h0sj, K), ncol = K, byrow = FALSE) + output <- (apply(h0mat * Bsj, 2, cumsum)[bin_index, ]) * Deltaj + } else if (k==1 && l>0){ + h0mat <- matrix(rep(h0sj, K), ncol = K, byrow = FALSE) + output <- (apply(h0mat * BKL[[l]], 2, cumsum)[bin_index, ]) * Deltaj + } + return(output) + } + + #--- Incidence function (logistic) + px <- function(betalat, x) as.numeric((1 + exp(-(x %*% betalat))) ^ (-1)) + + #--- Population survival function + Spop <- function(latent){ + thetalat <- latent[1:K] + betalat <- latent[(K + 1):(K + p)] + gammalat <- latent[(K + p + 1):dimlat] + pX <- px(betalat,X) + Zg <- as.numeric(Z%*%gammalat) + Su <- exp(-exp(Zg)*cumult(ftime,thetalat,k=0,l=0)) + Spop_value <- 1-pX+pX*Su + return(Spop_value) + } + + #--- Log-likelihood function + loglik <- function(latent){ + thetalat <- latent[1:K] + betalat <- latent[(K + 1):(K + p)] + gammalat <- latent[(K + p + 1):dimlat] + pX <- px(betalat, X) + Zg <- as.numeric(Z %*% gammalat) + Btheta <- as.numeric(Bt %*% thetalat) + cumul <- cumult(ftime, thetalat, k = 0, l = 0) + Su <- exp(-exp(Zg) * cumul) + loglikelihood <- sum(event * (log(pX) + Zg + Btheta - exp(Zg) * cumul) + + (1 - event) * log(1 - pX + pX * Su)) + return(loglikelihood) + } + + ## Gradient + Dloglik <- function(latent) { + + gradloglik <- c() + thetalat <- latent[1:K] + betalat <- latent[(K + 1):(K + p)] + gammalat <- latent[(K + p + 1):dimlat] + pX <- px(betalat,X) + Zg <- as.numeric(Z %*% gammalat) + Sdelta_ratio <- (1 - event) / Spop(latent) + + # Compute preliminary quantities + omega_oi <- cumult(ftime, thetalat, k=0, l=0) + omega_oik <- cumult(ftime, thetalat, k=1, l=0) + + # Partial derivative wrt spline coefficients + gradloglik[1:K] <- colSums(event * (Bt - exp(Zg) * omega_oik) - + Sdelta_ratio * pX * + exp(Zg - exp(Zg) * omega_oi) * omega_oik) + + # Partial derivative wrt beta coefficients + gradloglik[(K + 1):(K + p)] <- colSums((event * (1 - pX) + Sdelta_ratio * pX * + (1 - pX) * (exp(-exp(Zg) * omega_oi) - 1)) * X) + + # Partial derivative wrt gamma coefficients + gradloglik[(K + p + 1):dimlat] <- colSums((event * (1 - exp(Zg) * omega_oi) - + Sdelta_ratio * pX *exp(Zg - exp(Zg) * + omega_oi) * omega_oi) * Z) + + return(gradloglik) + + } + + ## Hessian + D2loglik <- function(latent){ + thetalat <- latent[1:K] + betalat <- latent[(K + 1):(K + p)] + gammalat <- latent[(K + p + 1):dimlat] + pX <- px(betalat,X) + dpX <-pX*(1-pX)*X + Zg <- as.numeric(Z %*% gammalat) + Sp <- Spop(latent) + + # Compute preliminary quantities + omega_oi <- cumult(ftime, thetalat, k=0, l=0) + omega_oik <- cumult(ftime, thetalat, k=1, l=0) + ff <- exp(Zg-exp(Zg)*omega_oi)*omega_oik + dSp_beta <- pX*(1-pX)*(exp(-exp(Zg)*omega_oi)-1)*X + dSp_gamma <- (-pX*exp(Zg-exp(Zg)*omega_oi)*omega_oi)*Z + ftilde <- pX*(1-pX)*(exp(-exp(Zg)*omega_oi)-1) + dftilde_beta <- (pX*(1-pX)*(1-2*pX)*(exp(-exp(Zg)*omega_oi)-1))*X + fbreve <- (exp(-exp(Zg)*omega_oi)-1) + dfbreve_gamma <- (-exp(Zg-exp(Zg)*omega_oi)*omega_oi)*Z + fddot <- exp(Zg-exp(Zg)*omega_oi) + dfddot_gamma <- fddot*(1-exp(Zg)*omega_oi)*Z + + # Block 11 + Block11 <- matrix(0, nrow=K, ncol=K) + for(l in 1:K){ + omega_oikl <- cumult(ftime, thetalat, k=1,l=l) + df <- exp(Zg-exp(Zg)*omega_oi)*omega_oikl-ff*exp(Zg)*omega_oik[,l] + dSp <- (-pX*exp(Zg-exp(Zg)*omega_oi)*omega_oik[,l]) + + Block11[l,] <- colSums((-event*exp(Zg)*omega_oikl)- + (1-event) * pX * (Sp^(-2)) * (df*Sp-ff*dSp)) + } + + # Block 12 + Block12 <- t(ff) %*% (-(1-event)*(dpX*Sp-pX*dSp_beta)*Sp^(-2)) + + # Block 13 + Block13 <- t(omega_oik)%*%(-event*exp(Zg)*Z)- + t(ff)%*%((((1-exp(Zg)*omega_oi)*Z*Sp)- + ((-pX*exp(Zg-exp(Zg)*omega_oi)*omega_oi)*Z)) * + ((1-event) * pX * Sp^(-2))) + + # Block 22 + Block22 <- t(-event*pX*(1-pX)*X)%*%X + + (t((1-event)*Sp^(-2)*(dftilde_beta*Sp-ftilde*dSp_beta))%*%X) + + # Block 23 + Block23 <- t(X)%*%((1-event)*pX*(1-pX)*Sp^(-2)* + (dfbreve_gamma*Sp-fbreve*dSp_gamma)) + + # Block 33 + Block33 <- t(-event*exp(Zg)*omega_oi*Z)%*%Z - + t(((1-event)*pX*omega_oi*Sp^(-2))* + (dfddot_gamma*Sp-fddot*dSp_gamma))%*%Z + + # Construction of Hessian matrix + Hess <- matrix(0, nrow = dimlat, ncol = dimlat) + Hess[(1:K),(1:K)] <- Block11 + Hess[(1:K),(K+1):(K+p)] <- Block12 + Hess[(K+1):(K+p),(1:K)] <- t(Block12) + Hess[(1:K),(K+p+1):dimlat] <- Block13 + Hess[(K+p+1):dimlat,(1:K)] <- t(Block13) + Hess[(K+1):(K+p),(K+1):(K+p)] <- Block22 + Hess[(K+1):(K+p),(K+p+1):(dimlat)] <- Block23 + Hess[(K+p+1):(dimlat),(K+1):(K+p)] <- t(Block23) + Hess[(K+p+1):(dimlat),(K+p+1):(dimlat)] <- Block33 + + return(Hess) + } + + ## Function to correct for positive definiteness if necessary + PDcorrect <- function(x, eigentol = 1e-07) { + eigvalues <- eigen(x, symmetric = TRUE, only.values = TRUE)$values + checkpositive <- !any(eigvalues < eigentol) + if (checkpositive == FALSE) { + correctPD <- x + diag(abs(min(eigvalues)) + 1e-04, ncol(x)) + note = "Matrix has been adjusted to PD" + isPD = 0 + } else{ + correctPD <- x + note = "Matrix is already PD" + isPD = 1 + } + outlist <- list(correctPD = correctPD, isPD = isPD, message = note) + return(outlist) + } + + ## Function to return full covariance matrix of dimension dimlat x dimlat + Cov_full <- function(Cmatrix){ + Covfull <- matrix(0, nrow = dimlat, ncol = dimlat) + Covfull[1:(K - 1), 1:(K - 1)] <- Cmatrix[1:(K - 1), 1:(K - 1)] + Covfull[1:(K - 1), (K + 1):dimlat] <- Cmatrix[1:(K - 1), (K:(dimlat-1))] + Covfull[(K + 1):dimlat, 1:(K - 1)] <- Cmatrix[K:(dimlat - 1), 1:(K - 1)] + Covfull[(K+1):dimlat,(K+1):dimlat] <- Cmatrix[K:(dimlat-1),K:(dimlat-1)] + return(Covfull) + } + + #--- Posterior of (log)penalty parameter v=log(lambda) + + #--- log p(v|D) function + logpv <- function(v, lat0){ + + LL <- Rcpp_Laplace(lat0 = lat0,v=v,K=K,PDcorrect,Dloglik,D2loglik,Qv) #Laplace approximation + latstar_cc <- LL$latstar # Mean of Laplace + Covstar_c <- LL$Covstar # Covariance matrix of Laplace + logdetCovstar_c <- Re(LL$logdetCovstarc) + Q <- Qv(v) # Precision of latent vector + + logpv_value <- 0.5 * logdetCovstar_c + loglik(latstar_cc) - + 0.5 * sum((latstar_cc * Q) %*% latstar_cc) + + 0.5 * (K + nu) * v - + (0.5 * nu + a_delta) * log(0.5 * nu * exp(v) + b_delta) + + outlist <- list(value = logpv_value, latstar = latstar_cc) + return(outlist) + + } + + + #--- Exploration with golden search + find_vmap <- function(){ + v0 <- 15 + vv <- c() + lpvv <- c() + vv[1] <- v0 + logpvinit <- logpv(v0, lat0 = rep(0, dimlat)) + lpvv[1] <- logpvinit$value + lat0 <- logpvinit$latstar + signdir <- 3 + signdirvec <- c() + m <- 2 + + while(signdir > 0){ + vv[m] <- vv[m - 1] - stepsize + logpvm <- logpv(vv[m], lat0 = lat0) + lpvv[m] <- logpvm$value + lat0 <- logpvm$latstar + signdir <- lpvv[m] - lpvv[m - 1] + m <- m + 1 + } + + vmap <- (vv[m - 1] + vv[m - 2]) * 0.5 + outlist <- list(vmap = vmap, latstar = lat0) + return(outlist) + } + + vfind <- find_vmap() + vstar <- vfind$vmap + lat0 <- vfind$latstar + LL <- Rcpp_Laplace(lat0 = lat0, v=vstar, K=K,PDcorrect,Dloglik,D2loglik,Qv) + lathat <- LL$latstar + + #--- Estimated coefficients + thetahat <- lathat[1:K] # Estimated latent vector + betahat <- lathat[(K + 1):(K + p)] # Estimated betas (incidence) + gammahat <- lathat[(K + p + 1):dimlat] # Estimated gammas (latency) + Covhat <- Cov_full(LL$Covstar) # Covariance matrix dimlat x dimlat + + logpv2 <- function(v){ + + LL <- Rcpp_Laplace(lat0 = lathat,v=v,K=K,PDcorrect,Dloglik,D2loglik,Qv) #Laplace approximation + latstar_cc <- LL$latstar # Mean of Laplace + Covstar_c <- LL$Covstar # Covariance matrix of Laplace + logdetCovstar_c <- Re(LL$logdetCovstarc) + Q <- Qv(v) # Precision of latent vector + + logpv_value <- 0.5 * logdetCovstar_c + loglik(latstar_cc) - + 0.5 * sum((latstar_cc * Q) %*% latstar_cc) + + 0.5 * (K + nu) * v - + (0.5 * nu + a_delta) * log(0.5 * nu * exp(v) + b_delta) + + return(logpv_value) + + } + + #---- Credible intervals of regression coefficients + + CI <- function(alpha){ + CImat <- matrix(0, nrow = (p + q), ncol = 2) + zq <- stats::qnorm(p = .5 * alpha, lower.tail = F) # Upper alpha/2 percentile N(0,1) + sdcoeff <- sqrt(diag(Covhat)[(K + 1):dimlat]) + CImat[, 1] <- c(betahat, gammahat) - zq * sdcoeff + CImat[, 2] <- c(betahat, gammahat) + zq * sdcoeff + colnames(CImat) <- c(paste("lower.", 1 - alpha), paste("upper.", 1 - alpha)) + return(CImat) + } + + CI90 <- CI(0.10) # 90% credible intervals for reg. coeffs. + CI95 <- CI(0.05) # 95% credible intervals for reg. coeffs. + + #---- Credible interval for incidence p(x) + + CIp <- function(x, alpha){ + gbhat <- log(log(as.numeric(1 + exp(-(x %*% betahat))))) + Sigmabhat <- Covhat[(K + 1):(K + p), (K + 1):(K + p)] + gradbhat <- (-1) * ((1 - px(betahat, x))/ + log(as.numeric(1 + exp(-(x %*% betahat))))) * x + qz_alpha <- stats::qnorm(alpha * 0.5, lower.tail = FALSE) + postsd <- sqrt(as.numeric(t(gradbhat) %*% Sigmabhat %*% gradbhat)) + CIp_alpha <- c(gbhat - qz_alpha * postsd, gbhat + qz_alpha * postsd) + CIp_original <- rev(exp(-exp(CIp_alpha))) + return(CIp_original) + } + + #---- Credible interval for cure rate 1-p(x) + + CIcure <- function(x, alpha){ + gbhat <- log(log(as.numeric(1 + exp(x %*% betahat)))) + Sigmabhat <- Covhat[(K + 1):(K + p), (K + 1):(K + p)] + gradbhat <- (px(betahat, x)/log(as.numeric(1 + exp(x %*% betahat)))) * x + qz_alpha <- stats::qnorm(alpha * 0.5, lower.tail = FALSE) + postsd <- sqrt(as.numeric(t(gradbhat) %*% Sigmabhat %*% gradbhat)) + CIcure_alpha <- c(gbhat - qz_alpha * postsd, gbhat + qz_alpha * postsd) + CIcure_original <- rev(exp(-exp(CIcure_alpha))) + return(CIcure_original) + } + + xmean <- as.numeric(apply(X,2,"mean")) # Mean covariates for X + + # Compute CI for incidence p(x) + CI_incidence90 <- CIp(xmean, 0.10) + CI_incidence95 <- CIp(xmean, 0.05) + + # Compute CI for cure rate 1-p(x) + CI_cure90 <- CIcure(xmean, 0.10) + CI_cure95 <- CIcure(xmean, 0.05) + + toc <- round(proc.time()-tic,3)[3] + + # List to output + + outlist <- list( + X = X, + Z = Z, + latenthat = lathat, + thetahat = thetahat, + betahat = betahat, + gammahat = gammahat, + vhat = vstar, + tu = tu, + ftime = ftime, + penorder = penorder, + K = K, + p = p, + q = q, + xmean = xmean, + px = px, + logpv = logpv2, + cumult = cumult, + CI90 = CI90, + CI95 = CI95, + CIcure90 = CI_cure90, + CIcure95 = CI_cure95, + CIincid90 = CI_incidence90, + CIincid95 = CI_incidence95, + Covhat = Covhat, + timer = toc + ) + + attr(outlist,"class") <- "lpsmc" + outlist +} + diff --git a/R/lpsmc_object.R b/R/lpsmc_object.R new file mode 100644 index 0000000..78d6789 --- /dev/null +++ b/R/lpsmc_object.R @@ -0,0 +1,48 @@ +#' Objects related to lpsmc routine. +#' +#' A list of objects resulting from the \link{lpsmc} routine. +#' +#' @return +#' +#' \item{X}{The matrix of covariates in the incidence part (including +#' an intercept). As the model includes an intercep, the first column +#' corresponds to a column of ones.} +#' \item{Z}{The matrix of covariates in the latency part.} +#' \item{latenthat}{The vector of estimated latent variables. This includes +#' the vector of B-spline coefficients, the regression coefficients +#' of the incidence part and latency part respectively.} +#' \item{thetahat}{The vector of estimated B-spline coefficients.} +#' \item{betahat}{The vector of estimated coefficients in the incidence part.} +#' \item{gammahat}{The vector of estimated coefficents in the latency part.} +#' \item{vhat}{The (approximate) posterior maximum for the penalty paramter +#' (in log scale).} +#' \item{tu}{The largest observed follow-up time.} +#' \item{ftime}{The vector of observed survival times.} +#' \item{penorder}{The chosen penalty order for P-splines.} +#' \item{K}{The number of B-spline coefficients.} +#' \item{p}{The number of regression in the incidence part (including +#' intercept).} +#' \item{q}{The number of regression coefficients in latency part.} +#' \item{xmean}{The mean covariate vector for the incidence part.} +#' \item{px}{The logistic link function} +#' \item{cumult}{A function required to compute the estimated baseline +#' survival} +#' \item{CI90}{The (approximate) 90% credible intervals for the regression +#' coefficients in the incidence and latency part.} +#' \item{CI95}{The (approximate) 95% credible intervals for the regression +#' coefficients in the incidence and latency part.} +#' \item{CIcure90}{The (approximate) 90% credible interval for the +#' cure rate (at the mean covariate profile).} +#' \item{CIcure95}{The (approximate) 95% credible interval for the +#' cure rate (at the mean covariate profile).} +#' \item{CIincid90}{The (approximate) 90% credible interval for the +#' incidence rate (at the mean covariate profile).} +#' \item{CIincid95}{The (approximate) 95% credible interval for the +#' incidence rate (at the mean covariate profile).} +#' +#' @seealso \link{lpsmc}. +#' +#' @author Oswaldo Gressani \email{oswado_gressani@hotmail.fr} . +#' +#' @name lpsmc.object +NULL diff --git a/R/postpendist.R b/R/postpendist.R new file mode 100644 index 0000000..f0d5854 --- /dev/null +++ b/R/postpendist.R @@ -0,0 +1,59 @@ +#' Plot the normalized approximate posterior of the penalty parameter +#' +#' +#' @description +#' Plots the normalized approximate postrior of the penalty parameter based +#' on a an object of class \code{lpsmc}. +#' +#' +#' @param x An object of class \code{lpsmc} +#' @param low The lower bound on the x-axis (in log scale). +#' @param up The upper bound on the x-axis (in log scale). +#' @param themetype The theme, either "classic", "gray", "light" or "dark". +#' +#' @examples +#' ### Posterior penalty distribution for breast cancer dataset +#' rm(list=ls()) +#' data("breastcancer") +#' formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +#' fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20, +#' stepsize = 0.1) +#' postpendist(fitcancer, 5, 12, themetype = "gray") +#' +#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +#' +#' @export + +postpendist <- function(x, low, up, themetype=c("classic","gray","light","dark")){ + + themetype <- match.arg(themetype) + if(themetype == "classic"){ + themeval<- eval(parse(text="ggplot2::theme_classic()")) + } else if(themetype == "gray"){ + themeval <- eval(parse(text="ggplot2::theme_gray()")) + } else if (themetype == "light"){ + themeval <- eval(parse(text="ggplot2::theme_light()")) + } else if (themetype == "dark"){ + themeval <- eval(parse(text="ggplot2::theme_dark()")) + } + + vgrid <- seq(low, up, length = 200) + dv <- vgrid[2] - vgrid[1] + logpvgrid <- unlist(lapply(lapply(vgrid, x$logpv),"[[",1)) + pvv <- exp(logpvgrid-max(logpvgrid)) + cnorm <- 1/sum(pvv * dv) + pv <- cnorm * pvv + pvimg <- data.frame(vgrid, pv) + skplot <- ggplot2::ggplot(data = pvimg, ggplot2::aes(x=vgrid, y=pv)) + skplot + ggplot2::geom_line(colour="darkblue", size=1.2) + + ggplot2::xlab("v") + + ggplot2::ylab("Normalized approximate posterior to p(v|D)") + + ggplot2:: theme(axis.title.x = ggplot2::element_text(size = 14), + axis.title.y = ggplot2::element_text(size = 14)) + + ggplot2::geom_vline(xintercept = x$vhat, linetype = "dashed", size = 1) + + themeval + +} + + + diff --git a/R/simdatmixcure.R b/R/simdatmixcure.R new file mode 100644 index 0000000..cd195d3 --- /dev/null +++ b/R/simdatmixcure.R @@ -0,0 +1,146 @@ +#' Simulation of survival data to fit mixture cure models +#' +#' @description +#' This routines simulates survival data with a cure fraction. The data is +#' simulated according to a mixture cure model with a logistic link for the +#' incidence part of the model. The incidence part includes two covariates +#' following a standard normal and a Bernoulli distribution with success +#' probability equal to 0.5. The latency part assumes a Weibull-Cox model with +#' two covariates (a standard normal variate and a Bernoulli variate with +#' success probability equal to 0.4). +#' +#' @param n Sample size. +#' @param wscale The positive scale parameter of the Weibull distribution used +#' to generate the survival times of the uncured subjects. +#' @param wshape The positive shape parameter of the Weibull distribution used +#' to generate the survival times of the uncured subjects. +#' @param setting The setting under which survival times will be generated. If +#' \code{setting = 1}, the coefficients of the incidence part are +#' \emph{beta0=0.70, beta1=-1.15 and beta2=0.95} and the coefficients of the +#' latency part are \emph{gamma1=-0.10 and gamma2=0.25}. If +#' \code{setting = 2}, the coefficients of the incidence part are +#' \emph{beta0=0.1.25, beta1=-0.75 and beta2=0.45} and the coefficients of the +#' latency part are \emph{gamma1=-0.10 and gamma2=0.20}. +#' +#' @return +#' An object of class \code{simixcure} containing different objects of the +#' simulated dataset. Details can be found by typing ?simixcure.object. +#' +#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +#' +#' @seealso \link{simdatmixcure.object} +#' +#' @examples +#' ### Simulate a sample of size n=300 under Scenario 1. +#' set.seed(4408) +#' simdat <- simdatmixcure(n = 300, wshape = 1.45, wscale = 0.25, setting = 1) +#' plot(simdat) # Plot the baseline survival and Kaplan-Meier curve +#' simdat$info # Print information on Cure and Censoring levels +#' +#' @export + +simdatmixcure <- function(n, wscale, wshape, setting) { + + #--- Incidence part (logistic model) + if(setting == 1){ + betas <- c(0.70,-1.15, 0.95) + gammas <- c(-0.10, 0.25) + censrate <- 0.16 + tau0 <- 8 + } else if (setting == 2){ + betas <- c(1.25, -0.75, 0.45) + gammas <- c(-0.10, 0.20) + censrate <- 0.05 + tau0 <- 8 + } + X <- cbind(rep(1, n), stats::rnorm(n), stats::rbinom(n, 1, prob = 0.5)) + Xbetas <- as.numeric(X %*% betas) + p <- (1 + exp(-Xbetas)) ^ (-1) + + #--- Generation of cure status + B <- stats::rbinom(n, size = 1, prob = p) # Cure status B = 1 --> uncured + + #--- Generation of survival times for uncured (B=1) subject from Weibull Cox PH + Z <- cbind(stats::rnorm(n), stats::rbinom(n, size = 1, prob = 0.4)) + + ## Draw survival times from Weibull distribution + weibshape <- wshape # Weibull shape parameter > 0 + weibscale <- wscale # Weibull scale parameter > 0 + S0 <- function(t) exp(-weibscale * t ^ (weibshape)) # True baseline survival + h0 <- function(t) weibscale * weibshape * t ^ (weibshape - 1) # True baseline hazard + U <- stats::runif(n) # Uniform draw + Tlat <- as.numeric((-log(U) / (weibscale * exp(Z %*% gammas))) ^ (1 / weibshape)) + Tlat[Tlat > tau0] <- tau0 # Truncation of survival times + Tlat[which(B == 0)] <- 20000 # Large (infinite) survival time for cured subjects + tobs <- Tlat + + #--- Censoring follows exponential distribution with rate lambda + tau1 <- 11 + tup <- 11 + C <- stats::rexp(n, rate = censrate) + C[C > tau1] <- tau1 + TgreatC <- which(Tlat > C) + tobs[TgreatC] <- C[TgreatC] + delta <- as.numeric(Tlat <= C) + + #--- Summary statistics on cure and censoring rates + dataKM <- as.data.frame(cbind(tobs, delta)) + fitKM <- survival::survfit(survival::Surv(tobs, delta) ~ 1, data = dataKM) + plateau <- fitKM$time[utils::tail(which((diff(fitKM$surv) < 0) == TRUE), 1) + 1] + nobs_plateau <- sum(tobs > plateau) + infomat <- matrix(0, nrow = 1, ncol = 6) + colnames(infomat) <- c("n", "Cure level", "Cens.rate", + "Cens.level","% obs in plateau", "% cured in plateau") + infomat[1, 1] <- n # Sample size + infomat[1, 2] <- round(sum(B == 0) / n * 100, 3) # Cure rate + infomat[1, 3] <- censrate + infomat[1, 4] <- round(sum(1 - delta) / n * 100, 3) # Censoring level + infomat[1, 5] <- round(nobs_plateau / n * 100, 3) # % of obs. in plateau + infomat[1, 6] <- round(sum(B[which(tobs > plateau)] == 0) / + nobs_plateau * 100, 3) + + # Extract variables + simdata <- data.frame(tobs, delta, X, Z) + colnames(simdata) <- c("tobs","event","Intercept","x1","x2","z1","z2") + + outlist <- list(n = n, # sample size + X = X, # covariate matrix of incidence part + Z = Z, # covariate matrix of latency part + betas = betas, # coeffs. of incidence model + gammas = gammas, # coeffs. of latency model + tobs = tobs, # observed failure or censoring time + delta = delta, # event indicator 1 --> event occurred + h0 = h0, # true baseline hazard + S0 = S0, # true baseline survival + tup = tup, # Upper bound of follow-up + fitKM = fitKM, # Kaplan-Meier fit + plateau = plateau, # Plateau value + info = infomat, # Summary statistics + setting = setting, # The chosen setting + wshape = wshape, # Shape parameter + wscale = wscale, # Scale parameter + simdata = simdata) # Simlated dataframe + + attr(outlist, "class") <- "simdatmixcure" + outlist +} + + + + + + + + + + + + + + + + + + + + diff --git a/R/simdatmixcure_object.R b/R/simdatmixcure_object.R new file mode 100644 index 0000000..005541f --- /dev/null +++ b/R/simdatmixcure_object.R @@ -0,0 +1,33 @@ +#' Object resulting from simulating survival data with a cure fraction. +#' +#' A list of objects resulting from the \link{simdatmixcure} routine. +#' +#' @return Values: +#' +#' \item{n}{The sample size.} +#' \item{X}{The generated matrix of covariates in the incidence part (including +#' an intercept). As the model includes an intercep, the first column +#' corresponds to a column of ones.} +#' \item{Z}{The generated matrix of covariates in the latency part.} +#' \item{betas}{The vector of regression coefficients in the incidence part.} +#' \item{gammas}{The vector of regression coefficents in the latency part.} +#' \item{tobs}{The vector of observed survival times.} +#' \item{delta}{The vector of event indicators.} +#' \item{h0}{A function corresponding to the Weibull baseline hazard used in +#' the latency part.} +#' \item{S0}{A function corresponding to the Weibull baseline survival used in +#' the latency part.} +#' \item{tup}{The largest observed follow-up time.} +#' \item{fitKM}{The Kaplan-Meier fit to the generated survival data.} +#' \item{plateau}{The x-axis value at which the plateau of the Kaplan-Meier +#' curve starts.} +#' \item{info}{A character string giving information on censoring and +#' cure rates.} +#' \item{setting}{The chosen setting, either 1 or 2.} +#' +#' @seealso \link{simdatmixcure} +#' +#' @author Oswaldo Gressani \email{oswado_gressani@hotmail.fr} . +#' +#' @name simdatmixcure.object +NULL diff --git a/R/simlpsmc.R b/R/simlpsmc.R new file mode 100644 index 0000000..cfd3846 --- /dev/null +++ b/R/simlpsmc.R @@ -0,0 +1,408 @@ +#' Assessing the statistical performance of lpsmc +#' +#' +#' @description +#' Simulates the lpsmc function +#' +#' @param n Sample size. +#' @param K Number of B-spline basis functions. +#' @param scenario Either 1 or 2. +#' @param S Total number of replications. +#' @param exactrep Exactly replicate results. +#' @param themetype The theme of the plot either "classic", "gray","light" +#' or "dark". +#' +#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +#' +#' @examples +#' ### Scenario 1, n=300 +#' sim1 <- simlpsmc(n = 300, scenario = 1, S = 10, exactrep = TRUE) +#' suppressWarnings(print(sim1$S0plot)) +#' sim1$ASEplot +#' +#' @export + +simlpsmc <- function(n = 300, K = 15, scenario = 1, S = 500, exactrep = FALSE, + themetype = c("classic","gray","light","dark")){ + + themetype <- match.arg(themetype) + if(themetype == "classic"){ + themeval<- eval(parse(text="ggplot2::theme_classic()")) + } else if(themetype == "gray"){ + themeval <- eval(parse(text="ggplot2::theme_gray()")) + } else if (themetype == "light"){ + themeval <- eval(parse(text="ggplot2::theme_light()")) + } else if (themetype == "dark"){ + themeval <- eval(parse(text="ggplot2::theme_dark()")) + } + + if(exactrep == TRUE) { + # Setting seeds (for reproducibility) + if (scenario == 1 && n == 300) { + set.seed(1989) # Fall of the Berlin wall + } else if (scenario == 2 && n == 300) { + set.seed(1789) # France initiates revolution + } else if (scenario == 1 && n == 600) { + set.seed(1080) # Battle on the Elster + } else if (scenario == 2 && n == 600) { + set.seed(1055) # Death of Constantine IX + } + } + + # Prepare matrix to host esimates + thetamat <- matrix(0, nrow = S, ncol = K) # Matrix to host estim. spline coef. + betamat <- matrix(0, nrow = S, ncol = 3) # Matrix to host estimated betas + gammamat <- matrix(0, nrow = S, ncol = 2) # Matrix to host estimated gammas + coverage90 <- matrix(0, nrow = S, ncol = 5) # Matrix for 90% coverage proba. + coverage95 <- matrix(0, nrow = S, ncol = 5) # Matrix for 95% coverage proba. + pvec <- c(0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90) + S0cover90 <- matrix(0, nrow = S, ncol = length(pvec)) # For 90% coverage of S0 + S0cover95 <- matrix(0, nrow = S, ncol = length(pvec)) # For 95% coverage of S0 + Sucover90 <- matrix(0, nrow = S, ncol = length(pvec)) # For 90% coverage of Su + Sucover95 <- matrix(0, nrow = S, ncol = length(pvec)) # For 95% coverage of Su + ASEpvec <- c() + + if(scenario == 1){ + betas_true <- c(0.70,-1.15, 0.95) + gammas_true <- c(-0.10, 0.25) + regcoeffs_true <- c(betas_true, gammas_true) + } else if (scenario == 2){ + betas_true <- c(1.25, -0.75, 0.45) + gammas_true <- c(-0.10, 0.20) + regcoeffs_true <- c(betas_true, gammas_true) + } else{ + stop("Scenario must be either 1 or 2") + } + + #-- Progress bar + progbar <- progress::progress_bar$new( + format = crayon::white$yellow("Simulation in progress [:elapsed :spin] [:bar] :percent"), + total = S, + clear = FALSE + ) + + tic <- proc.time() #start clock + + for (s in 1:S) { + # Generate survival data from mixture cure model + simdat <- simdatmixcure(n = n, wshape = 1.45, wscale = 0.25, + setting = scenario) + + # Extract variables and construct formula + simdatframe <- simdat$simdata + formula <- Surv(tobs, event) ~ inci(x1 + x2) + late(z1 + z2) + + # Fit model + fitlpsmc <- lpsmc(formula = formula, data = simdatframe, K = K) + + # Fill host matrices + thetamat[s,] <- fitlpsmc$thetahat + betamat[s,] <- fitlpsmc$betahat + gammamat[s,] <- fitlpsmc$gammahat + + CI90 <- fitlpsmc$CI90 + CI95 <- fitlpsmc$CI95 + + for (j in 1:5) { + coverage90[s, j] <- regcoeffs_true[j] >= CI90[j, 1] && + regcoeffs_true[j] <= CI90[j, 2] + coverage95[s, j] <- regcoeffs_true[j] >= CI95[j, 1] && + regcoeffs_true[j] <= CI95[j, 2] + } + + + # Compute coverage probabilities for baseline survival at selected quantiles + + ## Inverse of true baseline survival to find quantiles of interest tq + quantileS0 <- function(p) { + val <- ((-1 / simdat$wscale) * log(p)) ^ (1 / simdat$wshape) + return(val) + } + + tq <- sapply(pvec, quantileS0) + S0tq <- sapply(tq, simdat$S0) + tqbounds <- rev(tq[c(utils::tail(which(pvec < 0.20), 1), + utils::head(which(pvec > 0.70), 1))]) + + # Function for CI for S0 at t + S0CI <- function(t, alpha, thetahat){ + cum_tt <- fitlpsmc$cumult(t = t, theta = thetahat, k = 0, l = 0) + gstar <- log(cum_tt) + grad_g <- fitlpsmc$cumult(t = t, theta = thetahat, k = 1, l = 0)[-K] * + (1 / cum_tt) + Sig_thetahat <- fitlpsmc$Covhat[1:(K-1), 1:(K-1)] + qz_alpha <- stats::qnorm(alpha * 0.5, lower.tail = FALSE) + post_sd <- sqrt(as.numeric(t(grad_g) %*% Sig_thetahat %*% grad_g)) + if(t<=tqbounds[1] || t>=tqbounds[2]){ + dfStud <- 10 + qz_alpha <- stats::qt(alpha * 0.5, df = dfStud, lower.tail = FALSE) + post_sd <- sqrt((dfStud/(dfStud-2)) * + as.numeric(t(grad_g) %*% Sig_thetahat %*% grad_g)) + } + CI_g <- c(gstar - qz_alpha * post_sd, gstar + qz_alpha * post_sd) + CIS <- rev(exp(-exp(CI_g))) + return(CIS) + } + + # Compute 90% and 95% coverage probability + + CI_S090 <- sapply(tq, S0CI, alpha = 0.10, thetahat = fitlpsmc$thetahat) + colnames(CI_S090) <- pvec + CI_S095 <- sapply(tq, S0CI, alpha = 0.05, thetahat = fitlpsmc$thetahat) + colnames(CI_S095) <- pvec + + for(j in 1:length(tq)){ + S0cover90[s,j] <- pvec[j] >= CI_S090[1,j] && pvec[j] <= CI_S090[2,j] + S0cover95[s,j] <- pvec[j] >= CI_S095[1,j] && pvec[j] <= CI_S095[2,j] + } + + + # Compute coverage probabilities for survival of uncured at selected + # quantiles and for covariate profile z=(0,0.4) + + zprofile <- c(0, 0.4) + + ## Inverse of survival for uncured to find quantiles of interest tq + quantileSu <- function(p) { + val <- ((-1 / (simdat$wscale*exp(as.numeric(zprofile%*%simdat$gammas)))) + * log(p)) ^ (1 / simdat$wshape) + return(val) + } + + tqq <- sapply(pvec, quantileSu) + Sutq <- sapply(tqq, simdat$S0)^(exp(as.numeric(zprofile%*%simdat$gammas))) + tqqbounds <- rev(tqq[c(utils::tail(which(pvec < 0.20), 1), + utils::head(which(pvec > 0.70), 1))]) + + SuCI <- function(t, alpha, thetahat){ + cum_tt <- fitlpsmc$cumult(t = t, theta = thetahat, k = 0, l = 0) + gstar <- as.numeric(zprofile %*% fitlpsmc$gammahat) + log(cum_tt) + grad_g <- c(fitlpsmc$cumult(t = t, theta = thetahat, k = 1, l = 0)[-K] * + (1 / cum_tt), zprofile) + p <- fitlpsmc$p + q<- fitlpsmc$q + dimlat <- K+p+q + SSighat <- matrix(0, nrow = (K - 1) + q, ncol = (K - 1) + q) + SSighat[1:(K-1),1:(K-1)] <- fitlpsmc$Covhat[1:(K-1), 1:(K-1)] + SSighat[1:(K-1),K:(K+1)] <- fitlpsmc$Covhat[1:(K - 1), (K + p + 1):dimlat] + SSighat[K:(K+1),1:(K-1)] <- t(fitlpsmc$Covhat[1:(K - 1), (K + p + 1):dimlat]) + SSighat[K:(K+1),K:(K+1)] <- fitlpsmc$Covhat[(K + p + 1):dimlat, (K + p + 1):dimlat] + qz_alpha <- stats::qnorm(alpha * 0.5, lower.tail = FALSE) + post_sd <- sqrt(as.numeric(t(grad_g) %*% SSighat %*% grad_g)) + if(t<=tqqbounds[1] || t>=tqqbounds[2]){ + dfStud <- 10 + qz_alpha <- stats::qt(alpha * 0.5, df = dfStud, lower.tail = FALSE) + post_sd <- sqrt((dfStud/(dfStud-2)) * + as.numeric(t(grad_g) %*% SSighat %*% grad_g)) + } + CI_g <- c(gstar - qz_alpha * post_sd, gstar + qz_alpha * post_sd) + CIS <- rev(exp(-exp(CI_g))) + return(CIS) + } + + CI_Su90 <- sapply(tqq, SuCI, alpha = 0.10, thetahat = fitlpsmc$thetahat) + colnames(CI_Su90) <- pvec + CI_Su95 <- sapply(tqq, SuCI, alpha = 0.05, thetahat = fitlpsmc$thetahat) + colnames(CI_Su95) <- pvec + + for(j in 1:length(tqq)){ + Sucover90[s,j] <- pvec[j] >= CI_Su90[1,j] && pvec[j] <= CI_Su90[2,j] + Sucover95[s,j] <- pvec[j] >= CI_Su95[1,j] && pvec[j] <= CI_Su95[2,j] + } + + progbar$tick() + } + + toc <- proc.time() - tic + toc <- round(toc[3],3) + + ## Compute output statistics Mean, Bias, ESE, RMSE, CP90%, CP95% + + # Define metrics + + # Bias + Bias <- function(bmat, btrue, S) { + btruemat <- matrix(rep(btrue, S), ncol = length(btrue), byrow = T) + diff <- bmat - btruemat + biasvec <- apply(diff, 2, "mean") + return(biasvec) + } + + # Empirical standard error (ESE) + ESE <- function(bmat, btrue, S) { + bmean <- apply(bmat, 2, "mean") + bmean_mat <- matrix(rep(bmean, S), ncol = length(btrue), byrow = T) + ESEvec <- sqrt((1 / (S - 1)) * colSums((bmat - bmean_mat) ^ 2)) + return(ESEvec) + } + + # Root mean square error (RMSE) + RMSE <- function(bmat, btrue,S){ + btruemat <- matrix(rep(btrue, S), ncol = length(btrue), byrow = T) + RMSEvec <- sqrt(apply((bmat - btruemat) ^ 2, 2, "mean")) + return(RMSEvec) + } + + + # Create output matrix + simulres <- matrix(0, nrow = 5 , ncol = 8) + colnames(simulres) <- c("Scenario","Parameters", "Mean", "Bias", "ESE", "RMSE", + "CP90","CP95") + rownames(simulres) <- c("beta0","beta1","beta2","gamma1","gamma2") + simulres[, 1] <- rep(scenario, 5) + simulres[, 2] <- regcoeffs_true + simulres[, 3] <- round(colMeans(cbind(betamat,gammamat)),3) + simulres[, 4] <- round(c(Bias(betamat,betas_true,S), + Bias(gammamat,gammas_true,S)),3) + simulres[, 5] <- round(c(ESE(betamat,betas_true,S), + ESE(gammamat,gammas_true,S)),3) + simulres[, 6] <- round(c(RMSE(betamat, betas_true, S), + RMSE(gammamat,gammas_true, S)),3) + simulres[, 7] <- round(colMeans(coverage90) * 100, 2) + simulres[, 8] <- round(colMeans(coverage95) * 100, 2) + + # Create plot for baseline survival + + S0tdom <- seq(0, fitlpsmc$tu, length = 200) + S0tar <- sapply(S0tdom, simdat$S0) + S0hat <- matrix(0, nrow = S, ncol = length(S0tdom)) + + basehaz <- function(t, thetahat){ + val <- exp(as.numeric(Rcpp_cubicBspline(t, lower = 0, + upper = fitlpsmc$tu, K = K) %*% thetahat)) + return(val) + } + + basesurv <- function(t, thetahat){ + val <- exp(-stats::integrate(basehaz, lower = 0, upper = t, + thetahat = thetahat)$value) + return(val) + } + + for(s in 1:S){ + Shats <- sapply(S0tdom, basesurv, thetahat = thetamat[s, ]) + Shats[1] <- 1 + S0hat[s,] <- Shats + } + + S0hat <- t(S0hat) + S0dat <- data.frame(cbind(S0tdom,S0tar,S0hat,apply(S0hat,1,"median"))) + S0baseplot <- ggplot2::ggplot(data = S0dat, ggplot2::aes(x=S0tdom)) + S0colnames <- names(S0dat) + + S0baseplot <- S0baseplot + + ggplot2::geom_line(ggplot2::aes(y=S0dat[,2]), colour="black", size = 1.2) + + ggplot2::xlim(0,8.5) + + + for(s in 3:(S+2)){ + S0baseplot <- + S0baseplot + eval(parse( + text = paste( + "ggplot2::geom_line(ggplot2::aes(y=", + S0colnames[s], + "),colour='#8564DF')", + sep = "" + ) + )) + } + + S0baseplot <- S0baseplot + ggplot2::geom_line(ggplot2::aes(y=S0dat[,2]), + colour="black", size = 1.2) + + ggplot2::geom_line(ggplot2::aes(y=S0dat[,ncol(S0dat)]), + colour="firebrick2", size = 1.1, linetype = "dashed") + + themeval + + ggplot2::xlab("t") + + ggplot2::ylab(expression(S[0](t))) + + ggplot2::ggtitle(paste0("Scenario ",scenario,", n=",n))+ + ggplot2:: theme(axis.title.x = ggplot2::element_text(size = 14), + axis.title.y = ggplot2::element_text(size = 14), + plot.title = ggplot2::element_text(hjust = 0.5, size = 15), + axis.text.x = ggplot2::element_text(size=12), + axis.text.y = ggplot2::element_text(size=12)) + + # Compute coverage probabilities for baseline survival + + S0CP90 <- colMeans(S0cover90) * 100 + S0CP95 <- colMeans(S0cover95) * 100 + baselinecoverage <- matrix(0, nrow = 2, ncol = length(pvec)) + colnames(baselinecoverage) <- paste0("t", pvec) + rownames(baselinecoverage) <- c("CP90%","CP95%") + baselinecoverage[1, ] <- S0CP90 + baselinecoverage[2, ] <- S0CP95 + + + # Compute coverage probabilities for survival of uncured + + SuCP90 <- colMeans(Sucover90) * 100 + SuCP95 <- colMeans(Sucover95) * 100 + + Sucoverage <- matrix(0, nrow = 2, ncol = length(pvec)) + colnames(Sucoverage) <- paste0("t", pvec) + rownames(Sucoverage) <- c("CP90%","CP95%") + Sucoverage[1, ] <- SuCP90 + Sucoverage[2, ] <- SuCP95 + + # Average squared error for proportion of uncured (incidence) + x1m <- seq(-1.5, 1.5, by = 0.001) + x2m <- c(rep(0, length(x1m)), rep(1, length(x1m))) + Xx <- cbind(rep(1, length(x2m)), rep(x1m, 2), x2m) + ptrue <- fitlpsmc$px(simdat$betas, Xx) + + for (s in 1:S) { + phat <- fitlpsmc$px(betamat[s,], Xx) + ASEpvec[s] <- mean((phat - ptrue) ^ 2) + } + + ASEdatframe <- data.frame(as.factor(rep(1, S)), ASEpvec) + ASEplot <- ggplot2::ggplot(data = ASEdatframe, + ggplot2::aes(x = ASEdatframe[,1], y = ASEpvec)) + + ggplot2::geom_boxplot(color = "black", fill = "#DE4E4E", width = 0.6) + + ggplot2::xlab("") + + ggplot2::ylab("ASE(p)") + + ggplot2::ggtitle(paste0("Scenario ",scenario,", n=",n)) + + themeval + + ggplot2::theme(axis.text.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank(), + axis.text.y = ggplot2::element_text(size = 12), + axis.title.y = ggplot2::element_text(size = 14), + plot.title = ggplot2::element_text(hjust = 0.5, size = 15)) + + + + # Print on screen + cat(paste(rep("-",50),collapse = ""),"\n") + cat("Simulation results for LPSMC \n") + cat(paste(rep("-",50),collapse = ""),"\n") + cat("Scenario: ", scenario, "\n") + cat("Sample size: ", n, "\n") + cat("No. of B-splines: ", K, "\n") + cat("No. of replications: ", S, "\n") + cat(paste(rep("-",90),collapse = ""),"\n") + print.table(as.matrix(simulres), digits = 3, justify = "left") + cat(paste(rep("-",90),collapse = ""),"\n") + cat("Estimated coverage probabilities of S0 at selected quantiles \n") + cat(paste(rep("-",65),collapse = ""),"\n") + print.table(baselinecoverage, digits = 3, justify = "left") + cat(paste(rep("-",90),collapse = ""),"\n") + cat("Estimated coverage probabilities of S(uncured) for z=(0,0.4) \n") + cat(paste(rep("-",65),collapse = ""),"\n") + print.table(Sucoverage, digits = 3, justify = "left") + cat(paste(rep("-",90),collapse = ""),"\n") + cat(paste0("Total elapsed time: ",toc, " seconds.\n")) + + + + + outlist <- list(simulres = simulres, + S0plot = S0baseplot, + ASEplot = ASEplot, + ASEpvec = ASEpvec, + S0coverage = baselinecoverage, + Sucoverage = Sucoverage, + elapsed = toc) + +} + + + diff --git a/R/survcurve.R b/R/survcurve.R new file mode 100644 index 0000000..7e592f1 --- /dev/null +++ b/R/survcurve.R @@ -0,0 +1,204 @@ +#' Survival curves for the mixture cure model +#' +#' @description +#' This routine creates objects related to the baseline survival +#' curve and the survival curve of the uncured subjects in a mixture +#' cure model fitted with \code{lpsmc}. +#' +#' @param x An object of type \code{lpsmc}. +#' @param type Either baseline or uncured. +#' @param covarprofile The matrix of covariate profiles. +#' @param cred.int The level for building the credible intervals. +#' @param themetype The theme of the plot either "classic", "gray","light" +#' or "dark". +#' +#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +#' +#' @examples +#' ## Plot survival curves of uncured subjects for AGE=(30,50) and ER=1 +#' rm(list=ls()) +#' data("breastcancer") +#' formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +#' fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20) +#' profilematrix <- matrix(c(30, 1, 50, 1), nrow = 2, byrow = TRUE) +#' survcurve(fitcancer, type = "uncured", covarprofile = profilematrix) +#' +#' @export + +survcurve <- function(x, type=c("baseline","uncured"), covarprofile=NULL, + cred.int = 0.95, + themetype = c("classic","gray","light","dark") ){ + + themetype <- match.arg(themetype) + if(themetype == "classic"){ + themeval<- eval(parse(text="ggplot2::theme_classic()")) + } else if(themetype == "gray"){ + themeval <- eval(parse(text="ggplot2::theme_gray()")) + } else if (themetype == "light"){ + themeval <- eval(parse(text="ggplot2::theme_light()")) + } else if (themetype == "dark"){ + themeval <- eval(parse(text="ggplot2::theme_dark()")) + } + + # Extract variables + K <- x$K + thetahat <- x$thetahat + gammahat <- x$gammahat + tu <- x$tu + + Sdom <- seq(0, tu, length = 100) # Domain on which to plot functions + + basehaz <- function(t){ + val <- exp(as.numeric(Rcpp_cubicBspline(t, lower = 0, + upper = tu, K = K) %*% thetahat)) + return(val) + } + + basesurv <- function(t){ + val <- exp(-stats::integrate(basehaz, lower = 0, upper = t)$value) + return(val) + } + + S0hat <- sapply(Sdom,basesurv) + plot_type <- match.arg(type) + + if(plot_type=="baseline"){ + + # Function for CI for S0 at t + S0CIfun <- function(t, alpha){ + cum_tt <- x$cumult(t = t, theta = thetahat, k = 0, l = 0) + gstar <- log(cum_tt) + grad_g <- x$cumult(t = t, theta = thetahat, k = 1, l = 0)[-K] * + (1 / cum_tt) + Sig_thetahat <- x$Covhat[1:(K-1), 1:(K-1)] + qz_alpha <- stats::qnorm(alpha * 0.5, lower.tail = FALSE) + post_sd <- sqrt(as.numeric(t(grad_g) %*% Sig_thetahat %*% grad_g)) + CI_g <- c(gstar - qz_alpha * post_sd, gstar + qz_alpha * post_sd) + CIS <- rev(exp(-exp(CI_g))) + return(CIS) + } + + S0CI <- sapply(Sdom, S0CIfun, alpha = 1 - cred.int) + S0CIlow <- S0CI[1,] + S0CIup <- S0CI[2,] + + S0dat <- data.frame(Sdom, S0hat, S0CIlow, S0CIup) + S0dat <- S0dat[1:utils::head(which(S0dat$S0hat < 0.001), 1), ] + S0dat[1,4] <- 1 + + # baseline survival plot + S0plot <- ggplot2::ggplot(data = S0dat, ggplot2::aes(x=Sdom, y=S0hat)) + + ggplot2::geom_line(size=1.1, colour="red") + + themeval + + ggplot2::xlab("t") + + ggplot2::ylab(expression(S[0](t))) + + ggplot2:: theme(axis.title.x = ggplot2::element_text(size = 14), + axis.title.y = ggplot2::element_text(size = 14), + axis.text.x = ggplot2::element_text(size=12), + axis.text.y = ggplot2::element_text(size=12)) + + ggplot2::geom_ribbon(ggplot2::aes(ymin=S0CIlow, ymax=S0CIup), + alpha = 0.15,fill="firebrick2") + + outlist <- list(tt = Sdom, + S0fit = S0hat, + S0CIup = S0CIup, + S0CIlow = S0CIlow) + + S0plot # display plot + + } else if(plot_type=="uncured"){ + + # survival of uncured subject given covariate profile + + SuCIfun <- function(t, alpha, covarprofile){ + cum_tt <- x$cumult(t = t, theta = thetahat, k = 0, l = 0) + gstar <- as.numeric(covarprofile %*% gammahat) + log(cum_tt) + grad_g <- c(x$cumult(t = t, theta = thetahat, k = 1, l = 0)[-K] * + (1 / cum_tt), covarprofile) + p <- x$p + q <- x$q + dimlat <- K+p+q + SSighat <- matrix(0, nrow = (K - 1) + q, ncol = (K - 1) + q) + SSighat[1:(K-1),1:(K-1)] <- x$Covhat[1:(K-1), 1:(K-1)] + SSighat[1:(K-1),K:(K+1)] <- x$Covhat[1:(K - 1), (K + p + 1):dimlat] + SSighat[K:(K+1),1:(K-1)] <- t(x$Covhat[1:(K - 1), (K + p + 1):dimlat]) + SSighat[K:(K+1),K:(K+1)] <- x$Covhat[(K + p + 1):dimlat, (K + p + 1):dimlat] + qz_alpha <- stats::qnorm(alpha * 0.5, lower.tail = FALSE) + post_sd <- sqrt(as.numeric(t(grad_g) %*% SSighat %*% grad_g)) + CI_g <- c(gstar - qz_alpha * post_sd, gstar + qz_alpha * post_sd) + CIS <- rev(exp(-exp(CI_g))) + return(CIS) + } + + covarprofile1 <- as.numeric(covarprofile[1, ]) + + Suhat <- S0hat ^ (exp(as.numeric(covarprofile1 %*% gammahat))) + SuCI <- sapply(Sdom, SuCIfun, alpha = 1 - cred.int, covarprofile=covarprofile1) + SuCIlow <- SuCI[1,] + SuCIup <- SuCI[2,] + + Sudat <- data.frame(Sdom, Suhat, SuCIlow, SuCIup) + Sudat[1, 4] <- 1 + + # survival plot of uncured + Suplot <- ggplot2::ggplot(data = Sudat, ggplot2::aes(x=Sdom, y=Suhat)) + + ggplot2::geom_line(colour = "blue", size = 1.1) + + themeval + + ggplot2::xlab("t") + + ggplot2::ylab("Survival of uncured group") + + ggplot2:: theme(axis.title.x = ggplot2::element_text(size = 14), + axis.title.y = ggplot2::element_text(size = 14), + axis.text.x = ggplot2::element_text(size=12), + axis.text.y = ggplot2::element_text(size=12)) + + ggplot2::geom_ribbon(ggplot2::aes(ymin=SuCIlow, ymax=SuCIup), + alpha = 0.15,fill="blue") + + if(nrow(covarprofile) > 1){ + covarprofile2 <- as.numeric(covarprofile[2, ]) + + Suhat2 <- S0hat ^ (exp(as.numeric(covarprofile2 %*% gammahat))) + SuCI <- sapply(Sdom, SuCIfun, alpha = 1 - cred.int, + covarprofile=covarprofile2) + SuCIlow <- SuCI[1,] + SuCIup <- SuCI[2,] + + Sudat2 <- data.frame(Sdom, Suhat2, SuCIlow, SuCIup) + Sudat2[1, 4] <- 1 + xlimup <- Sdom[utils::head(which(Sudat$Suhat < 1e-6),1)] + + + Suplot <- Suplot + ggplot2::geom_line(data = Sudat2, + ggplot2::aes(x=Sdom, y=Suhat2, color="green"), size=1.1) + + + ggplot2::geom_ribbon(ggplot2::aes(ymin=Sudat2$SuCIlow, + ymax=Sudat2$SuCIup), + alpha = 0.15,fill="green") + + + ggplot2::scale_colour_manual(name="Legend", + values=c("Profile 2"="green")) + + ggplot2::xlim(0,xlimup) + + } + + suppressWarnings(print(Suplot)) + } + + + + + + + + + + + + + + + + + + + +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..9b1801f --- /dev/null +++ b/README.md @@ -0,0 +1,290 @@ +The mixcurelps package +================ + + + +![Version](https://img.shields.io/badge/Version-0.1.1-lightgrey) +![Languages](https://img.shields.io/badge/Languages-R%2C%20C%2B%2B-informational) +![Lifecycle](https://img.shields.io/badge/lifecycle-experimental-yellow) + + + +The `mixcurelps` package can be used to fit mixture cure survival models +with Laplacian-P-splines. It is based on the methodology presented in +[(Gressani, Faes, Hens, 2021)](https://arxiv.org/abs/2103.01526). The +population survival function is assumed to be a mixture of uncured and +cured subjects. As such, the model accounts for long-term survivors that +will never experience the event of interest, no matter how long the +period of follow-up. The proportion of uncured subjects (also called +*incidence*) is modeled with a logistic link function and the survival +function of the uncured (the *latency* part) is approached with a +flexible Cox proportional hazards model with a baseline hazard +approximated by penalized cubic B-splines [(Eilers and Marx, +1996)](https://doi.org/10.1214/ss/1038425655). + + + +The approximate Bayesian inference methodology is governed by Laplace +approximations to the conditional posterior of latent variables. The +penalty parameter associated with P-splines is optimized and the maximum +*a posteriori* argument is considered. B-spline matrices and the Laplace +approximation subroutines are coded in C++ and integrated in R via the +[Rcpp package](http://www.rcpp.org/), so that the code is highly +efficient and computational times shrinked to seconds. The method is +entirely sampling-free and outperforms classic MCMC approaches from a +computational perspective. + + + +The package can be used to generate survival data under two different +scenarios. The generating process is such that the Kaplan-Meier curve +exhibits a plateau. Thus, the datasets are suitable for mixture cure +model analysis. Once the model has been fitted with the `lpsmc()` +routine, the user can do a variety of things among which: + +- See pointwise estimates and (approximate) credible intervals of + regression coefficients. +- Compute the estimated cure proportion and associated credibility + envelopes. +- Plot the posterior of the (log) penalty parameter. +- Plot baseline survival curves and survival curves of uncured + subjects. +- Implement simulations to assess the statistical performance of the + method. + +This version is unstable and there is still room to further improve the +routines. + +## Getting ready + +To dowload the package from Github, first install the +[devtools](https://cran.r-project.org/web/packages/devtools/index.html) +package and use the `install_github()` routine. + +``` r +install.packages("devtools") +devtools::install_github("oswaldogressani/mixcurelps") +``` + +Then load the package. + +``` r +library("mixcurelps") +``` + +## A simulated example + +First, a dataset is simulated with a sample of size 400 using the +`simdatmixcure()` routine and a plot of the associated Kaplan-Meier +curve is shown. A plateau is clearly visible in the tail (the start of +the plateau is indicated by a vertical dashed line), so that a mixture +cure model is appropriate for this type of data. + +``` r +set.seed(2785) +simul <- simdatmixcure(n = 400, wshape = 1.45, wscale = 0.25, setting = 1) +simdat <- simul$simdata +plot(simul) +``` + + + +To fit a mixture cure model with Laplacian-P-splines, the `lpsmc()` +routine is used. The first argument is a formula taking the observed +follow-up times and the covariates in the incidence part `inci()` and +latency part `late()` into account. The scalar `K` is the number of +B-spline basis functions and the argument `stepsize` is an optional +parameter controlling the precision with which the maximum *a +posteriori* of the (log) penalty parameter is computed. + +``` r +# Fit mixture cure model +formula <- Surv(tobs, event) ~ inci(x1 + x2) + late(z1 + z2) +fit <- lpsmc(formula, data = simdat , K = 12, stepsize = 0.1) +fit +``` + + ## Fitting mixture cure model with Laplacian-P-splines + ## -------------------------------------------------- + ## Sample size: 400 + ## No. of B-splines: 12 + ## ------------------------------------------------------------------------------------------ + ## (Incidence) + ## ------------------------------------------------------------------------------------------ + ## Estimate sd CI90%.low CI90.up% CI95%.low CI95%.up + ## (Intercept) 0.743 0.230 0.365 1.120 0.293 1.193 + ## x1 -1.298 0.211 -1.645 -0.952 -1.711 -0.886 + ## x2 1.109 0.358 0.521 1.697 0.408 1.810 + ## ------------------------------------------------------------------------------------------ + ## (Latency) + ## ------------------------------------------------------------------------------------------ + ## Estimate sd CI90%.low CI90.up% CI95%.low CI95%.up + ## z1 -0.176 0.085 -0.317 -0.036 -0.343 -0.009 + ## z2 0.461 0.154 0.208 0.714 0.160 0.762 + ## ------------------------------------------------------------------------------------------ + ## 'Real' elapsed time: 0.65 seconds. + +The table above shows the pointwise estimates, posterior standard +deviation and credible intervals for the regression parameters in the +incidence and latency parts of the model. For this particular simulated +setting, the coefficients of the incidence part are +*β*0 = 0.70, *β*1 =  − 1.15 and +*β*2 = 0.95. For the latency part one has, +*γ*1 =  − 0.10 and *γ*2 = 0.25. Finally, the table +ends by showing the elapsed wall clock time required by the `lpsmc()` +routine in seconds. Let us now see how the (approximate) posterior of +the (log) penalty parameter looks like by using the `postpendist()` +routine: + +``` r +postpendist(fit, low = 8, up = 15, themetype = "gray") +``` + + + +The maximum *a posteriori* for the log penalty parameter `v` can be +accessed by typing `fit$vhat` and is equal (in this example) to 12.35 as +indicated by the dashed black line above. + +## Estimated cure proportion + +Once `lpsmc()` has been called to fit a mixture cure model, the +`curefit()` routine can be used to computed the estimated proportion of +cured subjects for a given profile of covariates in the incidence part. +The code below randomly generates four covariates profiles (the rows in +the matrix `Xprof`) and computes the estimated cure rates `1-p(x)` for +each row: + +``` r +Xprof <- matrix(c(rep(1,4), rnorm(4), rbinom(4, 1, prob = 0.5)), + nrow = 4 , byrow = FALSE) +fitcure <- curefit(fit, Xprof) +knitr::kable(fitcure$estimcure, digits = 3) +``` + +| | (Intercept) | x1 | x2 | 1-p(x) | CI90.low | CI90.up | CI95.low | CI95.up | +|:-----------|------------:|-------:|----:|-------:|---------:|--------:|---------:|--------:| +| x.profile1 | 1 | -0.177 | 1 | 0.111 | 0.065 | 0.170 | 0.058 | 0.183 | +| x.profile2 | 1 | -0.912 | 1 | 0.046 | 0.022 | 0.084 | 0.018 | 0.093 | +| x.profile3 | 1 | -0.130 | 0 | 0.287 | 0.209 | 0.368 | 0.196 | 0.384 | +| x.profile4 | 1 | 0.309 | 0 | 0.415 | 0.327 | 0.502 | 0.310 | 0.518 | + +The first three columns show the values of the generated covariate +profile. The fourth colum is the estimated cure proportion and the +remaining columns show the approximate credible envelopes. The true cure +rates for the chosen covariate profiles in `Xprof` are: + +``` r +round(1 - fit$px(simul$betas, Xprof),3) +``` + + ## [1] 0.135 0.063 0.299 0.415 + +## Baseline survival curve + +The estimated baseline survival curve can be obtained with the +`survcurve()` routine. The `cred.int` argument can be used to fix the +credible level. + +``` r +survcurve(fit, cred.int = 0.95) +``` + + + +## Simulations + +To assess the statistical performance of the methodology underlying the +algorithms, the `simlpsmc()` routine can be used to simulate datasets in +an iterative way and compute some metrics to assess how precise the +model fit is. The code below simulates `S=500` replications of samples +of size 300 and for each iteration fits a mixture cure model with +`lpsmc()`. A table with the desired metrics to check the performance +summarizes the simulations. + +``` r +S <- 500 +set.seed(14778) +sims <- simlpsmc(n = 300, scenario = 1, S = S, themetype = "gray") +``` + + ## -------------------------------------------------- + ## Simulation results for LPSMC + ## -------------------------------------------------- + ## Scenario: 1 + ## Sample size: 300 + ## No. of B-splines: 15 + ## No. of replications: 500 + ## ------------------------------------------------------------------------------------------ + ## Scenario Parameters Mean Bias ESE RMSE CP90 CP95 + ## beta0 1.000 0.700 0.723 0.023 0.250 0.251 91.600 96.200 + ## beta1 1.000 -1.150 -1.184 -0.034 0.243 0.245 90.600 95.800 + ## beta2 1.000 0.950 0.972 0.022 0.397 0.397 90.200 96.600 + ## gamma1 1.000 -0.100 -0.100 0.000 0.098 0.098 88.200 93.400 + ## gamma2 1.000 0.250 0.224 -0.026 0.189 0.191 86.600 92.200 + ## ------------------------------------------------------------------------------------------ + ## Estimated coverage probabilities of S0 at selected quantiles + ## ----------------------------------------------------------------- + ## t0.05 t0.1 t0.2 t0.3 t0.4 t0.5 t0.6 t0.7 t0.8 t0.9 + ## CP90% 83.4 90.6 86.4 86.6 85.8 90.2 91.6 91.2 86.6 48.0 + ## CP95% 91.2 95.0 94.4 93.0 93.8 96.2 96.6 95.6 94.0 63.8 + ## ------------------------------------------------------------------------------------------ + ## Estimated coverage probabilities of S(uncured) for z=(0,0.4) + ## ----------------------------------------------------------------- + ## t0.05 t0.1 t0.2 t0.3 t0.4 t0.5 t0.6 t0.7 t0.8 t0.9 + ## CP90% 89.2 94.0 89.0 87.8 87.0 91.0 93.2 89.6 83.4 30.2 + ## CP95% 94.2 98.2 93.6 94.0 94.8 97.0 97.6 93.6 92.6 49.0 + ## ------------------------------------------------------------------------------------------ + ## Total elapsed time: 221.98 seconds. + +The above table shows the simulation results associated with the +regression coefficients (table on top) and also the estimated (90% and +95%) coverage probabilities of the baseline survival curve and the +survival curve of the uncured respectively at selected quantiles. +Dividing the total elapsed time by the number of replications allows to +assess the average time required by the `lpsmc()` routine to fit the +model (here 0.444 seconds). To show the simulation results associated +with the regression coefficients only in a clean table: + +``` r +knitr::kable(sims$simulres, digits = 3) +``` + +| | Scenario | Parameters | Mean | Bias | ESE | RMSE | CP90 | CP95 | +|:-------|---------:|-----------:|-------:|-------:|------:|------:|-----:|-----:| +| beta0 | 1 | 0.70 | 0.723 | 0.023 | 0.250 | 0.251 | 91.6 | 96.2 | +| beta1 | 1 | -1.15 | -1.184 | -0.034 | 0.243 | 0.245 | 90.6 | 95.8 | +| beta2 | 1 | 0.95 | 0.972 | 0.022 | 0.397 | 0.397 | 90.2 | 96.6 | +| gamma1 | 1 | -0.10 | -0.100 | 0.000 | 0.098 | 0.098 | 88.2 | 93.4 | +| gamma2 | 1 | 0.25 | 0.224 | -0.026 | 0.189 | 0.191 | 86.6 | 92.2 | + +The beta parameters are the coefficients of the incidence part and the +gamma parameters the coefficients in the latency part. The estimated +baseline survival curves and a boxplot of the Average Squared Error +associated to the incidence across various covariate profiles can be +obtained as follows: + +``` r +gridExtra::grid.arrange(sims$S0plot, sims$ASEplot, nrow = 1) +``` + + + +## Package version + +This is version 0.1.1 (2021-09-09) – “Keeping CPU at low temperature”. + +## License + +Copyright © 2021 Oswaldo Gressani. All rights reserved. + +## Reference + +Gressani, O., Faes, C. and Hens, N. (2021). Laplacian P-splines for +Bayesian inference in the mixture cure model. ArXiv preprint. +arxiv.org/abs/2103.01526 + +## Acknowledgments + +This project is funded by the European Union’s Research and Innovation +Action under the H2020 work programme, EpiPose (grant number 101003688). diff --git a/data/breastcancer.rda b/data/breastcancer.rda new file mode 100644 index 0000000..48e472a Binary files /dev/null and b/data/breastcancer.rda differ diff --git a/data/ecog1684.rda b/data/ecog1684.rda new file mode 100644 index 0000000..b52407c Binary files /dev/null and b/data/ecog1684.rda differ diff --git a/man/breastcancer.Rd b/man/breastcancer.Rd new file mode 100644 index 0000000..aaefa71 --- /dev/null +++ b/man/breastcancer.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/breastcancer.R +\docType{data} +\name{breastcancer} +\alias{breastcancer} +\title{Breast cancer data.} +\format{ +A data frame with 286 rows and 4 columns. +\describe{ + \item{\code{tobs}}{Distant-metastasis-free survival (in days).} + \item{\code{delta}}{Event indicator \code{1}=death or relapse, \code{0}=censored.} + \item{\code{AGE}}{Age of patients.} + \item{\code{ER}}{Estrogen receptor \code{0}="<=10fmol", \code{1}=">10fmol".} +} +} +\source{ +\url{https://doi.org/doi:10.18129/B9.bioc.breastCancerVDX} +} +\usage{ +data(breastcancer) +} +\description{ +Breast cancer data from the \code{breastCancerVDX} package. +} +\references{ +Schroeder M, Haibe-Kains B, Culhane A, Sotiriou C, Bontempi G, + Quackenbush J (2021). breastCancerVDX: Gene expression datasets published + by Wang et al. [2005] and Minn et al. [2007] (VDX). R package version 1.30.0. +} +\keyword{datasets} diff --git a/man/curefit.Rd b/man/curefit.Rd new file mode 100644 index 0000000..3df7fff --- /dev/null +++ b/man/curefit.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/curefit.R +\name{curefit} +\alias{curefit} +\title{Estimated cure proportion} +\usage{ +curefit(x, covarprofile) +} +\arguments{ +\item{x}{A lpsmc object.} + +\item{covarprofile}{The covariate profile on which to compute the +cure proportion.} +} +\value{ +A table with the estimated cure proportion. +} +\description{ +Computes the estimated cure proportion based on a mixture cure model fit +with \code{lpsmc}. Both estimates and approximate 90% and 95% credible +intervals are shown. +} +\examples{ +### Application on breast cancer data +rm(list=ls()) +data("breastcancer") +formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20) +covarprofile <- matrix(c(1, 30, 1, 1, 40, 0), nrow = 2 , byrow = TRUE) +fitcure <- curefit(fitcancer,covarprofile) +fitcure$estimcure + +} +\author{ +Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +} diff --git a/man/ecog1684.Rd b/man/ecog1684.Rd new file mode 100644 index 0000000..7a669ea --- /dev/null +++ b/man/ecog1684.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ecog1684.R +\docType{data} +\name{ecog1684} +\alias{ecog1684} +\title{Phase III Melanoma clinical trial.} +\format{ +A data frame with 284 rows and 5 columns. +\describe{ + \item{\code{tobs}}{Relapse-free survival (in years).} + \item{\code{delta}}{\code{1}=death or relapse, \code{0}=censored.} + \item{\code{TRT}}{Treatment: \code{0}=control, + \code{1}=Interferon alpha-2b (IFN).} + \item{\code{AGE}}{Age centered to the mean.} + \item{\code{SEX}}{\code{0}=Male, \code{1}=Female.} +} +} +\source{ +\url{https://CRAN.R-project.org/package=smcure} +} +\usage{ +data(ecog1684) +} +\description{ +Melanoma data from the phase III Eastern Cooperative + Oncology Group (ECOG) two-arm clinical trial studied in + Kirkwood et al. (1996) and obtained from the \code{smcure} package. +} +\references{ +Kirkwood, J. M., Strawderman, M. H., Ernstoff, M. S., + Smith, T. J., Borden, E. C. and Blum, R. H. (1996). Interferon alfa-2b + adjuvant therapy of high-risk resected cutaneous melanoma: the Eastern + Cooperative Oncology Group Trial EST 1684. + \emph{Journal of clinical oncology} \strong{14}(1): 7-17. + +Corbiere, F. and Joly, P. (2007). A SAS macro for parametric + and semiparametric mixture cure models. \emph{Computer methods and programs + in Biomedicine} \strong{85}(2): 173-180. + \url{https://doi.org/10.1016/j.cmpb.2006.10.008} +} +\keyword{datasets} diff --git a/man/figures/base-1.png b/man/figures/base-1.png new file mode 100644 index 0000000..7eb0035 Binary files /dev/null and b/man/figures/base-1.png differ diff --git a/man/figures/plot sim-1.png b/man/figures/plot sim-1.png new file mode 100644 index 0000000..5e990a9 Binary files /dev/null and b/man/figures/plot sim-1.png differ diff --git a/man/figures/postpen-1.png b/man/figures/postpen-1.png new file mode 100644 index 0000000..49c0fc3 Binary files /dev/null and b/man/figures/postpen-1.png differ diff --git a/man/figures/simulex-1.png b/man/figures/simulex-1.png new file mode 100644 index 0000000..abcedf7 Binary files /dev/null and b/man/figures/simulex-1.png differ diff --git a/man/lpsmc.Rd b/man/lpsmc.Rd new file mode 100644 index 0000000..030582c --- /dev/null +++ b/man/lpsmc.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lpsmc.R +\name{lpsmc} +\alias{lpsmc} +\title{Fit a mixture cure model with Laplacian-P-splines} +\usage{ +lpsmc(formula, data, K = 15, penorder = 3, stepsize = 0.2) +} +\arguments{ +\item{formula}{A model formula of the form \code{Surv(tobs,delta)~ +inci()+late()}.} + +\item{data}{A data frame (optional).} + +\item{K}{The number of B-spline coefficients.} + +\item{penorder}{The order of the penalty.} + +\item{stepsize}{The stepsize taken to maximize the log posterior penalty.} +} +\value{ +An object of class \code{lpsmc}. +} +\description{ +This routine fits a mixture cure model with a logistic link function for +the incidence part and a flexible Cox proportional hazards model for the +latency part where the baseline survival is approximated with penalized + B-splines. +} +\examples{ +### Real data application ECOG e1684 clinical trial +data("ecog1684") +formula <- Surv(tobs,delta) ~ inci(SEX + TRT + AGE) + late(SEX + TRT + AGE) +fite1684 <- lpsmc(formula = formula, data = ecog1684) +fite1684 + +### Application on breast cancer data +rm(list=ls()) +data("breastcancer") +formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20) +fitcancer + +} +\seealso{ +\link{lpsmc.object} +} +\author{ +Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +} diff --git a/man/lpsmc.object.Rd b/man/lpsmc.object.Rd new file mode 100644 index 0000000..3a947a3 --- /dev/null +++ b/man/lpsmc.object.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lpsmc_object.R +\name{lpsmc.object} +\alias{lpsmc.object} +\title{Objects related to lpsmc routine.} +\value{ +\item{X}{The matrix of covariates in the incidence part (including + an intercept). As the model includes an intercep, the first column + corresponds to a column of ones.} +\item{Z}{The matrix of covariates in the latency part.} +\item{latenthat}{The vector of estimated latent variables. This includes +the vector of B-spline coefficients, the regression coefficients +of the incidence part and latency part respectively.} +\item{thetahat}{The vector of estimated B-spline coefficients.} +\item{betahat}{The vector of estimated coefficients in the incidence part.} +\item{gammahat}{The vector of estimated coefficents in the latency part.} +\item{vhat}{The (approximate) posterior maximum for the penalty paramter +(in log scale).} +\item{tu}{The largest observed follow-up time.} +\item{ftime}{The vector of observed survival times.} +\item{penorder}{The chosen penalty order for P-splines.} +\item{K}{The number of B-spline coefficients.} +\item{p}{The number of regression in the incidence part (including +intercept).} +\item{q}{The number of regression coefficients in latency part.} +\item{xmean}{The mean covariate vector for the incidence part.} +\item{px}{The logistic link function} +\item{cumult}{A function required to compute the estimated baseline +survival} +\item{CI90}{The (approximate) 90% credible intervals for the regression +coefficients in the incidence and latency part.} +\item{CI95}{The (approximate) 95% credible intervals for the regression +coefficients in the incidence and latency part.} +\item{CIcure90}{The (approximate) 90% credible interval for the +cure rate (at the mean covariate profile).} +\item{CIcure95}{The (approximate) 95% credible interval for the +cure rate (at the mean covariate profile).} +\item{CIincid90}{The (approximate) 90% credible interval for the +incidence rate (at the mean covariate profile).} +\item{CIincid95}{The (approximate) 95% credible interval for the +incidence rate (at the mean covariate profile).} +} +\description{ +A list of objects resulting from the \link{lpsmc} routine. +} +\seealso{ +\link{lpsmc}. +} +\author{ +Oswaldo Gressani \email{oswado_gressani@hotmail.fr} . +} diff --git a/man/postpendist.Rd b/man/postpendist.Rd new file mode 100644 index 0000000..c0c18d2 --- /dev/null +++ b/man/postpendist.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/postpendist.R +\name{postpendist} +\alias{postpendist} +\title{Plot the normalized approximate posterior of the penalty parameter} +\usage{ +postpendist(x, low, up, themetype = c("classic", "gray", "light", "dark")) +} +\arguments{ +\item{x}{An object of class \code{lpsmc}} + +\item{low}{The lower bound on the x-axis (in log scale).} + +\item{up}{The upper bound on the x-axis (in log scale).} + +\item{themetype}{The theme, either "classic", "gray", "light" or "dark".} +} +\description{ +Plots the normalized approximate postrior of the penalty parameter based +on a an object of class \code{lpsmc}. +} +\examples{ +### Posterior penalty distribution for breast cancer dataset +rm(list=ls()) +data("breastcancer") +formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20, + stepsize = 0.1) +postpendist(fitcancer, 5, 12, themetype = "gray") + +} +\author{ +Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +} diff --git a/man/print.lpsmc.Rd b/man/print.lpsmc.Rd new file mode 100644 index 0000000..90fcfb4 --- /dev/null +++ b/man/print.lpsmc.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/S3_lpsmc_print.R +\name{print.lpsmc} +\alias{print.lpsmc} +\title{Print a lpsmc object.} +\usage{ +\method{print}{lpsmc}(x, ...) +} +\arguments{ +\item{x}{An object of class \code{lpsmc}.} + +\item{...}{Further arguments to be passed to print routine.} +} +\description{ +Print method for a \code{lpsmc} object. +} +\author{ +Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +} diff --git a/man/simdatmixcure.Rd b/man/simdatmixcure.Rd new file mode 100644 index 0000000..92e98e4 --- /dev/null +++ b/man/simdatmixcure.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simdatmixcure.R +\name{simdatmixcure} +\alias{simdatmixcure} +\title{Simulation of survival data to fit mixture cure models} +\usage{ +simdatmixcure(n, wscale, wshape, setting) +} +\arguments{ +\item{n}{Sample size.} + +\item{wscale}{The positive scale parameter of the Weibull distribution used +to generate the survival times of the uncured subjects.} + +\item{wshape}{The positive shape parameter of the Weibull distribution used +to generate the survival times of the uncured subjects.} + +\item{setting}{The setting under which survival times will be generated. If +\code{setting = 1}, the coefficients of the incidence part are +\emph{beta0=0.70, beta1=-1.15 and beta2=0.95} and the coefficients of the +latency part are \emph{gamma1=-0.10 and gamma2=0.25}. If +\code{setting = 2}, the coefficients of the incidence part are +\emph{beta0=0.1.25, beta1=-0.75 and beta2=0.45} and the coefficients of the +latency part are \emph{gamma1=-0.10 and gamma2=0.20}.} +} +\value{ +An object of class \code{simixcure} containing different objects of the +simulated dataset. Details can be found by typing ?simixcure.object. +} +\description{ +This routines simulates survival data with a cure fraction. The data is +simulated according to a mixture cure model with a logistic link for the +incidence part of the model. The incidence part includes two covariates +following a standard normal and a Bernoulli distribution with success +probability equal to 0.5. The latency part assumes a Weibull-Cox model with +two covariates (a standard normal variate and a Bernoulli variate with +success probability equal to 0.4). +} +\examples{ +### Simulate a sample of size n=300 under Scenario 1. +set.seed(4408) +simdat <- simdatmixcure(n = 300, wshape = 1.45, wscale = 0.25, setting = 1) +plot(simdat) # Plot the baseline survival and Kaplan-Meier curve +simdat$info # Print information on Cure and Censoring levels + +} +\seealso{ +\link{simdatmixcure.object} +} +\author{ +Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +} diff --git a/man/simdatmixcure.object.Rd b/man/simdatmixcure.object.Rd new file mode 100644 index 0000000..c43cbcd --- /dev/null +++ b/man/simdatmixcure.object.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simdatmixcure_object.R +\name{simdatmixcure.object} +\alias{simdatmixcure.object} +\title{Object resulting from simulating survival data with a cure fraction.} +\value{ +Values: + +\item{n}{The sample size.} +\item{X}{The generated matrix of covariates in the incidence part (including + an intercept). As the model includes an intercep, the first column + corresponds to a column of ones.} +\item{Z}{The generated matrix of covariates in the latency part.} +\item{betas}{The vector of regression coefficients in the incidence part.} +\item{gammas}{The vector of regression coefficents in the latency part.} +\item{tobs}{The vector of observed survival times.} +\item{delta}{The vector of event indicators.} +\item{h0}{A function corresponding to the Weibull baseline hazard used in + the latency part.} +\item{S0}{A function corresponding to the Weibull baseline survival used in + the latency part.} +\item{tup}{The largest observed follow-up time.} +\item{fitKM}{The Kaplan-Meier fit to the generated survival data.} +\item{plateau}{The x-axis value at which the plateau of the Kaplan-Meier +curve starts.} +\item{info}{A character string giving information on censoring and +cure rates.} +\item{setting}{The chosen setting, either 1 or 2.} +} +\description{ +A list of objects resulting from the \link{simdatmixcure} routine. +} +\seealso{ +\link{simdatmixcure} +} +\author{ +Oswaldo Gressani \email{oswado_gressani@hotmail.fr} . +} diff --git a/man/simlpsmc.Rd b/man/simlpsmc.Rd new file mode 100644 index 0000000..d84d686 --- /dev/null +++ b/man/simlpsmc.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simlpsmc.R +\name{simlpsmc} +\alias{simlpsmc} +\title{Assessing the statistical performance of lpsmc} +\usage{ +simlpsmc( + n = 300, + K = 15, + scenario = 1, + S = 500, + exactrep = FALSE, + themetype = c("classic", "gray", "light", "dark") +) +} +\arguments{ +\item{n}{Sample size.} + +\item{K}{Number of B-spline basis functions.} + +\item{scenario}{Either 1 or 2.} + +\item{S}{Total number of replications.} + +\item{exactrep}{Exactly replicate results.} + +\item{themetype}{The theme of the plot either "classic", "gray","light" +or "dark".} +} +\description{ +Simulates the lpsmc function +} +\examples{ +### Scenario 1, n=300 +sim1 <- simlpsmc(n = 300, scenario = 1, S = 10, exactrep = TRUE) +suppressWarnings(print(sim1$S0plot)) +sim1$ASEplot + +} +\author{ +Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +} diff --git a/man/survcurve.Rd b/man/survcurve.Rd new file mode 100644 index 0000000..ac5b2ed --- /dev/null +++ b/man/survcurve.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/survcurve.R +\name{survcurve} +\alias{survcurve} +\title{Survival curves for the mixture cure model} +\usage{ +survcurve( + x, + type = c("baseline", "uncured"), + covarprofile = NULL, + cred.int = 0.95, + themetype = c("classic", "gray", "light", "dark") +) +} +\arguments{ +\item{x}{An object of type \code{lpsmc}.} + +\item{type}{Either baseline or uncured.} + +\item{covarprofile}{The matrix of covariate profiles.} + +\item{cred.int}{The level for building the credible intervals.} + +\item{themetype}{The theme of the plot either "classic", "gray","light" +or "dark".} +} +\description{ +This routine creates objects related to the baseline survival +curve and the survival curve of the uncured subjects in a mixture +cure model fitted with \code{lpsmc}. +} +\examples{ +## Plot survival curves of uncured subjects for AGE=(30,50) and ER=1 +rm(list=ls()) +data("breastcancer") +formula <- Surv(tobs, delta) ~ inci(AGE + ER) + late(AGE + ER) +fitcancer <- lpsmc(formula = formula, data = breastcancer, K = 20) +profilematrix <- matrix(c(30, 1, 50, 1), nrow = 2, byrow = TRUE) +survcurve(fitcancer, type = "uncured", covarprofile = profilematrix) + +} +\author{ +Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr} . +} diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..22034c4 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/Laplacelpsmc.cpp b/src/Laplacelpsmc.cpp new file mode 100644 index 0000000..505cf75 --- /dev/null +++ b/src/Laplacelpsmc.cpp @@ -0,0 +1,97 @@ +/* --------------------------------------------------- + Laplace approximation in C++ for lpsmc + Copyright Oswaldo Gressani. All rights reserved. +------------------------------------------------------*/ + +#include +// [[Rcpp::depends(RcppArmadillo)]] + using namespace Rcpp; +// [[Rcpp::export]] + +List Rcpp_Laplace(NumericVector lat0, double v, int K, Function PDcorrect, + Function Dloglik, Function D2loglik, Function Qv){ + + int iter = 0; + int pdadj = 0; + double tol = 1e-3; + double dist = 3; + int dimlat = lat0.length(); + double thetaconstr = 1; + NumericMatrix Q = Qv(v); + NumericVector gloglik; + NumericMatrix g2loglik; + List checkPD; + LogicalVector condcheck; + bool PDpassed; + arma::mat Precmat(dimlat,dimlat); + arma::mat Covmat(dimlat,dimlat); + arma::vec latnew(dimlat); + CharacterVector PDnote; + + while(dist > tol){ + gloglik = Dloglik(lat0); + g2loglik = D2loglik(lat0); + Precmat = as(Q)-as(g2loglik); + checkPD = PDcorrect(Precmat); + condcheck = checkPD["isPD"]; + PDpassed = is_true(all(condcheck)); + if(PDpassed == FALSE){ + Precmat = as(checkPD[0]); + pdadj = pdadj+1; + } + Covmat = arma::inv(Precmat); + latnew = Covmat * (as(gloglik)-as(g2loglik) * as(lat0)); + NumericVector latnew2 = wrap(latnew); + dist = sqrt(sum(pow((latnew2-lat0),2))); + lat0 = latnew2; + iter = iter + 1; + } + + // Indicate a note for positive definiteness adjustment + if(pdadj == 0){ + PDnote = "No PD adjustment"; + } else{ + PDnote = "PD has been adjusted"; + } + + // Compute conditional latent vector and Covariance matrix + + double Sigma11 = Covmat(K-1,K-1); + + arma::mat Sigma12 = Covmat.row(K-1); + Sigma12.shed_col(K-1); + arma::mat Sigma21 = Sigma12; + + arma::mat Sigma22 = Covmat; + Sigma22.shed_col(K-1); + Sigma22.shed_row(K-1); + + arma::vec lat0minK = as(lat0); + lat0minK.shed_row(K-1); + arma::mat latstarc = lat0minK + (1/Sigma11) * Sigma21.t() * + (thetaconstr - lat0(K-1)); + + arma::mat Covstarc = Sigma22 - (1/Sigma11) * (Sigma21.t() * Sigma12); + NumericVector latstarc2 = wrap(latstarc); + NumericVector latstarcc(dimlat); + + for(int i = 0; i < K; i++){ + latstarcc[i] = latstarc2[i]; + } + latstarcc[K-1] = thetaconstr; + for(int i = K; i < dimlat; i++){ + latstarcc[i] = latstarc2[i-1]; + } + + arma::cx_double logdetCovstarc = arma::log_det(Covstarc); + + return Rcpp::List::create(Named("latstar") = latstarcc, + Named("Covstar") = Covstarc, + Named("logdetCovstarc") = logdetCovstarc, + Named("Note") = PDnote, + Named("iterations") = iter, + Named("distance") = dist); +} + + + diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..d715e49 --- /dev/null +++ b/src/Makevars @@ -0,0 +1,3 @@ +CXX_STD = CXX11 +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/Makevars.win b/src/Makevars.win new file mode 100644 index 0000000..d715e49 --- /dev/null +++ b/src/Makevars.win @@ -0,0 +1,3 @@ +CXX_STD = CXX11 +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..33075f5 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,55 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// Rcpp_Laplace +List Rcpp_Laplace(NumericVector lat0, double v, int K, Function PDcorrect, Function Dloglik, Function D2loglik, Function Qv); +RcppExport SEXP _mixcurelps_Rcpp_Laplace(SEXP lat0SEXP, SEXP vSEXP, SEXP KSEXP, SEXP PDcorrectSEXP, SEXP DloglikSEXP, SEXP D2loglikSEXP, SEXP QvSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type lat0(lat0SEXP); + Rcpp::traits::input_parameter< double >::type v(vSEXP); + Rcpp::traits::input_parameter< int >::type K(KSEXP); + Rcpp::traits::input_parameter< Function >::type PDcorrect(PDcorrectSEXP); + Rcpp::traits::input_parameter< Function >::type Dloglik(DloglikSEXP); + Rcpp::traits::input_parameter< Function >::type D2loglik(D2loglikSEXP); + Rcpp::traits::input_parameter< Function >::type Qv(QvSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_Laplace(lat0, v, K, PDcorrect, Dloglik, D2loglik, Qv)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_cubicBspline +NumericMatrix Rcpp_cubicBspline(NumericVector x, double lower, double upper, int K); +RcppExport SEXP _mixcurelps_Rcpp_cubicBspline(SEXP xSEXP, SEXP lowerSEXP, SEXP upperSEXP, SEXP KSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); + Rcpp::traits::input_parameter< double >::type lower(lowerSEXP); + Rcpp::traits::input_parameter< double >::type upper(upperSEXP); + Rcpp::traits::input_parameter< int >::type K(KSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_cubicBspline(x, lower, upper, K)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_mixcurelps_Rcpp_Laplace", (DL_FUNC) &_mixcurelps_Rcpp_Laplace, 7}, + {"_mixcurelps_Rcpp_cubicBspline", (DL_FUNC) &_mixcurelps_Rcpp_cubicBspline, 4}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_mixcurelps(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/cubicBspline.cpp b/src/cubicBspline.cpp new file mode 100644 index 0000000..d46b470 --- /dev/null +++ b/src/cubicBspline.cpp @@ -0,0 +1,62 @@ +/* --------------------------------------------------- + Cubic B-spline matrix in C++ + Copyright Oswaldo Gressani. All rights reserved. +------------------------------------------------------*/ + +#include +using namespace Rcpp; + +//[[Rcpp::export]] + +NumericMatrix Rcpp_cubicBspline(NumericVector x, double lower, double upper, int K){ + + int nx = x.length(); // length of input vector + int ndx = K-3; + double dx = (upper - lower) / ndx; + int nknots = ndx + 2 * 3 + 1; + + NumericVector knots(nknots); + knots[0] = lower-3*dx; + for(int i=1; i 0) { + temp += pow(cub, 3); + cub = x[i]-knots[j+1]; + if(cub > 0){ + temp -= 4*pow(cub,3); + cub=x[i]-knots[j+2]; + if(cub>0){ + temp += 6*pow(cub,3); + cub = x[i]-knots[j+3]; + if(cub>0){ + temp-=4*pow(cub,3); + cub=x[i]-knots[j+4]; + if(cub>0){ + temp+=pow(cub,3); + } + } + } + } + } + B(i,j) = temp / (6*pow(dx,3)); + if(abs(B(i,j))<1e-10){ + B(i,j)=0; + } + } + } + + return(B); +} + +