Skip to content

Commit

Permalink
initialize package
Browse files Browse the repository at this point in the history
  • Loading branch information
YushiFT committed Nov 20, 2024
1 parent c7fd371 commit f2f0152
Show file tree
Hide file tree
Showing 26 changed files with 915 additions and 0 deletions.
78 changes: 78 additions & 0 deletions R/assignment_index.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Assignment Index
#'
#' This function constructs assignment indices based on trio genotypes.
#'
#' @param is_phased Label for unphased/phased genotype. If `FALSE` (default), the
#' child's genotype is unphased. Else, the child's genotype is phased.
#' @param trio_geno A \eqn{3 \times n}{3 x n} matrix of trio genotype if `FALSE`
#' for \code{is_phased}. Or a \eqn{4 \times n}{4 x n} matrix of trio genotype if
#' `TRUE` for \code{is_phased}. Must contain only values \eqn{(0, 1, 2)}{0, 1, 2}.
#' The first row is assumed maternal, the second row is assumed paternal, and
#' the third (or together with the fourth) row is assumed child's genotypes.
#'
#' @return A named list of two vectors containing indicators for assigning to
#' treatment group 0 or treatment group 1.
#'
#' @note Undesired dimensions of the genotype matrices will result in errors.
#'
#' @examples
#' trio_example <- matrix(c(1,1,2,0, 1,2,1,0, 1,1,1,0), nrow=3, byrow=TRUE)
#' list_assign <- assignment_index(trio_example, is_phased=FALSE)
#'
#' @export
assignment_index <- function(trio_geno, is_phased=FALSE){
if(is_phased){
# validate input
if(nrow(trio_geno)!=4) {
stop('undesired dimension for phased trio genotypes')
}
# assignment index
z_m <- trio_geno[1,]
z_p <- trio_geno[2,]
a_m <- trio_geno[3,]
a_p <- trio_geno[4,]
w1m <- as.numeric((z_m==1) & (a_m==1))
w0m <- as.numeric((z_m==1) & (a_m==0))
w1p <- as.numeric((z_p==1) & (a_p==1))
w0p <- as.numeric((z_p==1) & (a_p==0))
w1 <- w1m + w1p
w0 <- w0m + w0p
# output
list_assign <- list(w1, w0, w1m, w0m, w1p, w0p)
names(list_assign) <- c('W1','W0','W1m','W0m','W1p','W0p')
return(list_assign)
} else {
# validate input
if(nrow(trio_geno)!=3) {
stop('undesired dimension for unphased trio genotypes')
}
# assignment index
z_m <- trio_geno[1,]
z_p <- trio_geno[2,]
g <- trio_geno[3,]
# assign to control
w0 <- as.numeric((z_m==2) & (z_p==1) & (g==1)) +
as.numeric((z_m==1) & (z_p==2) & (g==1)) +
as.numeric((z_m==1) & (z_p==1) & (g==1)) +
2 * as.numeric((z_m==1) & (z_p==1) & (g==0)) +
as.numeric((z_m==1) & (z_p==0) & (g==0)) +
as.numeric((z_m==0) & (z_p==1) & (g==0))
# assign to treatment
w1 <- as.numeric((z_m==2) & (z_p==1) & (g==2)) +
as.numeric((z_m==1) & (z_p==2) & (g==2)) +
2 * as.numeric((z_m==1) & (z_p==1) & (g==2)) +
as.numeric((z_m==1) & (z_p==1) & (g==1)) +
as.numeric((z_m==1) & (z_p==0) & (g==1)) +
as.numeric((z_m==0) & (z_p==1) & (g==1))
# output
list_assign <- list(w1, w0)
names(list_assign) <- c('W1','W0')
return(list_assign)
}
}






43 changes: 43 additions & 0 deletions R/calc_dtdt.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Transmission-Disequilibrium Test Statistic
#'
#' This function calculates the TDT statistic given trio genotypes and children's
#' dichotomous phenotype.
#'
#'
#' @param Y The n-length vector of the children's dichotomous trait
#' @param is_phased Label for unphased/phased genotype. If `FALSE` (default), the
#' child's genotype is unphased. Else, the child's genotype is phased.
#' @param trio_geno A \eqn{3 \times n}{3 x n} matrix of trio genotype if `FALSE`
#' for is_phased. Or a \eqn{4 \times n}{4 x n} matrix of trio genotype if
#' `TRUE` for is_phased. Must contain only values \eqn{(0, 1, 2)}{0, 1, 2}.
#' The first row is assumed maternal, the second row is assumed paternal, and
#' the third (or together with the fourth) row is assumed child's genotype.
#'
#' @return A numeric value of the TDT statistic.
#'
#' @note Inputting phenotypes that are not dichotomous and undesired dimensions
#' of the genotype matrices will result in errors.
#'
#' @examples
#' trio_example <- matrix(c(1,1,2,0, 1,2,1,0, 1,1,1,0), nrow=3, byrow=TRUE)
#' y_binary <- c(0,1,1,0)
#' dtdt <- calc_dtdt(y_binary, trio_example)
#'
#' @export
calc_dtdt <- function(Y, trio_geno, is_phased=FALSE){
if(!all(Y %in% 0:1)){stop("Y must be a binary vector")}
list_assign <- assignment_index(trio_geno, is_phased)
W0 <- list_assign$W0
W1 <- list_assign$W1
# sizes of treatment and control
N <- sum(W1) + sum(W0)
# assign trait value to treatment and control group
vec_n1 <- rep(Y, list_assign$W1)
vec_n0 <- rep(Y, list_assign$W0)
n1 <- sum(vec_n1==1)
n0 <- sum(vec_n0==1)
ttdt <- ifelse(N>0, 2*(n1-n0)/N, NaN)
return(ttdt)
}


44 changes: 44 additions & 0 deletions R/calc_dtmt.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Transmission-Mean Test Statistic
#'
#' This function computes the unbiased TMT estimand given trio genotypes and
#' children's phenotype. The phenotype can be either quantitative or dichotomous.
#'
#' @param Y The n-length vector of children' phenotype.
#' @param is_phased Label for unphased/phased genotype. If `FALSE` (default), the
#' child's genotype is unphased. Else, the child's genotype is phased.
#' @param trio_geno A \eqn{3 \times n}{3 x n} matrix of trio genotype if `FALSE`
#' for is_phased. Or a \eqn{4 \times n}{4 x n} matrix of trio genotype if
#' `TRUE` for is_phased. Must contain only values \eqn{(0, 1, 2)}{0, 1, 2}.
#' The first row is assumed maternal, the second row is assumed paternal, and
#' the third (or together with the fourth) row is assumed child's genotypes.
#'
#' @return A numeric value of the TMT statistic.
#'
#' @note Undesired dimensions of the genotype matrices will result in errors.
#'
#' @examples
#' trio_example <- matrix(c(1,1,2,0, 1,2,1,0, 1,1,1,0), nrow=3, byrow=TRUE)
#' y_quant <- c(11.8,1.7,3.6,12.2)
#' dtmt <- calc_dtmt(y_quant, trio_example)
#'
#' @export
calc_dtmt <- function(Y, trio_geno, is_phased=FALSE){
# construct assignment vectors
list_assign <- assignment_index(trio_geno, is_phased)
W0 <- list_assign$W0
W1 <- list_assign$W1
# estimate mu to center trait value
mu_hat <- calc_mu_hat(Y, W0, W1)
# sizes of treatment and control
N <- sum(W1) + sum(W0)
# calculate the tmt estimand
dtmt <- ifelse(N>0, 2/N*(sum((Y-mu_hat)*W1) - sum((Y-mu_hat)*W0)), NaN)
return(dtmt)
}







37 changes: 37 additions & 0 deletions R/calc_dtmt_nc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Transmission-Mean Test Preliminary Statistic
#'
#' This function computes the preliminary TMT statistic given trio genotypes and
#' children's phenotype. The phenotype can be either quantitative or dichotomous.
#' The preliminary TMT statistic is unbiased and non-centered by the population
#' mean.
#'
#' @param Y The n-length vector of children' phenotype.
#' @param is_phased Label for unphased/phased genotype. If `FALSE` (default), the
#' child's genotype is unphased. Else, the child's genotype is phased.
#' @param trio_geno A \eqn{3 \times n}{3 x n} matrix of trio genotype if `FALSE`
#' for is_phased. Or a \eqn{4 \times n}{4 x n} matrix of trio genotype if
#' `TRUE` for is_phased. Must contain only values \eqn{(0, 1, 2)}{0, 1, 2}.
#' The first row is assumed maternal, the second row is assumed paternal, and
#' the third (or together with the fourth) row is assumed child's genotypes.
#'
#' @return A numeric value of the preliminary TMT statistic.
#'
#' @note Undesired dimensions of the genotype matrices will result in errors.
#'
#' @examples
#' trio_example <- matrix(c(1,1,2,0, 1,2,1,0, 1,1,1,0), nrow=3, byrow=TRUE)
#' y_quant <- c(11.8,1.7,3.6,12.2)
#' dtmt_nc <- calc_dtmt_nc(y_quant, trio_example)
#'
#' @export
calc_dtmt_nc <- function(Y, trio_geno, is_phased=FALSE){
# construct assignment vectors
list_assign <- assignment_index(trio_geno, is_phased)
W0 <- list_assign$W0
W1 <- list_assign$W1
# sizes of treatment and control
N <- sum(W1) + sum(W0)
# calculate the tmt estimand
dtmt <- ifelse(N>0, 2/N*(sum(Y*W1) - sum(Y*W0)), NaN)
return(dtmt)
}
44 changes: 44 additions & 0 deletions R/calc_dtmt_pc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Transmission-Mean Test Centered Statistic
#'
#' This function computes the centered TMT statistic given trio genotypes and
#' children's phenotype. The phenotype can be either quantitative or dichotomous.
#'
#' @param Y The n-length vector of children' phenotype.
#' @param is_phased Label for unphased/phased genotype. If `FALSE` (default), the
#' child's genotype is unphased. Else, the child's genotype is phased.
#' @param mu A numeric value of the population mean.
#' @param trio_geno A \eqn{3 \times n}{3 x n} matrix of trio genotype if `FALSE`
#' for is_phased. Or a \eqn{4 \times n}{4 x n} matrix of trio genotype if
#' `TRUE` for is_phased. Must contain only values \eqn{(0, 1, 2)}{0, 1, 2}.
#' The first row is assumed maternal, the second row is assumed paternal, and
#' the third (or together with the fourth) row is assumed child's genotypes.
#'
#' @return A numeric value of the centered TMT statistic.
#'
#' @note Undesired dimensions of the genotype matrices will result in errors.
#'
#' @examples
#' trio_example <- matrix(c(1,1,2,0, 1,2,1,0, 1,1,1,0), nrow=3, byrow=TRUE)
#' list_assign <- assignment_index(trio_example, is_phased=FALSE)
#' y_quant <- c(11.8,1.7,3.6,12.2)
#' dtmt_pc <- calc_dtmt_pc(y_quant, trio_example, mu=5)
#'
#' @export
calc_dtmt_pc <- function(Y, trio_geno, mu, is_phased=FALSE){
# construct assignment vectors
list_assign <- assignment_index(trio_geno, is_phased)
W0 <- list_assign$W0
W1 <- list_assign$W1
# sizes of treatment and control
N <- sum(W1) + sum(W0)
# calculate the tmt estimand
dtmt <- ifelse(N>0, 2/N*(sum((Y-mu)*W1) - sum((Y-mu)*W0)), NaN)
return(dtmt)
}







29 changes: 29 additions & 0 deletions R/calc_mu_hat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#' Population Mean Estimate
#'
#' This function computes the unbiased estimate for the population mean.
#'
#' @param Y The n-length vector of children' phenotype.
#' @param W0 A vector of indicators for assigning to group 0.
#' @param W1 A vector of indicators for assigning to group 1.
#'
#' @return A numeric value of the population mean estimate.
#'
#'
#' @examples
#' trio_example <- matrix(c(1,1,2,0, 1,2,1,0, 1,1,1,0), nrow=3, byrow=TRUE)
#' y_quant <- c(11.8,1.7,3.6,12.2)
#' list_assign <- assignment_index(trio_example)
#' mu_hat <- calc_mu_hat(y_quant, list_assign$W0, list_assign$W1)
#'
#' @export
calc_mu_hat <- function(Y, W0, W1){
mu_hat <- (sum(Y*W0) + sum(Y*W1)) / (sum(W0) + sum(W1))
return(mu_hat)
}







64 changes: 64 additions & 0 deletions R/calc_varhat_dtmt.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Transmission-Mean Test Statistic Sampling Variance Estimate
#'
#' This function computes the sampling variance estimate for the TMT statistic
#' given trio genotypes and children's phenotype. The phenotype can be either
#' quantitative or dichotomous.
#'
#' @param Y The n-length vector of children' phenotype.
#' @param is_phased Label for unphased/phased genotype. If `FALSE` (default), the
#' child's genotype is unphased. Else, the child's genotype is phased.
#' @param trio_geno A \eqn{3 \times n}{3 x n} matrix of trio genotype if `FALSE`
#' for is_phased. Or a \eqn{4 \times n}{4 x n} matrix of trio genotype if
#' `TRUE` for is_phased. Must contain only values \eqn{(0, 1, 2)}{0, 1, 2}.
#' The first row is assumed maternal, the second row is assumed paternal, and
#' the third (or together with the fourth) row is assumed child's genotypes.
#'
#' @return A numeric value of the sampling variance estimate.
#'
#' @note Undesired dimensions of the genotype matrices will result in errors.
#'
#' @examples
#' trio_example <- matrix(c(1,1,2,0, 1,2,1,0, 1,1,1,0), nrow=3, byrow=TRUE)
#' y_quant <- c(11.8,1.7,3.6,12.2)
#' varhat <- calc_varhat_dtmt(y_quant, trio_example)
#'
#' @export
calc_varhat_dtmt <- function(Y, trio_geno, is_phased=FALSE){
if(is_phased){
geno_c <- trio_geno[3,]+trio_geno[4,]
} else {
geno_c <- trio_geno[3,]
}
# method 1 for estimating gamma #####################################
id_het_m <- (trio_geno[1,]==1)
id_het_p <- (trio_geno[2,]==1)
id_N <- (id_het_m | id_het_p)
# calculate var dtmt ########################################################
# construct assignment vectors
list_assign <- assignment_index(trio_geno, is_phased)
W0 <- list_assign$W0
W1 <- list_assign$W1
# estimate mu to center trait value
mu_hat <- calc_mu_hat(Y, W0, W1)
# sizes of treatment and control
N <- sum(W1) + sum(W0)
# calculate the tmt estimand
dtmt <- ifelse(N>0, 2/N*(sum((Y-mu_hat)*W1) - sum((Y-mu_hat)*W0)), NaN)
# imbens-rubin approach ######################################################
T0 <- (W0==1) & (W1==0)
T1 <- (W0==0) & (W1==1)
T00 <- (W0==2) & (W1==0)
T11 <- (W0==0) & (W1==2)
y <- as.numeric(Y)
# variance estimates under imbens-rubin approach #############################
mu_hat_0 <- sum(y[T0]) / sum(T0)
mu_hat_1 <- sum(y[T1]) / sum(T1)
mu_hat_00 <- 2*sum(y[T00]) / sum(T00)
mu_hat_11 <- 2*sum(y[T11]) / sum(T11)
sigma2_hat_0 <- sum((y[T0]-mu_hat_0)**2) / (sum(T0)-1)
sigma2_hat_1 <- sum((y[T1]-mu_hat_1)**2) / (sum(T1)-1)
sigma2_hat_00 <- sum((2*y[T00]-mu_hat_00)**2) / (sum(T00)-1)
sigma2_hat_11 <- sum((2*y[T11]-mu_hat_11)**2) / (sum(T11)-1)
varhat <- 4/(N**2)*(sum(T0)*sigma2_hat_0 + sum(T1)*sigma2_hat_1 + sum(T00)*sigma2_hat_00 + sum(T11)*sigma2_hat_11)
return(varhat)
}
19 changes: 19 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' Example Trio Data
#'
#' @name triolist
#'
#' @format A list of matrices:
#' \describe{
#' \item{geno_m}{A matrix of the maternal genotypes.}
#' \item{geno_p}{A matrix of the paternal genotypes.}
#' \item{geno_c}{A matrix of the child's genotypes}
#' \item{allele_m}{A matrix of the maternal transmitted alleles.}
#' \item{allele_p}{A matrix of the paternal transmitted alleles.}
#' \item{y}{A single row matrix of the child's phenotypes.}
#' }
#' @examples
#' \dontrun{
#' names(triolist)
#' }

NULL
1 change: 1 addition & 0 deletions R/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading

0 comments on commit f2f0152

Please sign in to comment.