Skip to content

Commit

Permalink
Merge pull request xinhe-lab#61 from xinhe-lab/multigroup_test
Browse files Browse the repository at this point in the history
add min_L option in screen regions and update messages
  • Loading branch information
kevinlkx authored Oct 31, 2024
2 parents 6f568a5 + 3183d63 commit 4e6441f
Show file tree
Hide file tree
Showing 97 changed files with 753 additions and 213 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ Package: ctwas
Type: Package
Title: Adjusting for Genetic Confounders in Transcriptome-Wide Association
Studies Improves Discovery of Risk Genes of Complex Traits
Date: 2024-10-16
Version: 0.4.17
Date: 2024-10-30
Version: 0.4.18
Authors@R: c(person("Siming","Zhao",role="aut",email="siming.zhao06@gmail.com"),
person("Wesley","Crouse",role="aut"),
person("Sheng","Qian",role="aut",email="shengqian@uchicago.edu"),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ export(diagnose_LD_mismatch_susie)
export(est_param)
export(estimate_region_L)
export(expand_region_data)
export(filter_z_gene_by_group_size)
export(finemap_regions)
export(finemap_regions_noLD)
export(get_gene_annot_from_ens_db)
export(get_gene_info)
export(get_gene_regions)
export(get_molecular_ids)
export(get_predictdb_genome_build)
export(get_problematic_genes)
Expand Down
45 changes: 38 additions & 7 deletions R/ctwas_compute_gene_z.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,13 @@ compute_gene_z <- function (z_snp,
return(z_gene)
}

# get gene info from weights
#' @title Get gene info from weights
#'
#' @param weights a list of preprocessed weights
#'
#' @return a data frame of gene info
#'
#' @export
get_gene_info <- function(weights){

if (!inherits(weights,"list")){
Expand All @@ -84,7 +90,15 @@ get_gene_info <- function(weights){
return(gene_info)
}

# get regions for each gene
#' @title Get regions for each gene
#'
#' @param gene_info a data frame of gene info
#'
#' @param region_info a data frame of region definitions
#'
#' @return a data frame of gene info with regions overlapping each gene
#'
#' @export
get_gene_regions <- function(gene_info, region_info){

gene_info <- gene_info[gene_info$chrom %in% unique(region_info$chrom), , drop=FALSE]
Expand All @@ -93,10 +107,18 @@ get_gene_regions <- function(gene_info, region_info){
p0 <- gene_info[i, "p0"]
p1 <- gene_info[i, "p1"]
region.idx <- which(region_info$chrom == chrom & region_info$start <= p1 & region_info$stop > p0)
gene_info[i, "region_start"] <- min(region_info[region.idx,"start"])
gene_info[i, "region_stop"] <- max(region_info[region.idx,"stop"])
gene_info[i, "region_id"] <- paste(sort(region_info[region.idx, "region_id"]), collapse = ",")
gene_info[i, "n_regions"] <- length(region.idx)
if (length(region.idx) > 0) {
gene_info[i, "region_start"] <- min(region_info[region.idx,"start"])
gene_info[i, "region_stop"] <- max(region_info[region.idx,"stop"])
gene_info[i, "region_id"] <- paste(sort(region_info[region.idx, "region_id"]), collapse = ",")
gene_info[i, "n_regions"] <- length(region.idx)
} else {
warning(paste("No regions overlapping with", gene_info[i, "id"]))
gene_info[i, "region_start"] <- NA
gene_info[i, "region_stop"] <- NA
gene_info[i, "region_id"] <- NA
gene_info[i, "n_regions"] <- length(region.idx)
}
}
return(gene_info)
}
Expand Down Expand Up @@ -132,7 +154,16 @@ combine_z <- function(z_snp, z_gene){
}


# filter z_gene by group size
#' @title Filter z_gene by group size
#'
#' @param z_gene a data frame of gene z-scores, with columns: "id", "z", "type",
#' "context", "group".
#'
#' @param min_group_size Minimum number of variables in a group.
#'
#' @return a data frame of gene z-scores.
#'
#' @export
filter_z_gene_by_group_size <- function(z_gene, min_group_size){
gene_group_size <- table(z_gene$group)
if (any(gene_group_size < min_group_size)){
Expand Down
2 changes: 2 additions & 0 deletions R/ctwas_est_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ est_param <- function(
stop("Estimated group_prior_var contains NAs!")
}

rownames(p_single_effect_df) <- NULL

param <- list("group_prior" = group_prior,
"group_prior_var" = group_prior_var,
"group_prior_iters" = group_prior_iters,
Expand Down
22 changes: 2 additions & 20 deletions R/ctwas_finemapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,6 @@ finemap_regions <- function(region_data,
if (verbose) {
if (is.null(group_prior)) {
loginfo("Use uniform prior")
} else {
loginfo("group_prior {%s}: {%s}", names(group_prior), format(group_prior, digits = 4))
loginfo("group_prior_var {%s}: {%s}", names(group_prior_var), format(group_prior_var, digits = 4))
}
loginfo("coverage = %s", coverage)
loginfo("min_abs_corr = %s", min_abs_corr)
Expand Down Expand Up @@ -257,9 +254,6 @@ finemap_regions_noLD <- function(region_data,
if (verbose) {
if (is.null(group_prior)) {
loginfo("Use uniform prior")
} else {
loginfo("group_prior {%s}: {%s}", names(group_prior), format(group_prior, digits = 4))
loginfo("group_prior_var {%s}: {%s}", names(group_prior_var), format(group_prior_var, digits = 4))
}
}

Expand Down Expand Up @@ -388,8 +382,7 @@ finemap_single_region <- function(region_data,
cor_dir = cor_dir,
LD_format = LD_format,
LD_loader_fun = LD_loader_fun,
snpinfo_loader_fun = snpinfo_loader_fun,
verbose = verbose)
snpinfo_loader_fun = snpinfo_loader_fun)

# gene first then SNPs
R <- rbind(cbind(cor_res$R_gene, t(cor_res$R_snp_gene)),
Expand Down Expand Up @@ -417,9 +410,6 @@ finemap_single_region <- function(region_data,
min_abs_corr = min_abs_corr,
...)

if (verbose){
loginfo("annotate susie result")
}
susie_res_df <- anno_susie(susie_res,
gids = gids,
sids = sids,
Expand All @@ -432,9 +422,6 @@ finemap_single_region <- function(region_data,

if (get_susie_alpha) {
# extract alpha matrix from susie result
if (verbose){
loginfo("get susie alpha")
}
susie_alpha_df <- get_susie_alpha_res(susie_res, susie_res_df, keep_genes_only = TRUE)
} else {
susie_alpha_df <- NULL
Expand Down Expand Up @@ -566,9 +553,7 @@ finemap_single_region_noLD <- function(region_data,
coverage = coverage,
min_abs_corr = 0,
...)
if (verbose){
loginfo("annotate susie result")
}

susie_res_df <- anno_susie(susie_res,
gids = gids,
sids = sids,
Expand All @@ -581,9 +566,6 @@ finemap_single_region_noLD <- function(region_data,

if (get_susie_alpha) {
# extract alpha matrix from susie result
if (verbose){
loginfo("get susie alpha")
}
susie_alpha_df <- get_susie_alpha_res(susie_res, susie_res_df, keep_genes_only = TRUE)
} else {
susie_alpha_df <- NULL
Expand Down
4 changes: 2 additions & 2 deletions R/ctwas_merge_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ create_merged_snp_LD_map <- function(boundary_genes,

}

loginfo("Merged %d boundary genes into %d regions", nrow(boundary_genes), nrow(merged_region_info))
loginfo("Merge %d boundary genes into %d regions", nrow(boundary_genes), nrow(merged_region_info))

return(list("merged_region_info" = merged_region_info,
"merged_LD_map" = merged_LD_map,
Expand Down Expand Up @@ -398,7 +398,7 @@ create_merged_snp_map <- function(boundary_genes,

}

loginfo("Merged %d boundary genes into %d regions", nrow(boundary_genes), nrow(merged_region_info))
loginfo("Merge %d boundary genes into %d regions", nrow(boundary_genes), nrow(merged_region_info))

return(list("merged_region_info" = merged_region_info,
"merged_snp_map" = merged_snp_map,
Expand Down
4 changes: 4 additions & 0 deletions R/ctwas_region_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ assemble_region_data <- function(region_info,
# read SNPs in the chromosome
snp_info_chr <- snp_info[snp_info$chrom == b, ]

if (nrow(snp_info_chr) == 0) {
stop(paste("No reference SNP info in chrom", b, "!"))
}

# select genes in the chromosome
gene_info_chr <- gene_info[gene_info$chrom == b, ]

Expand Down
22 changes: 18 additions & 4 deletions R/ctwas_screen_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,16 @@
#'
#' @param min_gene minimum number of genes in a region.
#'
#' @param filter_L If TRUE, screening regions with estimated L > 0.
#' @param filter_L If TRUE, screening regions with estimated credible sets
#' (L) = \code{min_L}.
#'
#' @param filter_nonSNP_PIP If TRUE, screening regions with
#' total non-SNP PIP >= \code{min_nonSNP_PIP}.
#'
#' @param min_L If screening by nubmer of credible sets (L),
#' regions with L >= \code{min_L}
#' will be selected to run finemapping using full SNPs.
#'
#' @param min_nonSNP_PIP If screening by non-SNP PIPs,
#' regions with total non-SNP PIP >= \code{min_nonSNP_PIP}
#' will be selected to run finemapping using full SNPs.
Expand Down Expand Up @@ -62,6 +67,7 @@ screen_regions <- function(region_data,
min_gene = 1,
filter_L = TRUE,
filter_nonSNP_PIP = FALSE,
min_L = 1,
min_nonSNP_PIP = 0.5,
min_abs_corr = 0.1,
LD_format = c("rds", "rdata", "mtx", "csv", "txt", "custom"),
Expand Down Expand Up @@ -151,11 +157,10 @@ screen_regions <- function(region_data,
ncore = ncore,
verbose = verbose,
...)
screened_region_ids <- names(all_estimated_L[all_estimated_L > 0])
screened_region_ids <- names(all_estimated_L[all_estimated_L >= min_L])
screened_region_data <- region_data[screened_region_ids]
loginfo("Selected %d regions with L > 0", length(screened_region_data))
loginfo("Selected %d regions with L >= %d", length(screened_region_data), min_L)
screened_region_L <- all_estimated_L[screened_region_ids]

idx <- match(names(all_estimated_L), screen_summary$region_id)
screen_summary$L[idx] <- all_estimated_L
} else {
Expand Down Expand Up @@ -199,6 +204,13 @@ screen_regions <- function(region_data,
screen_summary$nonSNP_PIP[idx] <- all_nonSNP_PIPs
}

# if min_L = 0, set regions with L = 0 to 1
if (min_L == 0) {
screened_region_L[screened_region_L == 0] <- 1
}

rownames(screen_summary) <- NULL

return(list("screened_region_data" = screened_region_data,
"screened_region_L" = screened_region_L,
"screen_summary" = screen_summary))
Expand Down Expand Up @@ -324,6 +336,8 @@ screen_regions_noLD <- function(region_data,
idx <- match(names(all_nonSNP_PIPs), screen_summary$region_id)
screen_summary$nonSNP_PIP[idx] <- all_nonSNP_PIPs

rownames(screen_summary) <- NULL

return(list("screened_region_data" = screened_region_data,
"screen_summary" = screen_summary))
}
Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/FAQ.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions docs/articles/minimal_tutorial.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4e6441f

Please sign in to comment.