Skip to content

Commit

Permalink
Fixes for fit_nb_offset
Browse files Browse the repository at this point in the history
  • Loading branch information
saketkc committed Oct 18, 2023
1 parent 38b1a86 commit 9cf21c4
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 16 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: sctransform
Type: Package
Title: Variance Stabilizing Transformations for Single Cell UMI Data
Version: 0.4.0.9001
Date: 2023-10-17
Version: 0.4.0.9002
Date: 2023-10-18
Authors@R: c(
person(given = "Christoph", family = "Hafemeister", email = "christoph.hafemeister@nyu.edu", role = "aut", comment = c(ORCID = "0000-0001-6365-8254")),
person(given = "Saket", family = "Choudhary", email = "schoudhary@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5202-7633")),
Expand Down
12 changes: 12 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
# News
All notable changes will be documented in this file.

## Unreleased

### Fixed

- Fixed column name setting in `fit_nb_offset`

### Changed

- Verbose messages when invoking `v2`: messages are only invoked if verbosity > 1.



## [0.5.0] - 2023-09-18

### Added
Expand Down
6 changes: 4 additions & 2 deletions R/fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,9 @@ fit_glmGamPoi_offset <- function(umi, model_str, data, allow_inf_theta=FALSE) {

fit_nb_offset <- function(umi, model_str, data, allow_inf_theta=FALSE) {
# remove log_umi from model formula if it is with batch variables
new_formula <- gsub("\\+ log_umi", "", model_str)
new_formula <- gsub(pattern = "\\+ log_umi", replacement = "", x = model_str)
# replace log_umi with 1 if it is the only formula
new_formula <- gsub("log_umi", "1 + offset(log_umi)", new_formula)
new_formula <- gsub(pattern = "log_umi", replacement = "1 + offset(log_umi)", x = new_formula)

log10_umi <- data$log_umi
stopifnot(!is.null(log10_umi))
Expand All @@ -202,7 +202,9 @@ fit_nb_offset <- function(umi, model_str, data, allow_inf_theta=FALSE) {
})
model_pars <- t(par_mat)
model_pars <- cbind(model_pars, rep(log(10), nrow(umi)))

rownames(x = model_pars) <- rownames(x = umi)
colnames(x = model_pars)[match(x = 'Intercept', table = colnames(x = model_pars))] <- "(Intercept)"
colnames(x = model_pars)[ncol(x = model_pars)] <- "log_umi"
return(model_pars)
}
29 changes: 17 additions & 12 deletions R/vst.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,19 @@ vst <- function(umi,
show_progress = NULL) {
if (!is.null(vst.flavor)){
if (vst.flavor == "v2"){
if (verbosity>0){
message("vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.")
if (verbosity > 0){
message("vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.")
}
glmGamPoi_check <- requireNamespace("glmGamPoi", quietly = TRUE)
method <- "glmGamPoi_offset"
if (!glmGamPoi_check){
message('`vst.flavor` is set to "v2" but could not find glmGamPoi installed.
Please install the glmGamPoi package. See https://github.com/const-ae/glmGamPoi for details.')
message("`vst.flavor` is set to 'v2' but could not find glmGamPoi installed.\n",
"Please install the glmGamPoi package for much faster estimation.\n",
"--------------------------------------------\n",
"install.packages('BiocManager')\n",
"BiocManager::install('glmGamPoi')\n",
"--------------------------------------------\n",
"Falling back to native (slower) implementation.\n")
method <- "nb_offset"
}
exclude_poisson <- TRUE
Expand Down Expand Up @@ -257,7 +262,7 @@ vst <- function(umi,
overdispersion_factor <- genes_var - genes_amean
overdispersion_factor_step1 <- overdispersion_factor[genes_step1]
is_overdispersed <- (overdispersion_factor_step1 > 0)
if (verbosity > 0) {
if (verbosity > 1) {
message(paste("Total Step 1 genes:",
length(genes_step1)))
message(paste("Total overdispersed genes:", sum(is_overdispersed)))
Expand Down Expand Up @@ -363,7 +368,7 @@ vst <- function(umi,
if (min_variance == "umi_median"){
# Maximum pearson residual for non-zero median UMI is 5
min_var <- (get_nz_median2(umi) / 5)^2
if (verbosity > 0) {
if (verbosity > 1) {
message(paste("Setting min_variance based on median UMI: ", min_var))
}
arguments$set_min_var <- min_var
Expand Down Expand Up @@ -674,7 +679,7 @@ get_model_pars <- function(genes_step1, bin_size, umi, model_str, cells_step1,

# if the naive and estimated MLE are 1000x apart, set theta estimate to Inf
diff_theta_index <- rownames(model_pars[model_pars[genes_step1, "diff_theta"]< 1e-3,])
if (verbosity>0){
if (verbosity > 1){
message(paste("Setting estimate of ", length(diff_theta_index), "genes to inf as theta_mm/theta_mle < 1e-3"))
}
# Replace theta by infinity
Expand Down Expand Up @@ -752,7 +757,7 @@ reg_model_pars <- function(model_pars, genes_log_gmean_step1, genes_log_gmean, c

poisson_genes_step1 <- genes_step1[overdispersion_factor_step1<=0]

if (verbosity>0){
if (verbosity > 1){
message(paste("# of step1 poisson genes (variance < mean):",
length(poisson_genes_step1)))
message(paste("# of low mean genes (mean < 0.001):", length(low_mean_genes)))
Expand All @@ -765,7 +770,7 @@ reg_model_pars <- function(model_pars, genes_log_gmean_step1, genes_log_gmean, c
overdispersed_genes_step1 <- setdiff(genes_step1, poisson_genes_step1)


if (verbosity>0){
if (verbosity > 1){
message(paste("Total # of Step1 poisson genes (theta=Inf; variance < mean):",
length(poisson_genes_step1)))
message(paste("Total # of poisson genes (theta=Inf; variance < mean):",
Expand Down Expand Up @@ -844,7 +849,7 @@ reg_model_pars <- function(model_pars, genes_log_gmean_step1, genes_log_gmean, c


if (exclude_poisson){
if (verbosity > 0) {
if (verbosity > 1) {
message('Ignoring theta inf genes')
}
overdispersed_genes <- setdiff(rownames(model_pars), all_poisson_genes)
Expand Down Expand Up @@ -933,7 +938,7 @@ reg_model_pars <- function(model_pars, genes_log_gmean_step1, genes_log_gmean, c
if (verbosity > 0) {
message(paste('Replacing fit params for', length(all_poisson_genes), 'poisson genes by theta=Inf'))
}
for (col in colnames(model_pars_fit)){
for (col in intersect(colnames(x = model_pars_fit), colnames(x = vst_out_offset)) ){
stopifnot(col %in% colnames(vst_out_offset))
model_pars_fit[all_poisson_genes, col] <- vst_out_offset[all_poisson_genes, col]
}
Expand All @@ -942,7 +947,7 @@ reg_model_pars <- function(model_pars, genes_log_gmean_step1, genes_log_gmean, c
if (fix_intercept){
# Replace the fitted intercepts by those calculated from offset model
col <- "(Intercept)"
if (verbosity > 0) {
if (verbosity > 1) {
message(paste0('Replacing regularized parameter ', col, ' by offset'))
}
gene_mean <- rowMeans(umi)
Expand Down

0 comments on commit 9cf21c4

Please sign in to comment.