-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path6b-blanket_test_with_spectral_counts.R
100 lines (77 loc) · 2.68 KB
/
6b-blanket_test_with_spectral_counts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#' ---
#' title: "Spectral Counting Statistical Analysis"
#' output:
#' BiocStyle::html_document:
#' toc: true
#' number_sections: true
#' highlight: haddock
#' ---
#+ echo=F
knitr::opts_chunk$set(echo=F, message=F, warning=F, fig.align='center')
#, out.width='10cm')
#+ libraries
library(tidyverse)
library(MSnSet.utils)
#+ loading MSnSet
load("./output_data/shotgun_topdown_sc_20240730_modann_wres.RData")
### Filter works, but please be careful. The data is non-imputed and thus sparse.
#' # Original numbers Proteoforms/Genes
nrow(m)
featureNames(m) %>% sub("([^_]*)_\\d+","\\1",.) %>% unique() %>% length()
# apply > 50 filter
# m <- m[rowSums(exprs(m) > 0) > 10,] # 70 out of 103
# alternative filter. Better suited is the data is non-batch-corrected and the
# batch is used as a covariate.
#
# subset to at least 2 batches with 2 samples
selected_features <- m %>%
exprs() %>%
as.data.frame() %>%
rownames_to_column("feature_name") %>%
pivot_longer(cols = -feature_name, names_to = "ProjID", values_to = "spectral_counts") %>%
inner_join(pData(m)) %>%
dplyr::select(feature_name, ProjID, spectral_counts, batch) %>%
filter(spectral_counts > 0) %>%
group_by(feature_name, batch) %>% # filter below retains features/batches with >= given number of counts
tally() %>%
filter(n >= 2) %>%
group_by(feature_name) %>% # filter below retains features present across >= given number of batches
tally() %>%
filter(n >= 3) %>%
pull(feature_name)
m <- m[selected_features,]
library(msmsTests)
#' # Overall test results across clinical and neuropath traits
get_sig_res_for_counts <- function(m, pheno){
m <- m[,!is.na(m[[pheno]])]
alt.f <- sprintf("y ~ %s + batch + pmi + 1", pheno)
null.f <- "y ~ 1 + batch + pmi"
div <- colSums(exprs(m)) # normalization factor
res <- msms.glm.qlll(m, alt.f, null.f, div=div, facs = pData(m))
res$p.val.adj <- p.adjust(res$p.value, "BH")
res <- res %>%
rownames_to_column("proteoform") %>%
dplyr::rename(logFC = LogFC) %>%
dplyr::rename(P.Value = p.value) %>%
dplyr::rename(adj.P.Val = p.val.adj) %>%
dplyr::select(proteoform, logFC, P.Value, adj.P.Val)
return(res)
}
vls <- c("sqrt_tangles",
"sqrt_amyloid",
"lb_neo",
"hip_scl_3reg_yn",
"cogn_global_lv",
"cogng_demog_slope",
"ci_num2_gct",
"caa_4gp",
"arteriol_scler",
"anye4")
results <- list()
for(vl in vls){
res_i <- get_sig_res_for_counts(m, vl)
res_i$variable <- vl
results <- c(results, list(res_i))
}
results <- bind_rows(results)
save(results, file = "./output_data/spectral_counting_test_results.RData")