-
Notifications
You must be signed in to change notification settings - Fork 0
/
FigS8_SRM_TMT_corr.R
82 lines (71 loc) · 2.67 KB
/
FigS8_SRM_TMT_corr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
library(MSnSet.utils)
library(tidyverse)
library(ggplot2)
library(ggExtra)
library(ggsci)
library(tidyr)
library(dplyr)
(load("./output_data/shotgun_topdown_int_20240730_modann_cnt_wres.RData"))
td103 <- m
# TMT, generated by Emory. Cited as PMID: 32284590
(load("./source_data/processed_protein_level_msnset.RData")) # m1
emory <- m1
# SRM. Cited as PMID: 29908079
(load("./source_data/final_r1r2r3_peptide_srm_data.RData")) # m
srm <- m
# if(!file.exists("./output_data/TMT_SRM_cors.RData")){
#
# m1 <- emory
# m2 <- srm
#
# ### SRM data ###################################################################
# srm_representative_proteoform_ids <- fData(m2) %>%
# filter(!grepl("80", Peptide.Modified.Sequence)) %>% # remove phosphopeptides
# group_by(Gene) %>%
# slice_max(s2n) %>%
# pull(specie)
# m2 <- m2[srm_representative_proteoform_ids,]
# featureNames(m2) <- fData(m2)$Gene
# ################################################################################
#
# # intersecting samples
# common_samples <- intersect(sampleNames(m1), sampleNames(m2)) # 55
# common_features <- intersect(featureNames(m1), featureNames(m2)) # 55
# m1 <- m1[common_features, common_samples]
# m2 <- m2[common_features, common_samples]
#
# cors <- c()
# for(i in 1:nrow(m1)){
# cor_i <- cor(as.numeric(exprs(m1)[i,]),
# as.numeric(exprs(m2)[i,]),
# use = "pairwise.complete.obs")
# cors <- c(cors, cor_i)
# }
#
# dafr <- data.frame(Gene = featureNames(m1), cor = cors)
#
# # appending variances to dafr
# dafr$var_tmt <- apply(exprs(m1), 1, var, na.rm = T)
# dafr$var_srm <- apply(exprs(m2), 1, var, na.rm = T)
#
# save(dafr, file="./output_data/TMT_SRM_cors.RData")
# }
# (load("./output_data/TMT_SRM_cors.RData")) # dafr
dafr2 <- dafr %>%
mutate(var = (var_tmt + var_srm)/2) %>%
mutate(var_quartile = cut(var,
quantile(var),
labels = c("(0,25]%","(25,50]%","(50,75]%","(75,100]%"),
include.lowest = T))
ggplot(dafr2) +
aes(x = var_quartile, y = cor) +
geom_hline(yintercept = 0, color = "black", size = 0.75, linetype = "dashed") +
geom_violin(scale = "area", trim = F, adjust = 0.9, fill = "#2196F3") +
geom_boxplot(width=0.05, outlier.shape = NA, fill = "#ECEFF1") +
scale_y_continuous(breaks = seq(-1,+1,0.2), limits = c(-0.2, 1)) +
theme_bw(base_size = 18) +
removeGrid(y=FALSE) +
xlab("average variance quartile") +
ylab("SRM/TMT correlation")
ggsave(filename = "./figures_tables/FigS8_SRMvTMT_with_VAR.png",
width = 7, height = 6, scale = 1)