-
Notifications
You must be signed in to change notification settings - Fork 0
/
FragmentTensor2PeptideFragments.R
153 lines (120 loc) · 5.17 KB
/
FragmentTensor2PeptideFragments.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#' Decode fragment array to data.table
#'
#' @param fragmentArray a fragment array generated by fragments2tensor or a
#' model.
#'
#' @return
#' @export
#'
decodeFragArray <- function(fragmentArray){
#Break each row into data.table
dtList <- apply(X = fragmentArray, MARGIN = 1, FUN = as.data.table)
#add row index
dtList <- lapply(X = dtList, FUN = function(X){X[,rowIndex := c(1:nrow(X))]})
#combine, melt, label
dtList <- rbindlist(dtList, idcol = TRUE)
dtList <- melt(data = dtList, id.vars = c(".id", "rowIndex"))
setnames(x = dtList, old = c(".id", "rowIndex", "variable", "value"), new = c("sequence", "fragmentLength", "type_charge", "intensity"))
#add info
splits <- strsplit(x = as.character(dtList$type_charge), split = "_")
dtList[, fragment_type := sapply(splits, "[[", 1)]
dtList[, fragment_charge := sapply(splits, "[[", 2)]
dtList[, fragment_type := paste0(fragment_type, fragmentLength)]
splits <- strsplit(x = dtList$sequence, split = "_")
dtList[, sequence := sapply(splits, "[[", 1)]
dtList[, precursorCharge := sapply(splits, "[[", 2)]
dtList
}
#' Compare results from model to known answer (long output)
#'
#' @description a model is used to predict the output from an input tensor
#' (test_x). It is converted using decodeFragArray(). The ground truth
#' (test_y) is also converted using decodeFragArray() and all the data are
#' formatted into a long data.table. There is only one intensity column and a
#' label column "isModel". Useful for plotting.
#'
#' @param model a trained model with method in predict()
#' @param test_x input sequence array to generate predicted fragment pattern
#' @param test_y known answer encoded as an array
#'
#' @return a long formatted data.table
#' @export
#'
mergeFragResults_long <- function(model, test_x, test_y){
model_result <- predict(object = model, test_x)
dimnames(model_result) <- dimnames(test_y)
#Convert arrays to actual ions
model_result <- decodeFragArray(model_result)
model_result$intensity <- model_result$intensity * -1
model_result[,isModel := TRUE]
answer <- decodeFragArray(test_y)
answer[,isModel := FALSE]
answer <- rbindlist(list(model_result, answer))
answer[, peptideIndex := match(x = sequence, table = unique(sequence))]
answer
}
#' Visualize comparison of model to known
#'
#' @param compareResultsDt data.table returned by mergeFragResults_long()
#'
#' @return a ggplot
#' @export
#'
compareFragResults_plot <- function(compareResultsDt){
ggplot(data = compareResultsDt, aes(x = fragment_type , y = intensity, fill = fragment_charge))+
geom_bar(stat = "identity", position = "dodge")+
facet_wrap("sequence", scales = "free")+
geom_hline(yintercept = 0)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}
#' Compare results from model to known answer (wide output)
#'
#' @description a model is used to predict the output from an input tensor
#' (test_x). It is converted using decodeFragArray(). The ground truth
#' (test_y) is also converted using decodeFragArray() and all the data are
#' formatted into a wide data.table. There are two intensity columns
#' ("intensity_reference", "intensity_model") and their values are merged
#' rowise by multiple keys for direct comparison.
#'
#' @param model a trained model with method in predict()
#' @param test_x input sequence array to generate predicted fragment pattern
#' @param test_y known answer encoded as an array
#'
#' @return a wide formatted data.table with two intensity columns
#' ("intensity_reference", "intensity_model")
#'
mergeFragResults_wide <- function(model, test_x, test_y){
model_result <- predict(object = model, test_x)
dimnames(model_result) <- dimnames(test_y)
#Convert arrays to actual ions
model_result <- decodeFragArray(model_result)
answer <- decodeFragArray(test_y)
answer <- merge(x = answer, y = model_result, by = c("sequence", "fragmentLength", "type_charge", "fragment_type", "fragment_charge", "precursorCharge"))
setnames(x = answer, old = c("intensity.x", "intensity.y"), new = c("intensity_reference", "intensity_model"))
answer
}
#' Correlate fragment model results to known
#'
#' @param wideDt data.table returned by mergeFragResults_wide()
#'
#' @return a summary data.table containing r^2 value, cos(theta) "cosine similarity", and theta "spectral contrast angle".
#' @export
#'
correlateFragResults <- function(wideDt){
#r2 from linear regression
cor <- wideDt[, .(cor = summary(lm(intensity_reference ~ intensity_model))$r.squared),
by = .(sequence, precursorCharge)]
#Cosine similarity/Spectral contrast angle
contrast <- wideDt[, cosineSim := cosineSimilarity(intensity_reference, intensity_model), by = .(sequence, precursorCharge)]
contrast <- unique(contrast[,.(sequence, precursorCharge, cosineSim)])
#TODO: Make sure spectral contrast angle is correct... differs from prosit.
contrast[, contrast_theta := acos(cosineSim)]
contrast[, sequence_length := nchar(sequence)]
out <- merge(x = cor, y = contrast, by = c("sequence", "precursorCharge"))
out
}
cosineSimilarity <- function(x,y){
nume <- (x %*% y)
denom <- sqrt(sum(x^2) * sum(y^2))
nume/denom
}