FragmentTensor2PeptideFragments.R

#' Decode fragment array to data.table
#'
#' @param fragmentArray a fragment array generated by fragments2tensor or a
#'   model.
#'
#' @return
#' @export
#' 

decodeFragArray <- function(fragmentArray){
  #Break each row into data.table
  dtList <- apply(X = fragmentArray, MARGIN = 1, FUN = as.data.table)
  #add row index
  dtList <- lapply(X = dtList, FUN = function(X){X[,rowIndex := c(1:nrow(X))]})
  
  #combine, melt, label
  dtList <- rbindlist(dtList, idcol = TRUE)
  dtList <- melt(data = dtList, id.vars = c(".id", "rowIndex"))
  setnames(x = dtList, old = c(".id", "rowIndex", "variable", "value"), new = c("sequence", "fragmentLength", "type_charge", "intensity"))
  
  #add info
  splits <- strsplit(x = as.character(dtList$type_charge), split = "_")
  dtList[, fragment_type := sapply(splits, "[[", 1)]
  dtList[, fragment_charge := sapply(splits, "[[", 2)]
  dtList[, fragment_type := paste0(fragment_type, fragmentLength)]
  
  splits <- strsplit(x = dtList$sequence, split = "_")
  dtList[, sequence := sapply(splits, "[[", 1)]
  dtList[, precursorCharge := sapply(splits, "[[", 2)]
  
  dtList
}

#' Compare results from model to known answer (long output)
#'
#' @description a model is used to predict the output from an input tensor
#'   (test_x). It is converted using decodeFragArray(). The ground truth
#'   (test_y) is also converted using decodeFragArray() and all the data are
#'   formatted into a long data.table. There is only one intensity column and a
#'   label column "isModel". Useful for plotting. 
#'
#' @param model a trained model with method in predict()
#' @param test_x input sequence array to generate predicted fragment pattern
#' @param test_y known answer encoded as an array
#'
#' @return a long formatted data.table
#' @export
#' 

mergeFragResults_long <- function(model, test_x, test_y){
  model_result <- predict(object = model, test_x)
  dimnames(model_result) <- dimnames(test_y)
  
  #Convert arrays to actual ions
  model_result <- decodeFragArray(model_result)
  model_result$intensity <- model_result$intensity * -1
  model_result[,isModel := TRUE]
  
  answer <- decodeFragArray(test_y)
  answer[,isModel := FALSE]
  
  answer <- rbindlist(list(model_result, answer))
  
  
  answer[, peptideIndex := match(x = sequence, table = unique(sequence))]
  answer
}

#' Visualize comparison of model to known
#'
#' @param compareResultsDt data.table returned by mergeFragResults_long()
#'
#' @return a ggplot
#' @export
#'

compareFragResults_plot <- function(compareResultsDt){
  ggplot(data = compareResultsDt, aes(x = fragment_type , y = intensity, fill = fragment_charge))+
    geom_bar(stat = "identity", position = "dodge")+
    facet_wrap("sequence", scales = "free")+
    geom_hline(yintercept = 0)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}


#' Compare results from model to known answer (wide output)
#'
#' @description a model is used to predict the output from an input tensor
#'   (test_x). It is converted using decodeFragArray(). The ground truth
#'   (test_y) is also converted using decodeFragArray() and all the data are
#'   formatted into a wide data.table. There are two intensity columns
#'   ("intensity_reference", "intensity_model") and their values are merged
#'   rowise by multiple keys for direct comparison.
#'
#' @param model a trained model with method in predict()
#' @param test_x input sequence array to generate predicted fragment pattern
#' @param test_y known answer encoded as an array
#'
#' @return a wide formatted data.table with two intensity columns
#'   ("intensity_reference", "intensity_model")
#'   

mergeFragResults_wide <- function(model, test_x, test_y){
  model_result <- predict(object = model, test_x)
  dimnames(model_result) <- dimnames(test_y)
  
  #Convert arrays to actual ions
  model_result <- decodeFragArray(model_result)
  answer <- decodeFragArray(test_y)
  
  answer <- merge(x = answer, y = model_result, by = c("sequence", "fragmentLength", "type_charge", "fragment_type", "fragment_charge", "precursorCharge"))
  
  setnames(x = answer, old = c("intensity.x", "intensity.y"), new = c("intensity_reference", "intensity_model"))
  
  answer
}


#' Correlate fragment model results to known
#'
#' @param wideDt data.table returned by mergeFragResults_wide()
#'
#' @return a summary data.table containing r^2 value, cos(theta) "cosine similarity", and theta "spectral contrast angle".
#' @export
#'

correlateFragResults <- function(wideDt){
  #r2 from linear regression 
  cor <- wideDt[, .(cor = summary(lm(intensity_reference ~ intensity_model))$r.squared), 
                 by = .(sequence, precursorCharge)]
  
  
  #Cosine similarity/Spectral contrast angle
  contrast <- wideDt[, cosineSim := cosineSimilarity(intensity_reference, intensity_model), by = .(sequence, precursorCharge)]
  contrast <- unique(contrast[,.(sequence, precursorCharge, cosineSim)])
  
  #TODO: Make sure spectral contrast angle is correct... differs from prosit. 
  contrast[, contrast_theta := acos(cosineSim)]
  
  contrast[, sequence_length := nchar(sequence)]
  
  out <- merge(x = cor, y = contrast, by = c("sequence", "precursorCharge"))
  out
}


cosineSimilarity <- function(x,y){
  nume <- (x %*% y)
  denom <- sqrt(sum(x^2) * sum(y^2))
  
  nume/denom
}