Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: support for Sirius annotations #83

Merged
merged 11 commits into from
Nov 7, 2024
51 changes: 8 additions & 43 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,43 +106,13 @@ jobs:
echo $sysreqs
sudo -s eval "$sysreqs"

# Step 9: Install macOS system dependencies
- name: Install macOS system dependencies
if: matrix.config.os == 'macOS-latest'
run: |
## Enable installing XML from source if needed
brew install libxml2
echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV

## Required to install magick as noted at
## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2
brew install imagemagick@6

## For textshaping, required by ragg, and required by pkgdown
brew install harfbuzz fribidi

## For installing usethis's dependency gert
brew install libgit2

## required for ncdf4
## brew install netcdf ## Does not work as it is compiled with gcc
## Use pre-compiled libraries from https://mac.r-project.org/libs-4/
curl -O https://mac.r-project.org/libs-4/netcdf-4.7.4-darwin.17-x86_64.tar.gz
tar fvxzm netcdf-4.7.4-darwin.17-x86_64.tar.gz -C /
rm netcdf-4.7.4-darwin.17-x86_64.tar.gz
curl -O https://mac.r-project.org/libs-4/hdf5-1.12.0-darwin.17-x86_64.tar.gz
tar fvxzm hdf5-1.12.0-darwin.17-x86_64.tar.gz -C /
rm hdf5-1.12.0-darwin.17-x86_64.tar.gz
curl -O https://mac.r-project.org/libs-4/szip-2.1.1-darwin.17-x86_64.tar.gz
tar fvxzm szip-2.1.1-darwin.17-x86_64.tar.gz -C /
rm szip-2.1.1-darwin.17-x86_64.tar.gz

# Step 10: Install Windows system dependencies
- name: Install Windows system dependencies
if: runner.os == 'Windows'
run: |
## Edit below if you have any Windows system dependencies
shell: Rscript {0}
# Steps 9 and 10 (macOS and Windows dependencies) removed
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck, testthat, openxlsx2
needs: check
dependencies: '"hard"'


# Step 11: Install BiocManager
- name: Install BiocManager
Expand Down Expand Up @@ -206,12 +176,7 @@ jobs:
BiocManager::install("BiocGenerics")
shell: Rscript {0}

# Step 16: Install covr
- name: Install covr
if: github.ref == 'refs/heads/devel' && env.run_covr == 'true' && runner.os == 'Linux'
run: |
remotes::install_cran("covr")
shell: Rscript {0}
# Skipping Step 16: Install covr

# Step 17: Install pkgdown
- name: Install pkgdown
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
repopack-output.txt
.Rproj.user
.Rhistory
.RData
Expand Down
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ Imports:
methods,
QFeatures,
stats,
utils
utils,
S4Vectors,
SummarizedExperiment,
openxlsx2
Remotes:
decisionpatterns/searchable
Suggests:
Expand Down
91 changes: 35 additions & 56 deletions R/DataProcessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ readClusterDataFromProjectFile <- function(file, progress = FALSE)
fileLines <- readLines(con = file)
)
base::close(con = file)
dataList <- readProjectData(fileLines = fileLines, progress = progress)

dataList <- readProjectData(fileLines = fileLines, progress = progress, qfeatures = qfeatures)
fileLines <- NULL

return(dataList)
Expand All @@ -98,7 +98,7 @@ readClusterDataFromProjectFile <- function(file, progress = FALSE)
#' @export
#'
#' @examples
readProjectData <- function(fileLines, progress = FALSE)
readProjectData <- function(fileLines, progress = FALSE, qfeatures = NULL)
{
allowedTags <- c("ID")
allowedTagPrefixes <- c("AnnotationColors=")
Expand Down Expand Up @@ -196,70 +196,43 @@ readProjectData <- function(fileLines, progress = FALSE)
listMatrixRows <- NULL
listMatrixCols <- NULL

## Disable command line reading of answer
if (FALSE) {

################################################################################
#Start of importing annotation part1 from two
# Display the message and give the user the option to choose whether to upload the annotation file or not.
#If Y shows selection window for annotation file. if N ignores annotation process
#message("Do you want to upload the annotation file? (Y/N)")
#user_choice <- readline()
user_choice <- "N"

if (toupper(user_choice) == "Y") {


# Read the annotation_file file (if needed)
annotation_file <- read.delim(file.choose(), header = TRUE, check.names = FALSE) # select interactively

# Display the available columns in annotation_file
message("Available columns in annotation_file:")
available_columns <- colnames(annotation_file)
for (i in 1:length(available_columns)) {
message(paste(i, "-", available_columns[i]))
######debugging
tryCatch(
{
rowData(qfeatures)
},
error = function(e) {
message("Error: ", e$message)
traceback()
}
)
######debugging

if (!is.null(attr(rowData(qfeatures[[1]]), "annotation column"))) {

# Prompt the user to select the column containing IDs
message("Enter the number corresponding to the column containing IDs:")
selected_column_id <- as.integer(readline())
# Extract the relevant data: Alignment ID and the annotation column from qfeatures
annot_colname <- attr(rowData(qfeatures[[1]]), "annotation column")
annotation_data <- rowData(qfeatures[[1]])[[annot_colname]]
alignment_ids <- rowData(qfeatures[[1]])[["Alignment ID"]]

# Check if the selected column index is valid
if (selected_column_id >= 1 && selected_column_id <= length(available_columns)) {
id_column <- available_columns[selected_column_id]

# Prompt the user to select the Annotation column to use
message("Enter the number corresponding to the annotation column:")
selected_column_annot <- as.integer(readline())

# Check if the selected column index is valid
if (selected_column_annot >= 1 && selected_column_annot <= length(available_columns)) {
selected_column <- available_columns[selected_column_annot]

# Iterate through all values in the "Annotation" column of metaboliteProfile, excluding first row
for (i in 1:nrow(metaboliteProfile)) {
# Perform the lookup based on metaboliteProfile's "Alignment ID" column and annotation_file's selected ID column
matching_indices <- which(annotation_file[[id_column]] == metaboliteProfile$'Alignment ID'[i])

# Check data types and unique values of IDs column in annotation_file

# Check if any matches were found
if (length(matching_indices) > 0) {
# Update the specified column (Annotation) in metaboliteProfile with the corresponding value from annotation_file
metaboliteProfile[i, "Annotation"] <- annotation_file[matching_indices[1], selected_column]
} else {
# Handle the case where no match was found (you can add custom logic here)
warning(paste("No match found for row", i, "in metaboliteProfile"))
}
}
} else {
message("Invalid column selection. Skipping annotation step.")
}
}
# Find the matching indices between metaboliteProfile and annotation_data
matching_indices <- match(metaboliteProfile[["Alignment ID"]], alignment_ids)

metaboliteProfile$Annotation[!is.na(matching_indices)] <- annotation_data[matching_indices[!is.na(matching_indices)]]
# Replace NA annotations with "" so that the check nchar(annoVals[[i]]) > 0 works further below (l. 597)
metaboliteProfile$Annotation[is.na(metaboliteProfile$Annotation)] <- ""
}

#####################################################################################################################################
#end of importing annotation part1 from two
}

listMatrixVals <- NULL

Expand Down Expand Up @@ -319,13 +292,17 @@ readProjectData <- function(fileLines, progress = FALSE)
}

## STN: Disabled.
if (FALSE) {
if (!is.null(attr(rowData(qfeatures[[1]]), "annotation column"))) {
#Start of importing annotation part2 from two
################################################################################
#adding HEX color codes from external annotations to the annotationColorsMapInitValue of dataFrameHeader
if (toupper(user_choice) == "Y") {

# Copy the selected column by user, Remove duplicates and exclude the first row
uniqueAnnotations <- unique(unlist(strsplit(metaboliteProfile$Annotation, ",")))
###Debug
print("Unique Annotations Before Filtering:")
print(uniqueAnnotations)
###/Debug
uniqueAnnotations <- paste0(uniqueAnnotations, "=")
# Add a random string from the hex color list to each element of uniqueAnnotations
# strings_list <- c("#000000", "#FFFFFF", "#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF", "#800000", "#008000", "#000080", "#808000", "#800080", "#008080", "#808080", "#C0C0C0", "#FFA500", "#FFC0CB", "#FFD700", "#A52A2A")
Expand All @@ -338,7 +315,7 @@ readProjectData <- function(fileLines, progress = FALSE)
uniqueAnnotationsHexs <- gsub("AnnotationColors=\\{\\s+", "AnnotationColors={", paste("AnnotationColors={", paste(uniqueAnnotations1, collapse = ","), "}"))
# Assuming dataFrameHeader is your data frame
dataFrameHeader$Annotation[2] <- uniqueAnnotationsHexs
}

################################################################################
#End of importing annotation part2 from two
}
Expand Down Expand Up @@ -612,9 +589,11 @@ readProjectData <- function(fileLines, progress = FALSE)
annotationValueIgnore <- "Ignore"
annotationColorIgnore <- "red"


## present annotations
annotations <- vector(mode='list', length=numberOfMS1features)
annoVals <- metaboliteProfile[, annotationColumnName]

for(i in seq_len(numberOfMS1features)){
if(nchar(annoVals[[i]]) > 0){
annotations[[i]] <- as.list(unlist(strsplit(x = annoVals[[i]], split = ", ")))
Expand Down
15 changes: 11 additions & 4 deletions R/FragmentMatrixFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,8 @@ mzClustGeneric <- function(p,
}

convertToProjectFile <- function(filePeakMatrixPath,
fileSpectra,
fileSpectra,
fileAnnotation,
parameterSet,
progress = FALSE){
####################################################################################
Expand Down Expand Up @@ -1436,6 +1437,10 @@ convertToProjectFile <- function(filePeakMatrixPath,


filePeakMatrixQF <- readMSDial(filePeakMatrixPath)
if (!is.null(fileAnnotation)){
# TODO: determine columns to merge by
filePeakMatrixQF <- addSiriusAnnotations(filePeakMatrixQF,fileAnnotation)
}

returnObj <- convertToProjectFile2(
filePeakMatrixQF = filePeakMatrixQF,
Expand All @@ -1453,7 +1458,7 @@ convertToProjectFile <- function(filePeakMatrixPath,
returnObj$numberOfSpectraDiscardedDueToNoPeaks <- numberOfSpectraDiscardedDueToNoPeaks
returnObj$numberOfSpectraDiscardedDueToMaxIntensity <- numberOfSpectraDiscardedDueToMaxIntensity
returnObj$numberOfSpectraDiscardedDueToTooHeavy <- numberOfSpectraDiscardedDueToTooHeavy

returnObj$qfeatures <- filePeakMatrixQF
return(returnObj)
}

Expand Down Expand Up @@ -1497,6 +1502,7 @@ convertToProjectFile2 <- function(filePeakMatrixQF,
numberOfParsedMs1Features <- returnObj$numberOfPrecursorsPrior
numberOfRemovedPrecursorIsotopePeaks <- returnObj$numberOfRemovedIsotopePeaks

qfeatures <- returnObj$qfeatures
rm(returnObj)
} else {
propList <- list(
Expand Down Expand Up @@ -1589,7 +1595,7 @@ convertToProjectFile2 <- function(filePeakMatrixQF,
#temporary fix
#filePeakMatrix <- NULL

if(!is.null(filePeakMatrix)){
if(!is.null(filePeakMatrixQF)){
## allHits: dataFrame$"Average Mz" --> precursorMz; allHits indexes the spectraList
diffAll <- abs(outer(X = precursorMz, Y = dataFrame$"Average Mz", FUN = function(x, y){abs(x-y)}))
allHits <- apply(X = diffAll, MARGIN = 2, FUN = function(x){which(x == min(x[x < parameterSet$mzDeviationAbsolute_mapping], Inf))})
Expand Down Expand Up @@ -1850,7 +1856,8 @@ convertToProjectFile2 <- function(filePeakMatrixQF,
numberOfUnmappedSpectra = numberOfUnmappedSpectra,
numberOfUnmappedPrecursors = numberOfUnmappedPrecursors,
numberOfUnmappedPrecursorsMz = numberOfUnmappedPrecursorsMz,
numberOfUnmappedPrecursorsRt = numberOfUnmappedPrecursorsRt
numberOfUnmappedPrecursorsRt = numberOfUnmappedPrecursorsRt,
qfeatures <- qfeatures
)

if(!is.na(progress)) if(progress) setProgress(1) else print("Ready")
Expand Down
51 changes: 50 additions & 1 deletion R/parsePeakAbundanceMatrixQF.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ parsePeakAbundanceMatrixQF <- function(qfeatures,
dataFrame <- cbind(rowData(qfeatures)[[1]][,cols_to_keep] ,assay(qfeatures))
#workaround for avoiding change in colnames during coercion
cnames <- colnames(dataFrame)
dataFrame <- as.data.frame(dataFrame)
dataFrame <- as.data.frame(dataFrame, check.names = FALSE)
colnames(dataFrame) <- cnames
oldFormat <- ncol(colData(qfeatures))==3
numRowDataCols <- ncol(rowData(qfeatures)[[1]])
Expand Down Expand Up @@ -161,6 +161,9 @@ parsePeakAbundanceMatrixQF <- function(qfeatures,
returnObj$dataFrame <- dataFrame
returnObj$vals <- vals

## qfeatures
returnObj$qfeatures <- qfeatures

## meta
returnObj$oldFormat <- oldFormat
returnObj$numberOfPrecursors <- numberOfPrecursors
Expand All @@ -179,3 +182,49 @@ parsePeakAbundanceMatrixQF <- function(qfeatures,

return (returnObj)
}

#' Add Sirius annotations to the row data of a QFeatures object
#'
#' Reads a tab-delimited Sirius annotation file and left-joins it onto the
#' row data of the first assay of `qfeatures`. All features are kept in their
#' original order; features without a matching annotation row get `NA` in the
#' annotation columns.
#'
#' Every column of the merged row data whose name is unchanged by the merge
#' is tagged with a `"source"` attribute (`"sirius"` for columns coming from
#' the annotation file, `"data"` for columns of the original row data). The
#' merged row data additionally gets an `"annotation column"` attribute that
#' names the column to be used as annotation.
#'
#' @param qfeatures A QFeatures object; only its first assay is annotated.
#' @param siriusFile Tab-delimited Sirius annotation file, passed to
#'   [utils::read.delim()].
#' @param rowData_col Name of the key column in the row data of `qfeatures`.
#' @param sirius_col Name of the key column in the annotation file.
#' @param annotation_col Name of the merged column recorded in the
#'   `"annotation column"` attribute. Note that [utils::read.delim()]
#'   sanitises column names by default (`check.names = TRUE`), so this must
#'   be the sanitised name.
#'
#' @return `qfeatures` with the row data of its first assay replaced by the
#'   annotated row data.
#' @export
addSiriusAnnotations <- function(qfeatures,
                                 siriusFile,
                                 rowData_col = "Alignment ID",
                                 sirius_col = "featureId",
                                 annotation_col = "ClassyFire.subclass") {
  # TODO: specify more parameters in read.delim
  annotation <- read.delim(siriusFile)

  featureData <- rowData(qfeatures[[1]])

  # Fail early with a readable message instead of an obscure merge() error.
  if (!rowData_col %in% colnames(featureData)) {
    stop("Column '", rowData_col, "' not found in the row data.",
         call. = FALSE)
  }
  if (!sirius_col %in% colnames(annotation)) {
    stop("Column '", sirius_col, "' not found in the annotation file.",
         call. = FALSE)
  }

  # A key occurring several times in the annotation file would multiply the
  # matching feature rows, so the result could no longer replace the row data.
  # Keep the first annotation row per key.
  duplicatedKeys <- duplicated(annotation[[sirius_col]])
  if (any(duplicatedKeys)) {
    warning(sum(duplicatedKeys), " annotation row(s) with duplicated '",
            sirius_col, "' ignored; the first occurrence is used.",
            call. = FALSE)
    annotation <- annotation[!duplicatedKeys, , drop = FALSE]
  }

  message("Merging by: ", sirius_col, " and ", rowData_col)

  # merge() does not guarantee the row order of its result, but the row data
  # must stay aligned with the assay rows. Carry the original row position
  # through the merge and restore the order afterwards.
  orderCol <- ".addSiriusAnnotations_row"
  keyedFeatureData <- featureData
  keyedFeatureData[[orderCol]] <- seq_len(nrow(featureData))

  annotatedRowData <- S4Vectors::merge(
    keyedFeatureData, annotation,
    by.x = rowData_col, by.y = sirius_col,
    all.x = TRUE, sort = FALSE
  )
  restoredOrder <- order(annotatedRowData[[orderCol]])
  annotatedRowData <- annotatedRowData[restoredOrder, , drop = FALSE]
  annotatedRowData[[orderCol]] <- NULL
  # merge() drops row names; put the feature names back.
  rownames(annotatedRowData) <- rownames(featureData)

  # TODO: ? check for duplicate columns ?
  # The key column appears once in the result, under its row data name, so
  # neither key name counts as an annotation column.
  annotation_cols <- setdiff(colnames(annotation),
                             c(sirius_col, rowData_col))
  rowData_cols <- colnames(featureData)

  for (col in colnames(annotatedRowData)) {
    if (col %in% annotation_cols) {
      attr(annotatedRowData[[col]], "source") <- "sirius"
    } else if (col %in% rowData_cols) {
      attr(annotatedRowData[[col]], "source") <- "data"
    }
  }

  # Only advertise an annotation column that actually exists, so that code
  # reading the attribute does not look up a missing column.
  if (annotation_col %in% colnames(annotatedRowData)) {
    attr(annotatedRowData, "annotation column") <- annotation_col
  } else {
    warning("Annotation column '", annotation_col,
            "' not found after merging; no annotation column set.",
            call. = FALSE)
  }

  rowData(qfeatures[[1]]) <- annotatedRowData
  return(qfeatures)
}
Loading
Loading