diff --git a/DESCRIPTION b/DESCRIPTION index 40e8d371..44b8e8a3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.15.1 +Version: 1.15.2 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different diff --git a/NAMESPACE b/NAMESPACE index aef9e98e..269d2e35 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ exportMethods("centroided<-") exportMethods("collisionEnergy<-") exportMethods("dataOrigin<-") exportMethods("dataStorage<-") +exportMethods("dataStorageBasePath<-") exportMethods("intensity<-") exportMethods("isolationWindowLowerMz<-") exportMethods("isolationWindowTargetMz<-") @@ -78,6 +79,7 @@ exportMethods(containsMz) exportMethods(containsNeutralLoss) exportMethods(dataOrigin) exportMethods(dataStorage) +exportMethods(dataStorageBasePath) exportMethods(dropNaSpectraVariables) exportMethods(entropy) exportMethods(export) @@ -157,6 +159,7 @@ importFrom(MsCoreUtils,coefMA) importFrom(MsCoreUtils,coefSG) importFrom(MsCoreUtils,coefWMA) importFrom(MsCoreUtils,common) +importFrom(MsCoreUtils,common_path) importFrom(MsCoreUtils,entropy) importFrom(MsCoreUtils,group) importFrom(MsCoreUtils,i2index) diff --git a/NEWS.md b/NEWS.md index f72e9681..2babe28f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # Spectra 1.15 +## Changes in 1.15.2 + +- Add `dataStorageBasePath()` and `dataStorageBasePath<-` methods to allow + updating/adapting the path of the data storage files of backends supporting + that [issue #321](https://github.com/rformassspectrometry/Spectra/issues/321).
+ ## Changes in 1.15.1 - Improve documentation for `combineSpectra()` and `combinePeaks()` [issue diff --git a/R/AllGenerics.R b/R/AllGenerics.R index f68500ad..0b69bdaf 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -11,6 +11,10 @@ setGeneric("containsMz", function(object, ...) #' @rdname hidden_aliases setGeneric("containsNeutralLoss", function(object, ...) standardGeneric("containsNeutralLoss")) +setGeneric("dataStorageBasePath", function(object, ...) + standardGeneric("dataStorageBasePath")) +setGeneric("dataStorageBasePath<-", function(object, ..., value) + standardGeneric("dataStorageBasePath<-")) #' @rdname hidden_aliases setGeneric("dropNaSpectraVariables", function(object, ...) standardGeneric("dropNaSpectraVariables")) diff --git a/R/MsBackend.R b/R/MsBackend.R index 9528c628..dc73ee5f 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -11,6 +11,10 @@ #' @aliases backendInitialize #' @aliases backendParallelFactor,MsBackendMzR-method #' @aliases backendParallelFactor,MsBackendHdf5Peaks-method +#' @aliases dataStorageBasePath +#' @aliases dataStorageBasePath,MsBackendMzR-method +#' @aliases dataStorageBasePath<- +#' @aliases dataStorageBasePath<-,MsBackendMzR-method #' #' @description #' @@ -280,6 +284,16 @@ #' spectra in `object` with the data storage of each spectrum. Note that #' missing values (`NA_character_`) are not supported for `dataStorage`. #' +#' - `dataStorageBasePath()`, `dataStorageBasePath<-`: gets or sets the common +#' *base* path of the directory containing all data files. If supported, +#' the function is expected to return (or accept) a `character` of length 1. +#' Most backends (such as for example the `MsBackendMemory`) will not support +#' this function and `dataStorageBasePath()` will return `NA_character_`. +#' For `MsBackendMzR`, this function allows to get or change the path to the +#' directory containing the original data files, which is required if e.g.
+#' a serialized `MsBackendMzR` instance gets copied to another computer or +#' file system. +#' #' - `dropNaSpectraVariables()`: removes spectra variables (i.e. columns in the #' object's `spectraData` that contain only missing values (`NA`). Note that #' while columns with only `NA`s are removed, a `spectraData()` call after @@ -1711,3 +1725,20 @@ setReplaceMethod("[[", "MsBackend", function(x, i, j, ..., value) { setMethod("uniqueMsLevels", "MsBackend", function(object, ...) { unique(msLevel(object)) }) + +#' @exportMethod dataStorageBasePath +#' +#' @rdname MsBackend +setMethod("dataStorageBasePath", "MsBackend", function(object) { + NA_character_ +}) + +#' @exportMethod dataStorageBasePath<- +#' +#' @rdname MsBackend +setReplaceMethod( + "dataStorageBasePath", "MsBackend", function(object, value) { + warning(class(object)[1L], " does not support changing", + " 'dataStorageBasePath'.") + object + }) diff --git a/R/MsBackendMzR.R b/R/MsBackendMzR.R index 74b00308..7cadc0d5 100644 --- a/R/MsBackendMzR.R +++ b/R/MsBackendMzR.R @@ -214,3 +214,21 @@ setMethod("export", "MsBackendMzR", function(object, x, file = tempfile(), setMethod("backendParallelFactor", "MsBackendMzR", function(object) { factor(dataStorage(object), levels = unique(dataStorage(object))) }) + +#' @importFrom MsCoreUtils common_path +setMethod("dataStorageBasePath", "MsBackendMzR", function(object) { + common_path(dataStorage(object)) +}) + +setReplaceMethod( + "dataStorageBasePath", "MsBackendMzR", function(object, value) { + ds <- dataStorage(object) + ds <- gsub("\\", "/", ds, fixed = TRUE) + value <- gsub("\\", "/", value, fixed = TRUE) + cp <- common_path(ds) + ds <- sub(cp, value, ds, fixed = TRUE) + if (!all(file.exists(unique(ds)))) + stop("Provided path does not contain all data files.") + dataStorage(object) <- normalizePath(ds) + object + }) diff --git a/R/Spectra.R b/R/Spectra.R index 690a8fd5..cf212594 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -136,6 +136,15 @@ NULL #' - `...`: 
additional parameters specific for the `MsBackend` passed with #' parameter `backend`. #' +#' The `dataStorageBasePath()` and `dataStorageBasePath<-` functions allow, for +#' backend classes that support this operation, to get or change the *base* +#' path to the directory where the backend stores the data. In-memory backends +#' such as [MsBackendMemory] or [MsBackendDataFrame] keeping all MS data in +#' memory neither support nor need this function, but for [MsBackendMzR] this +#' function can be used to update/adapt the path to the directory containing +#' the original data files. Thus, for `Spectra` objects (using this backend) +#' that were moved to another file system or computer, these functions allow to +#' adjust/adapt the base file path. #' #' @section Accessing spectra data: #' @@ -2811,3 +2820,14 @@ setMethod("entropy", "Spectra", function(object, normalized = TRUE) { setMethod("entropy", "ANY", function(object, ...) { MsCoreUtils::entropy(object) }) + +#' @rdname Spectra +setMethod("dataStorageBasePath", "Spectra", function(object) { + dataStorageBasePath(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("dataStorageBasePath", "Spectra", function(object, value) { + dataStorageBasePath(object@backend) <- value + object +}) diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 1ee1d331..2e9292e9 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -17,6 +17,10 @@ \alias{backendInitialize} \alias{backendParallelFactor,MsBackendMzR-method} \alias{backendParallelFactor,MsBackendHdf5Peaks-method} +\alias{dataStorageBasePath} +\alias{dataStorageBasePath,MsBackendMzR-method} +\alias{dataStorageBasePath<-} +\alias{dataStorageBasePath<-,MsBackendMzR-method} \alias{backendBpparam,MsBackend-method} \alias{backendInitialize,MsBackend-method} \alias{backendMerge,list-method} @@ -93,6 +97,8 @@ \alias{$<-,MsBackend-method} \alias{[[,MsBackend-method} \alias{[[<-,MsBackend-method} +\alias{dataStorageBasePath,MsBackend-method}
+\alias{dataStorageBasePath<-,MsBackend-method} \alias{MsBackendDataFrame} \alias{backendInitialize,MsBackendDataFrame-method} \alias{MsBackendHdf5Peaks} @@ -269,6 +275,10 @@ \S4method{uniqueMsLevels}{MsBackend}(object, ...) +\S4method{dataStorageBasePath}{MsBackend}(object) + +\S4method{dataStorageBasePath}{MsBackend}(object) <- value + MsBackendDataFrame() \S4method{backendInitialize}{MsBackendDataFrame}(object, data, peaksVariables = c("mz", "intensity"), ...) @@ -559,6 +569,8 @@ e.g. be the mzML file from which the data was read. \item \code{dataStorage()}: gets a \code{character} of length equal to the number of spectra in \code{object} with the data storage of each spectrum. Note that missing values (\code{NA_character_}) are not supported for \code{dataStorage}. +\item \code{dataStorageBasePath()}, \verb{dataStorageBasePath<-}: gets or sets the common \emph{base} path of the directory containing all data files. If supported, the function is expected to return (or accept) a \code{character} of length 1. Most backends (such as for example the \code{MsBackendMemory}) will not support this function and \code{dataStorageBasePath()} will return \code{NA_character_}. For \code{MsBackendMzR}, this function allows to get or change the path to the directory containing the original data files, which is required if e.g. a serialized \code{MsBackendMzR} instance gets copied to another computer or +file system. \item \code{dropNaSpectraVariables()}: removes spectra variables (i.e. columns in the object's \code{spectraData} that contain only missing values (\code{NA}).
Note that while columns with only \code{NA}s are removed, a \code{spectraData()} call after diff --git a/man/Spectra.Rd b/man/Spectra.Rd index 4ff4f5ed..399b8262 100644 --- a/man/Spectra.Rd +++ b/man/Spectra.Rd @@ -108,6 +108,8 @@ \alias{combinePeaks,Spectra-method} \alias{entropy,Spectra-method} \alias{entropy,ANY-method} +\alias{dataStorageBasePath,Spectra-method} +\alias{dataStorageBasePath<-,Spectra-method} \title{The Spectra class to manage and access MS data} \usage{ applyProcessing( @@ -504,6 +506,10 @@ coreSpectraVariables() \S4method{entropy}{Spectra}(object, normalized = TRUE) \S4method{entropy}{ANY}(object, ...) + +\S4method{dataStorageBasePath}{Spectra}(object) + +\S4method{dataStorageBasePath}{Spectra}(object) <- value } \arguments{ \item{object}{For \code{Spectra()}: either a \code{DataFrame} or \code{missing}. See @@ -952,6 +958,16 @@ of the data (i.e. which has a defined \code{export} method). \item \code{...}: additional parameters specific for the \code{MsBackend} passed with parameter \code{backend}. } + +The \code{dataStorageBasePath()} and \verb{dataStorageBasePath<-} functions allow, for +backend classes that support this operation, to get or change the \emph{base} +path to the directory where the backend stores the data. In-memory backends +such as \link{MsBackendMemory} or \link{MsBackendDataFrame} keeping all MS data in +memory neither support nor need this function, but for \link{MsBackendMzR} this +function can be used to update/adapt the path to the directory containing +the original data files. Thus, for \code{Spectra} objects (using this backend) +that were moved to another file system or computer, these functions allow to +adjust/adapt the base file path.
} \section{Accessing spectra data}{ diff --git a/tests/testthat/test_MsBackend.R b/tests/testthat/test_MsBackend.R index 3d3f7e28..d80bd757 100644 --- a/tests/testthat/test_MsBackend.R +++ b/tests/testthat/test_MsBackend.R @@ -75,3 +75,9 @@ test_that("backendBpparam,MsBackend works", { test_that("backendParallelFactor,MsBackend works", { expect_equal(backendParallelFactor(MsBackendMemory()), factor()) }) + +test_that("dataStorageBasePath,MsBackend works", { + expect_identical(dataStorageBasePath(MsBackendMemory()), NA_character_) + tmp <- MsBackendMemory() + expect_warning(dataStorageBasePath(tmp) <- "/", "not support") +}) diff --git a/tests/testthat/test_MsBackendMzR.R b/tests/testthat/test_MsBackendMzR.R index ff891738..dee66253 100644 --- a/tests/testthat/test_MsBackendMzR.R +++ b/tests/testthat/test_MsBackendMzR.R @@ -570,3 +570,19 @@ test_that("backendParallelFactor,MsBackendMzR", { factor(dataStorage(sciex_mzr), levels = unique(dataStorage(sciex_mzr)))) }) + +test_that("dataStorageBasePath,dataStorageBasePath<-,MsBackendMzR works", { + tmpd <- normalizePath(tempdir()) + file.copy(sciex_file, tmpd) + + expect_equal(dataStorageBasePath(sciex_mzr), + MsCoreUtils::common_path(sciex_file)) + tmp <- sciex_mzr + dataStorageBasePath(tmp) <- tmpd + expect_true(validObject(tmp)) + bp <- normalizePath(dataStorageBasePath(tmp)) + expect_equal(bp, tmpd) + + #' errors + expect_error(dataStorageBasePath(tmp) <- "some path", "Provided path") +}) diff --git a/tests/testthat/test_Spectra.R b/tests/testthat/test_Spectra.R index 3f8090fc..b0cda2ca 100644 --- a/tests/testthat/test_Spectra.R +++ b/tests/testthat/test_Spectra.R @@ -1891,4 +1891,20 @@ test_that("entropy,Spectra works", { expect_identical(res, vapply(df$intensity, MsCoreUtils::entropy, numeric(1))) }) +test_that("dataStorageBasePath,dataStorageBasePath<-,Spectra works", { + tmpd <- normalizePath(tempdir()) + file.copy(sciex_file, tmpd) + tmp <- Spectra(sciex_mzr) + expect_equal(dataStorageBasePath(tmp), +
MsCoreUtils::common_path(sciex_file)) + tmp <- sciex_mzr + tmp <- Spectra(tmp) + dataStorageBasePath(tmp) <- tmpd + expect_true(validObject(tmp@backend)) + bp <- normalizePath(dataStorageBasePath(tmp)) + expect_equal(bp, tmpd) + + #' errors + expect_error(dataStorageBasePath(tmp) <- "some path", "Provided path") +}) diff --git a/vignettes/Spectra.Rmd b/vignettes/Spectra.Rmd index 753d2bb9..4fbbb95d 100644 --- a/vignettes/Spectra.Rmd +++ b/vignettes/Spectra.Rmd @@ -1287,6 +1287,60 @@ a `lengths(sps)` call, the number of peaks per spectra could also be determined 5000L)`. In that way only peak data of 5000 spectra at a time will be loaded into memory. + +# Serializing (saving), moving and loading serialized `Spectra` objects + +Serializing and re-loading variables/objects during an analysis using e.g. the +`save()` and `load()` functions are common in many workflows, especially if some +of the tasks are computationally intensive and take a long time. Sometimes such +serialized objects might even be moved from one computer (or file system) to +another. These operations are unproblematic for `Spectra` objects with +*in-memory* backends such as the `MsBackendMemory` or `MsBackendDataFrame`, that +keep all data in memory, but would break for *on-disk* backends such as the +`MsBackendMzR` if the file path to the original data files is not identical. It +is thus suggested (if the size of the MS data and the available system +memory allow it) to change the backend for such `Spectra` objects to a +`MsBackendMemory` before serializing the object with `save()`. For `Spectra` +objects with a `MsBackendMzR` an alternative option would be to eventually +update/adapt the path to the directory containing the raw (e.g.
mzML) data +files: assuming these data files are available on both computers, the path to +the directory containing these can be updated with the `dataStorageBasePath<-` +function, thus allowing to move/copy serialized `Spectra` objects between +computers or file systems. + +An example workflow could be: + +Files *a.mzML* and *b.mzML* are stored in a directory */data/mzML/* on one +computer. These get loaded as a `Spectra` object with `MsBackendMzR` and then +serialized to a file *A.RData*. + +```{r, eval = FALSE} +A <- Spectra(c("/data/mzML/a.mzML", "/data/mzML/b.mzML")) +save(A, file = "A.RData") +``` + +Assume this file now gets copied to another computer (where the data is not +available in a folder */data/mzML/*) and loaded with `load()`. + +```{r, eval = FALSE} +load("A.RData") +``` + +This `Spectra` object would not be valid because its `MsBackendMzR` can no +longer access the MS data in the original data files. Assuming the user also +copied the data files *a.mzML* and *b.mzML*, but to a folder +*/some_other_folder/*, the base storage path of the object would need to be +adapted to match the directory where the data files are available on the second +computer: + +```{r, eval = FALSE} +dataStorageBasePath(A) <- "/some_other_folder" +``` + +By now pointing the storage path to the new storage location of the data files, +the `Spectra` object `A` would also be usable on the second computer. + + # Session information ```{r si}