diff --git a/DESCRIPTION b/DESCRIPTION index d448fd2..275734a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CompoundDb Type: Package Title: Creating and using (Chemical) Compound Annotation Databases -Version: 0.5.0 +Version: 0.6.0 Authors@R: c(person(given = "Jan", family = "Stanstrup", email = "stanstrup@gmail.com", role = c("aut"), diff --git a/NAMESPACE b/NAMESPACE index 0d95edf..d9fc1ed 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,10 +2,10 @@ export(CompDb) export(CompoundIdFilter) -export(CompoundNameFilter) export(MsBackendCompDb) export(MsmsMzRangeMaxFilter) export(MsmsMzRangeMinFilter) +export(NameFilter) export(adducts) export(compound_tbl_lipidblast) export(compound_tbl_sdf) @@ -25,10 +25,10 @@ export(src_compdb) export(tables) exportClasses(CompDb) exportClasses(CompoundIdFilter) -exportClasses(CompoundNameFilter) exportClasses(MsBackendCompDb) exportClasses(MsmsMzRangeMaxFilter) exportClasses(MsmsMzRangeMinFilter) +exportClasses(NameFilter) exportMethods("$<-") exportMethods("intensity<-") exportMethods("mz<-") diff --git a/R/AnnotationFilters.R b/R/AnnotationFilters.R index d5cd037..2c880e8 100644 --- a/R/AnnotationFilters.R +++ b/R/AnnotationFilters.R @@ -9,7 +9,7 @@ #' #' The supported filters are: #' - `CompoundIdFilter`: filter based on the compound ID. -#' - `CompoundNameFilter`: filter based on the compound name. +#' - `NameFilter`: filter based on the compound name. #' - `MsmsMzRangeMinFilter`: retrieve entries based on the smallest m/z of all #' peaks of their MS/MS spectra. Requires that MS/MS spectra data are present #' (i.e. `hasMsMsSpectra(cmp_db)` returns `TRUE`). 
@@ -43,10 +43,10 @@ #' AnnotationFilter(~ compound_id == "comp_b") #' #' ## Combine filters -#' AnnotationFilterList(CompoundIdFilter("a"), CompoundNameFilter("b")) +#' AnnotationFilterList(CompoundIdFilter("a"), NameFilter("b")) #' #' ## Using a formula expression -#' AnnotationFilter(~ compound_id == "a" | compound_name != "b") +#' AnnotationFilter(~ compound_id == "a" | name != "b") NULL #' @importClassesFrom AnnotationFilter CharacterFilter AnnotationFilter @@ -67,20 +67,20 @@ CompoundIdFilter <- function(value, condition = "==") { new("CompoundIdFilter", value = as.character(value), condition = condition) } -#' @exportClass CompoundNameFilter +#' @exportClass NameFilter #' #' @rdname Filter-classes -setClass("CompoundNameFilter", contains = "CharacterFilter", +setClass("NameFilter", contains = "CharacterFilter", prototype = list( condition = "==", value = "", - field = "compound_name" + field = "name" )) -#' @export CompoundNameFilter +#' @export NameFilter #' #' @rdname Filter-classes -CompoundNameFilter <- function(value, condition = "==") { - new("CompoundNameFilter", value = as.character(value), +NameFilter <- function(value, condition = "==") { + new("NameFilter", value = as.character(value), condition = condition) } @@ -300,9 +300,9 @@ MsmsMzRangeMaxFilter <- function(value, condition = "<=") { #' @noRd .supported_filters <- function(x) { df <- data.frame(filter = c("CompoundIdFilter", - "CompoundNameFilter"), + "NameFilter"), field = c("compound_id", - "compound_name"), + "name"), stringsAsFactors = FALSE) if (!missing(x) && .has_msms_spectra(x)) { df <- rbind(df, diff --git a/R/CompDb.R b/R/CompDb.R index 9205448..1a44a05 100644 --- a/R/CompDb.R +++ b/R/CompDb.R @@ -59,11 +59,11 @@ #' to the database with parameter `x`. #' #' For all other methods: a `CompDb` object. -#' +#' #' @param flags flags passed to the SQLite database connection. #' See [SQLite()]. Defaults to read-only, i.e. RSQLite::SQLITE_RO. 
-#' -#' +#' +#' #' @author Johannes Rainer #' #' @md @@ -100,12 +100,12 @@ #' tables(cmp_db) #' #' ## Extract a data.frame with the id, name and inchi of all compounds -#' compounds(cmp_db, columns = c("compound_id", "compound_name", "inchi")) +#' compounds(cmp_db, columns = c("compound_id", "name", "inchi")) #' #' ## Add also the synonyms (aliases) for the compounds. This will cause the #' ## tables compound and synonym to be joined. The elements of the compound_id -#' ## and compound_name are now no longer unique -#' res <- compounds(cmp_db, columns = c("compound_id", "compound_name", "synonym")) +#' ## and name are now no longer unique +#' res <- compounds(cmp_db, columns = c("compound_id", "name", "synonym")) #' head(res) #' #' ## Extract spectra for a specific HMDB compound. @@ -124,7 +124,7 @@ #' cmp_tbl <- tbl(src_cmp, "compound") #' #' ## Extract the id, name and inchi -#' cmp_tbl %>% select(compound_id, compound_name, inchi) %>% collect() +#' cmp_tbl %>% select(compound_id, name, inchi) %>% collect() NULL #' @importFrom methods new diff --git a/R/createCompDbPackage.R b/R/createCompDbPackage.R index a3dfbfd..3c64a68 100644 --- a/R/createCompDbPackage.R +++ b/R/createCompDbPackage.R @@ -12,7 +12,7 @@ #' #' @details #' -#' Column `"compound_name"` reports for HMDB files the `"GENERIC_NAME"`, for +#' Column `"name"` reports for HMDB files the `"GENERIC_NAME"`, for #' ChEBI the `"ChEBI Name"`, for PubChem the `"PUBCHEM_IUPAC_TRADITIONAL_NAME"`, #' and for Lipid Maps the `"COMMON_NAME"`, if that is #' not available, the first of the compounds synonyms and, if that is also not @@ -27,7 +27,7 @@ #' defined (i.e. not all entries have an InChI ID or other means to uniquely #' identify compounds). Thus, the function returns a highly redundant compound #' table. Feedback on how to reduce this redundancy would be highly welcome! -#' +#' #' LIPID MAPS was tested August 2020. Older SDF files might not work as the field names were changed. 
#' #' @param file `character(1)` with the name of the SDF file. @@ -38,11 +38,11 @@ #' @return A [tibble::tibble] with general compound information (one row per #' compound): #' + `compound_id`: the ID of the compound. -#' + `compound_name`: the compound's name. +#' + `name`: the compound's name. #' + `inchi`: the InChI of the compound. #' + `inchikey`: the InChI key. #' + `formula`: the chemical formula. -#' + `mass`: the compound's mass. +#' + `exactmass`: the compound's (monoisotopic exact) mass. #' + `synonyms`: the compound's synonyms (aliases). This type of this column is #' by default a `list` to support multiple aliases per compound, unless #' argument `collapse` is provided, in which case multiple synonyms are pasted @@ -106,11 +106,11 @@ compound_tbl_sdf <- function(file, collapse) { #' @return A [tibble::tibble] with general compound information (one row per #' compound): #' + `compound_id`: the ID of the compound. -#' + `compound_name`: the compound's name. +#' + `name`: the compound's name. #' + `inchi`: the InChI of the compound. #' + `inchikey`: the InChI key. #' + `formula`: the chemical formula. -#' + `mass`: the compound's mass. +#' + `exactmass`: the compound's mass. #' + `synonyms`: the compound's synonyms (aliases). This type of this column is #' by default a `list` to support multiple aliases per compound, unless #' argument `collapse` is provided, in which case multiple synonyms are pasted @@ -145,11 +145,11 @@ compound_tbl_lipidblast <- function(file, collapse) { #' @description #' #' Internal function to extract compound information from a file in SDF format. -#' +#' #' @param x what is returned by datablock2ma(datablock(read.SDFset)). #' -#' @return A [tibble::tibble] with columns `"compound_id"`, `"compound_name"`, -#' `"inchi"`, `"formula"`, `"mass"`. +#' @return A [tibble::tibble] with columns `"compound_id"`, `"name"`, +#' `"inchi"`, `"formula"`, `"exactmass"`. #' #' @note #' LIPID MAPS was tested August 2020. 
Older SDF files might not work as the field names were changed. @@ -180,11 +180,11 @@ compound_tbl_lipidblast <- function(file, collapse) { nms[nas] <- x[nas, "SYSTEMATIC_NAME"] } res <- data_frame(compound_id = x[, colmap["id"]], - compound_name = nms, + name = nms, inchi = x[, colmap["inchi"]], - inchi_key = x[, colmap["inchi_key"]], + inchikey = x[, colmap["inchikey"]], formula = x[, colmap["formula"]], - mass = as.numeric(x[, colmap["mass"]]), + exactmass = as.numeric(x[, colmap["exactmass"]]), synonyms = syns ) if (is.na(colmap["smiles"])) { @@ -209,7 +209,7 @@ compound_tbl_lipidblast <- function(file, collapse) { #' #' Based on the provided `colnames` guess whether the file is from HMDB, #' ChEBI, LIPID MAPS, PubChem or LipidBlast. -#' +#' #' #' @param x `character` with the column names of the data table. #' @@ -243,9 +243,9 @@ compound_tbl_lipidblast <- function(file, collapse) { .hmdb_colmap <- c(id = "HMDB_ID", name = "GENERIC_NAME", inchi = "INCHI_IDENTIFIER", - inchi_key = "INCHI_KEY", + inchikey = "INCHI_KEY", formula = "FORMULA", - mass = "EXACT_MASS", + exactmass = "EXACT_MASS", synonyms = "SYNONYMS", smiles = "SMILES" ) @@ -253,9 +253,9 @@ compound_tbl_lipidblast <- function(file, collapse) { .chebi_colmap <- c(id = "ChEBI ID", name = "ChEBI Name", inchi = "InChI", - inchi_key = "InChIKey", + inchikey = "InChIKey", formula = "Formulae", - mass = "Monoisotopic Mass", + exactmass = "Monoisotopic Mass", synonyms = "Synonyms", smiles = "SMILES" ) @@ -263,9 +263,9 @@ compound_tbl_lipidblast <- function(file, collapse) { .lipidmaps_colmap <- c(id = "LM_ID", name = "NAME", inchi = "INCHI", - inchi_key = "INCHI_KEY", + inchikey = "INCHI_KEY", formula = "FORMULA", - mass = "EXACT_MASS", + exactmass = "EXACT_MASS", synonyms = "SYNONYMS", smiles = NA ) @@ -273,9 +273,9 @@ compound_tbl_lipidblast <- function(file, collapse) { .pubchem_colmap <- c(id = "PUBCHEM_COMPOUND_CID", name = "PUBCHEM_IUPAC_TRADITIONAL_NAME", inchi = "PUBCHEM_IUPAC_INCHI", - inchi_key = 
"PUBCHEM_IUPAC_INCHIKEY", + inchikey = "PUBCHEM_IUPAC_INCHIKEY", formula = "PUBCHEM_MOLECULAR_FORMULA", - mass = "PUBCHEM_EXACT_MASS", + exactmass = "PUBCHEM_EXACT_MASS", synonyms = "PUBCHEM_IUPAC_TRADITIONAL_NAME", smiles = "PUBCHEM_OPENEYE_CAN_SMILES" # Others: @@ -288,9 +288,9 @@ compound_tbl_lipidblast <- function(file, collapse) { .mona_colmap <- c(id = "ID", name = "NAME", inchi = "INCHIKEY", - inchi_key = "INCHIKEY", + inchikey = "INCHIKEY", formula = "FORMULA", - mass = "EXACT MASS", + exactmass = "EXACT MASS", synonyms = "SYNONYMS", smiles = NA ) @@ -332,11 +332,11 @@ compound_tbl_lipidblast <- function(file, collapse) { mass <- NA_character_ list( compound_id = x$id, - compound_name = nms[1], + name = nms[1], inchi = cmp$inchi, - inchi_key = NA_character_, + inchikey = NA_character_, formula = frml, - mass = mass, + exactmass = mass, synonyms = nms[-1] ) } @@ -369,11 +369,11 @@ compound_tbl_lipidblast <- function(file, collapse) { #' Required columns for the `data.frame` providing the compound information ( #' parameter `x`) are: #' + `"compound_id"`: the ID of the compound. -#' + `"compound_name"`: the compound's name. +#' + `"name"`: the compound's name. #' + `"inchi"`: the InChI of the compound. #' + `"inchikey"`: the InChI key. #' + `"formula"`: the chemical formula. -#' + `"mass"`: the compound's mass. +#' + `"exactmass"`: the compound's (exact) mass. #' + `"synonyms"`: additional synonyms/aliases for the compound. Should be #' either a single character or a list of values for each compound. #' @@ -587,7 +587,7 @@ createCompDb <- function(x, metadata, msms_spectra, path = ".") { } ## Creating indices dbExecute(con, "create index compound_id_idx on compound (compound_id)") - dbExecute(con, "create index compound_name_idx on compound (compound_name)") + dbExecute(con, "create index compound_name_idx on compound (name)") ## Process spectra. 
if (!missing(msms_spectra) && is.data.frame(msms_spectra)) { comp_ids <- unique(x$compound_id) @@ -667,8 +667,8 @@ createCompDb <- function(x, metadata, msms_spectra, path = ".") { .required_metadata_keys <- c("source", "url", "source_version", "source_date", "organism") -.required_compound_db_columns <- c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass") +.required_compound_db_columns <- c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass") .required_compound_columns <- c(.required_compound_db_columns, "synonyms") .required_msms_spectrum_columns <- c(spectrum_id = "integer", @@ -783,8 +783,8 @@ createCompDb <- function(x, metadata, msms_spectra, path = ".") { paste0(.required_compound_columns[!got_it], collapse = ", "))) } else { - if (!is.numeric(x$mass)) - txt <- c(txt, "Column 'mass' should be numeric") + if (!is.numeric(x$exactmass)) + txt <- c(txt, "Column 'exactmass' should be numeric") } ## if (db) { ## ## Do not allow more columns than expected! @@ -930,7 +930,7 @@ make_metadata <- function(source, url, source_version, source_date, organism) { #' MoNa SDF files organize the data by individual spectra (i.e. each element #' is one spectrum) and individual compounds can not easily and consistently #' defined (i.e. not all entries have an InChI ID or other means to uniquely -#' identify compounds). Thus, the function returns a highly redundant compount +#' identify compounds). Thus, the function returns a highly redundant compound #' table. Feedback on how to reduce this redundancy would be highly welcome! #' #' @param x `character(1)` being the SDF file name. diff --git a/R/mass-utility-functions.R b/R/mass-utility-functions.R index 9ae6d7a..d7461e4 100644 --- a/R/mass-utility-functions.R +++ b/R/mass-utility-functions.R @@ -153,7 +153,7 @@ adducts <- function(pattern, polarity, name, set, ...) { #' @param x `numeric` with m/z values. 
#' #' @param cmps `data.frame` with a column containing monoisotopic masses in a -#' column named `"mass"`. +#' column named `"exactmass"`. #' #' @param adduct Adduct definition. Either a `data.frame` as returned by #' [adducts()] or a `character` with the names of the adducts in that @@ -202,7 +202,7 @@ adducts <- function(pattern, polarity, name, set, ...) { not_found$ppm <- NA_real_ lapply(x, function(z) { mss <- (adduct$charge * z - adduct$massdiff) / adduct$nmol - idx <- matchWithPpm(mss, cmps$mass, ppm = ppm) + idx <- matchWithPpm(mss, cmps$exactmass, ppm = ppm) hits <- lengths(idx) idx <- unlist(idx, use.names = FALSE) if (length(idx)) { @@ -210,7 +210,7 @@ adducts <- function(pattern, polarity, name, set, ...) { rownames(res) <- NULL res$adduct <- rep(adduct$name, hits) rep_mss <- rep(mss, hits) - res$ppm <- abs(res$mass - rep_mss) * 1e6 / res$mass + res$ppm <- abs(res$exactmass - rep_mss) * 1e6 / res$exactmass res[order(res$ppm), ] } else not_found }) @@ -241,7 +241,7 @@ adducts <- function(pattern, polarity, name, set, ...) { #' equivalent) with a column containing the m/z values. #' #' @param compounds either a `data.frame` (or equivalent) or a [CompDb()] with -#' the reference annotations. A column named `"mass"` is mandatory if +#' the reference annotations. A column named `"exactmass"` is mandatory if #' `compounds` is a `data.frame`. #' #' @param adduct adduct definition. Either a `data.frame` as returned by @@ -297,8 +297,8 @@ setMethod( "annotateMz", signature(object = "numeric", compounds = "DataFrameOrEquivalent"), function(object, compounds, adduct = adducts(), ppm = 10, ...) { - if (!any(colnames(compounds) == "mass")) - stop("Required column \"mass\" not found in 'compounds'", + if (!any(colnames(compounds) == "exactmass")) + stop("Required column \"exactmass\" not found in 'compounds'", call. 
= FALSE) .annotate_adduct_mz(object, compounds, adduct = adduct, ppm = ppm) }) diff --git a/R/query-engine.R b/R/query-engine.R index 9ea5d7a..1d7c0e7 100644 --- a/R/query-engine.R +++ b/R/query-engine.R @@ -274,7 +274,7 @@ #' #' ## Define database tables with some redundand fields. #' tabs <- list( -#' compound = c("compound_id", "compound_name", "red_field"), +#' compound = c("compound_id", "name", "red_field"), #' spectrum = c("spectrum_id", "compound_id"), #' other_tab = c("compound_id", "red_field")) #' @@ -284,9 +284,9 @@ #' start_from = "other_tab") #' .reduce_tables_start_from(tabs, c("compound_id", "red_field"), #' start_from = "spectrum") -#' .reduce_tables_start_from(tabs, c("compound_name", "red_field"), +#' .reduce_tables_start_from(tabs, c("name", "red_field"), #' start_from = "spectrum") -#' .reduce_tables_start_from(tabs, c("compound_name", "red_field"), +#' .reduce_tables_start_from(tabs, c("name", "red_field"), #' start_from = "spectrum_bla") .reduce_tables_start_from <- function(tables, columns, start_from) { tbls <- .reduce_tables(tables, columns) diff --git a/inst/NEWS b/inst/NEWS index 2b5ce61..2c2c9a4 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,9 @@ +Changes in version 0.6.0 + +- Rename column names: compound_name -> name, mass -> exactmass, inchi_key -> + inchikey. + + Changes in version 0.5.0 - Replace `as.list` with `peaksData`. diff --git a/man/CompDb.Rd b/man/CompDb.Rd index 25d96c3..d6049c7 100644 --- a/man/CompDb.Rd +++ b/man/CompDb.Rd @@ -142,12 +142,12 @@ cmp_db tables(cmp_db) ## Extract a data.frame with the id, name and inchi of all compounds -compounds(cmp_db, columns = c("compound_id", "compound_name", "inchi")) +compounds(cmp_db, columns = c("compound_id", "name", "inchi")) ## Add also the synonyms (aliases) for the compounds. This will cause the ## tables compound and synonym to be joined. 
The elements of the compound_id -## and compound_name are now no longer unique -res <- compounds(cmp_db, columns = c("compound_id", "compound_name", "synonym")) +## and name are now no longer unique +res <- compounds(cmp_db, columns = c("compound_id", "name", "synonym")) head(res) ## Extract spectra for a specific HMDB compound. @@ -166,7 +166,7 @@ src_cmp cmp_tbl <- tbl(src_cmp, "compound") ## Extract the id, name and inchi -cmp_tbl \%>\% select(compound_id, compound_name, inchi) \%>\% collect() +cmp_tbl \%>\% select(compound_id, name, inchi) \%>\% collect() } \seealso{ \code{\link[=createCompDb]{createCompDb()}} for the function to create a SQLite compound database. diff --git a/man/Filter-classes.Rd b/man/Filter-classes.Rd index 59baac5..cc763ac 100644 --- a/man/Filter-classes.Rd +++ b/man/Filter-classes.Rd @@ -5,8 +5,8 @@ \alias{Filter-classes} \alias{CompoundIdFilter-class} \alias{CompoundIdFilter} -\alias{CompoundNameFilter-class} -\alias{CompoundNameFilter} +\alias{NameFilter-class} +\alias{NameFilter} \alias{MsmsMzRangeMinFilter-class} \alias{MsmsMzRangeMinFilter} \alias{MsmsMzRangeMaxFilter-class} @@ -15,7 +15,7 @@ \usage{ CompoundIdFilter(value, condition = "==") -CompoundNameFilter(value, condition = "==") +NameFilter(value, condition = "==") MsmsMzRangeMinFilter(value, condition = ">=") @@ -37,7 +37,7 @@ introduced by Bioconductor's \code{AnnotationFilter} package. The supported filters are: \itemize{ \item \code{CompoundIdFilter}: filter based on the compound ID. -\item \code{CompoundNameFilter}: filter based on the compound name. +\item \code{NameFilter}: filter based on the compound name. \item \code{MsmsMzRangeMinFilter}: retrieve entries based on the smallest m/z of all peaks of their MS/MS spectra. Requires that MS/MS spectra data are present (i.e. \code{hasMsMsSpectra(cmp_db)} returns \code{TRUE}). 
@@ -57,10 +57,10 @@ cf AnnotationFilter(~ compound_id == "comp_b") ## Combine filters -AnnotationFilterList(CompoundIdFilter("a"), CompoundNameFilter("b")) +AnnotationFilterList(CompoundIdFilter("a"), NameFilter("b")) ## Using a formula expression -AnnotationFilter(~ compound_id == "a" | compound_name != "b") +AnnotationFilter(~ compound_id == "a" | name != "b") } \seealso{ \code{\link[=supportedFilters]{supportedFilters()}} for the method to list all supported filters diff --git a/man/annotateMz.Rd b/man/annotateMz.Rd index 4553f93..157de83 100644 --- a/man/annotateMz.Rd +++ b/man/annotateMz.Rd @@ -18,7 +18,7 @@ equivalent) with a column containing the m/z values.} \item{compounds}{either a \code{data.frame} (or equivalent) or a \code{\link[=CompDb]{CompDb()}} with -the reference annotations. A column named \code{"mass"} is mandatory if +the reference annotations. A column named \code{"exactmass"} is mandatory if \code{compounds} is a \code{data.frame}.} \item{adduct}{adduct definition. Either a \code{data.frame} as returned by diff --git a/man/compound_tbl_lipidblast.Rd b/man/compound_tbl_lipidblast.Rd index efc145d..d7abc80 100644 --- a/man/compound_tbl_lipidblast.Rd +++ b/man/compound_tbl_lipidblast.Rd @@ -17,11 +17,11 @@ A \link[tibble:tibble]{tibble::tibble} with general compound information (one ro compound): \itemize{ \item \code{compound_id}: the ID of the compound. -\item \code{compound_name}: the compound's name. +\item \code{name}: the compound's name. \item \code{inchi}: the InChI of the compound. \item \code{inchikey}: the InChI key. \item \code{formula}: the chemical formula. -\item \code{mass}: the compound's mass. +\item \code{exactmass}: the compound's mass. \item \code{synonyms}: the compound's synonyms (aliases). 
This type of this column is by default a \code{list} to support multiple aliases per compound, unless argument \code{collapse} is provided, in which case multiple synonyms are pasted diff --git a/man/compound_tbl_sdf.Rd b/man/compound_tbl_sdf.Rd index 1c3e3a0..37dfea0 100644 --- a/man/compound_tbl_sdf.Rd +++ b/man/compound_tbl_sdf.Rd @@ -17,11 +17,11 @@ A \link[tibble:tibble]{tibble::tibble} with general compound information (one ro compound): \itemize{ \item \code{compound_id}: the ID of the compound. -\item \code{compound_name}: the compound's name. +\item \code{name}: the compound's name. \item \code{inchi}: the InChI of the compound. \item \code{inchikey}: the InChI key. \item \code{formula}: the chemical formula. -\item \code{mass}: the compound's mass. +\item \code{exactmass}: the compound's (monoisotopic exact) mass. \item \code{synonyms}: the compound's synonyms (aliases). This type of this column is by default a \code{list} to support multiple aliases per compound, unless argument \code{collapse} is provided, in which case multiple synonyms are pasted @@ -41,7 +41,7 @@ format (structure-data file). The function currently supports SDF files from: } } \details{ -Column \code{"compound_name"} reports for HMDB files the \code{"GENERIC_NAME"}, for +Column \code{"name"} reports for HMDB files the \code{"GENERIC_NAME"}, for ChEBI the \code{"ChEBI Name"}, for PubChem the \code{"PUBCHEM_IUPAC_TRADITIONAL_NAME"}, and for Lipid Maps the \code{"COMMON_NAME"}, if that is not available, the first of the compounds synonyms and, if that is also not diff --git a/man/createCompDb.Rd b/man/createCompDb.Rd index 030cc6d..e1cbd6a 100644 --- a/man/createCompDb.Rd +++ b/man/createCompDb.Rd @@ -92,11 +92,11 @@ Required columns for the \code{data.frame} providing the compound information ( parameter \code{x}) are: \itemize{ \item \code{"compound_id"}: the ID of the compound. -\item \code{"compound_name"}: the compound's name. +\item \code{"name"}: the compound's name. 
\item \code{"inchi"}: the InChI of the compound. \item \code{"inchikey"}: the InChI key. \item \code{"formula"}: the chemical formula. -\item \code{"mass"}: the compound's mass. +\item \code{"exactmass"}: the compound's (exact) mass. \item \code{"synonyms"}: additional synonyms/aliases for the compound. Should be either a single character or a list of values for each compound. } diff --git a/man/import_mona_sdf.Rd b/man/import_mona_sdf.Rd index d36b430..4d7607a 100644 --- a/man/import_mona_sdf.Rd +++ b/man/import_mona_sdf.Rd @@ -35,7 +35,7 @@ import but avoiding to read the SDF files twice. MoNa SDF files organize the data by individual spectra (i.e. each element is one spectrum) and individual compounds can not easily and consistently defined (i.e. not all entries have an InChI ID or other means to uniquely -identify compounds). Thus, the function returns a highly redundant compount +identify compounds). Thus, the function returns a highly redundant compound table. Feedback on how to reduce this redundancy would be highly welcome! 
} \examples{ diff --git a/tests/testthat/test_AnnotationFilters.R b/tests/testthat/test_AnnotationFilters.R index 139747d..ea46382 100644 --- a/tests/testthat/test_AnnotationFilters.R +++ b/tests/testthat/test_AnnotationFilters.R @@ -11,15 +11,15 @@ test_that("CompoundIdFilter, .field, .sql_condition, sql_value work", { expect_equal(.sql_value(fl), "'samid'") }) -test_that("CompoundNameFilter works", { - fl <- CompoundNameFilter("a") - expect_true(is(fl, "CompoundNameFilter")) +test_that("NameFilter works", { + fl <- NameFilter("a") + expect_true(is(fl, "NameFilter")) expect_true(is(fl, "CharacterFilter")) expect_true(is(fl, "AnnotationFilter")) - expect_error(CompoundNameFilter()) + expect_error(NameFilter()) - expect_equal(.field(fl), "compound_name") + expect_equal(.field(fl), "name") expect_equal(.sql_condition(fl), "=") expect_equal(.sql_value(fl), "'a'") }) @@ -84,31 +84,31 @@ test_that(".sql_value works", { }) test_that(".sql_logicOp works", { - afl <- AnnotationFilter(~ compound_id == "a" & compound_name == "2323434") + afl <- AnnotationFilter(~ compound_id == "a" & name == "2323434") expect_equal(.sql_logicOp(afl), "and") - afl <- AnnotationFilter(~ compound_id == "a" | compound_name == "2323434") + afl <- AnnotationFilter(~ compound_id == "a" | name == "2323434") expect_equal(.sql_logicOp(afl), "or") - afl <- AnnotationFilter(~ compound_id == "a" & compound_name == "2323434" | + afl <- AnnotationFilter(~ compound_id == "a" & name == "2323434" | gene_id == "123") expect_equal(.sql_logicOp(afl), c("and", "or")) }) test_that(".where_filter works", { fl <- CompoundIdFilter("5") - afl <- AnnotationFilter(~ compound_id == "a" & compound_name == "1") + afl <- AnnotationFilter(~ compound_id == "a" & name == "1") expect_equal(.where_filter(fl), "compound_id = '5'") expect_equal(.where_filter(afl), - "(compound_id = 'a' and compound_name = '1')") + "(compound_id = 'a' and name = '1')") afl_2 <- AnnotationFilterList(fl, afl, logicOp = "|") 
expect_equal(.where_filter(afl_2), paste0("(compound_id = '5' or (compound_id =", - " 'a' and compound_name = '1'))")) + " 'a' and name = '1'))")) afl_2 <- AnnotationFilterList(afl_2, afl, logicOp = "&") expect_equal(.where_filter(afl_2), paste0("((compound_id = '5' or (compound_id", - " = 'a' and compound_name = '1')", + " = 'a' and name = '1')", ") and (compound_id = 'a' and ", - "compound_name = '1'))")) + "name = '1'))")) res <- .where_filter(fl, c(compound_id = "test.compound_id")) expect_equal(res, "test.compound_id = '5'") }) diff --git a/tests/testthat/test_CompDb-methods.R b/tests/testthat/test_CompDb-methods.R index dc9f331..0e8d456 100644 --- a/tests/testthat/test_CompDb-methods.R +++ b/tests/testthat/test_CompDb-methods.R @@ -29,8 +29,8 @@ test_that("Spectra,CompDb works", { ## filter and columns res <- Spectra(cmp_spctra_db, filter = ~ compound_id == "HMDB0000001", - columns = c("inchi", "compound_name")) - expect_true(all(c("spectrum_id", "compound_name", "inchi") %in% + columns = c("inchi", "name")) + expect_true(all(c("spectrum_id", "name", "inchi") %in% spectraVariables(res))) expect_true(length(res) == 2) diff --git a/tests/testthat/test_CompDb.R b/tests/testthat/test_CompDb.R index d48d9bc..252880c 100644 --- a/tests/testthat/test_CompDb.R +++ b/tests/testthat/test_CompDb.R @@ -57,10 +57,10 @@ test_that("CompDb constructor and low level functions", { test_that("compounds works", { cmps <- compounds(cmp_db) expect_true(is(cmps, "data.frame")) - cmps_tbl <- compounds(cmp_db, columns = c("compound_id", "compound_name"), + cmps_tbl <- compounds(cmp_db, columns = c("compound_id", "name"), return.type = "tibble") expect_true(is(cmps_tbl, "tbl")) - expect_equal(colnames(cmps_tbl), c("compound_id", "compound_name")) + expect_equal(colnames(cmps_tbl), c("compound_id", "name")) expect_error(compounds(cmp_db, filter = "something")) diff --git a/tests/testthat/test_createCompDbPackage.R b/tests/testthat/test_createCompDbPackage.R index c81dd55..508ac21 
100644 --- a/tests/testthat/test_createCompDbPackage.R +++ b/tests/testthat/test_createCompDbPackage.R @@ -5,9 +5,9 @@ test_that(".simple_extract_compounds_sdf works", { datablock2ma(datablock(read.SDFset(hmdb)))) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 9) chebi <- system.file("sdf/ChEBI_sub.sdf.gz", package = "CompoundDb") @@ -15,9 +15,9 @@ test_that(".simple_extract_compounds_sdf works", { datablock2ma(datablock(read.SDFset(chebi)))) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 6) lm <- system.file("sdf/LipidMaps_sub.sdf.gz", package = "CompoundDb") @@ -25,9 +25,9 @@ test_that(".simple_extract_compounds_sdf works", { datablock2ma(datablock(read.SDFset(lm)))) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 7) pubchem <- system.file("sdf/PubChem_sub.sdf.gz", package = "CompoundDb") @@ -35,9 +35,9 @@ test_that(".simple_extract_compounds_sdf works", { datablock2ma(datablock(read.SDFset(pubchem)))) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", 
"mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 12) mona <- system.file("sdf/MoNa_export-All_Spectra_sub.sdf.gz", @@ -46,9 +46,9 @@ test_that(".simple_extract_compounds_sdf works", { datablock2ma(datablock(read.SDFset(mona)))) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 7) }) @@ -60,9 +60,9 @@ test_that("compound_tbl_sdf works", { cmps <- compound_tbl_sdf(hmdb) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 9) expect_true(is(cmps$synonyms, "list")) cmps <- compound_tbl_sdf(hmdb, collapse = "|") @@ -72,9 +72,9 @@ test_that("compound_tbl_sdf works", { cmps <- compound_tbl_sdf(chebi) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 6) expect_true(is(cmps$synonyms, "list")) cmps <- compound_tbl_sdf(chebi, collapse = "|") @@ -84,9 +84,9 @@ test_that("compound_tbl_sdf works", { cmps <- compound_tbl_sdf(lm) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", 
"inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 7) expect_true(is(cmps$synonyms, "list")) cmps <- compound_tbl_sdf(lm, collapse = "|") @@ -96,9 +96,9 @@ test_that("compound_tbl_sdf works", { cmps <- compound_tbl_sdf(pc) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms", - "smiles")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms", "smiles")) expect_true(nrow(cmps) == 12) expect_true(is(cmps$synonyms, "list")) cmps <- compound_tbl_sdf(pc, collapse = "|") @@ -131,8 +131,9 @@ test_that(".import_lipidblast works", { cmps <- .import_lipidblast(fl) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms")) expect_true(nrow(cmps) == 8) }) @@ -144,8 +145,9 @@ test_that("compound_tbl_lipidblast works", { cmps <- compound_tbl_lipidblast(lb) expect_true(is(cmps, "data.frame")) expect_true(is(cmps, "tbl")) - expect_equal(colnames(cmps), c("compound_id", "compound_name", "inchi", - "inchi_key", "formula", "mass", "synonyms")) + expect_equal(colnames(cmps), c("compound_id", "name", "inchi", + "inchikey", "formula", "exactmass", + "synonyms")) expect_true(nrow(cmps) == 8) expect_true(is(cmps$synonyms, "character")) cmps <- compound_tbl_lipidblast(lb, collapse = ";") @@ -181,10 +183,10 @@ test_that(".db_file_from_metadata works", { }) test_that(".valid_compound works", { - cmps <- data.frame(compound_id = c("01", "02"), compound_name = c("a", "b"), - inchi = c("i1", 
"i2"), inchi_key = c("k1", "k2"), + cmps <- data.frame(compound_id = c("01", "02"), name = c("a", "b"), + inchi = c("i1", "i2"), inchikey = c("k1", "k2"), formula = c("some", "thing"), - mass = c(1, 3), synonyms = c("a", "b")) + exactmass = c(1, 3), synonyms = c("a", "b")) expect_true(.valid_compound(cmps, db = FALSE)) expect_true(.valid_compound(cmps[, 1:6])) expect_error(.valid_compound(cmps[, 1:6], db = FALSE)) @@ -195,7 +197,7 @@ test_that(".valid_compound works", { expect_true(is.character(.valid_compound("b", error = FALSE))) expect_error(.valid_compound(data.frame())) expect_error(.valid_compound(cmps[, 1:3])) - cmps$mass <- c("1", "2") + cmps$exactmass <- c("1", "2") expect_error(.valid_compound(cmps)) }) diff --git a/tests/testthat/test_query-engine.R b/tests/testthat/test_query-engine.R index 1bdf4ec..f4e59ae 100644 --- a/tests/testthat/test_query-engine.R +++ b/tests/testthat/test_query-engine.R @@ -90,11 +90,11 @@ test_that(".build_query_CompDb works", { "inchi from compound where compound.compound_id = 'a'")) res <- .build_query_CompDb( cmp_db, columns = c("compound_id", "inchi"), - filter = ~ compound_id == "a" | compound_name != "b") + filter = ~ compound_id == "a" | name != "b") expect_equal(res, paste0("select distinct compound.compound_id,compound.", - "inchi,compound.compound_name from compound ", + "inchi,compound.name from compound ", "where (compound.compound_id = 'a' or ", - "compound.compound_name != 'b')")) + "compound.name != 'b')")) expect_error(.build_query_CompDb( cmp_db, columns = c("compound_id", "inchi"), filter = ~ compound_id == "a" | gene_id != "b")) @@ -182,7 +182,7 @@ test_that(".join_tables works", { test_that(".reduce_tables_start_from works", { tabs <- list( - compound = c("compound_id", "compound_name", "red_field"), + compound = c("compound_id", "name", "red_field"), spectrum = c("spectrum_id", "compound_id"), other_tab = c("compound_id", "red_field")) res <- .reduce_tables_start_from(tabs, c("compound_id")) @@ -196,11 
+196,11 @@ test_that(".reduce_tables_start_from works", { start_from = "spectrum") expect_equal(res, list(spectrum = "compound_id", compound = "red_field")) expect_warning(res <- .reduce_tables_start_from( - tabs, c("compound_name", "red_field"), + tabs, c("name", "red_field"), start_from = "spectrum")) - expect_equal(res, list(compound = c("compound_name", "red_field"))) + expect_equal(res, list(compound = c("name", "red_field"))) expect_error(.reduce_tables_start_from(tabs, - c("compound_name", "red_field"), + c("name", "red_field"), start_from = "spectrum_bla") ) }) @@ -228,7 +228,7 @@ test_that(".deserialize_mz_intensity works", { }) test_that(".fetch_data works", { - clmns <- c("compound_id", "compound_name", "inchi") + clmns <- c("compound_id", "name", "inchi") res <- .fetch_data(cmp_db, clmns) expect_true(is.data.frame(res)) expect_equal(colnames(res), clmns) @@ -241,15 +241,15 @@ test_that(".fetch_data works", { ## MS/MS spectra res <- .fetch_data(cmp_spctra_db, - columns = c("mz", "compound_name", "polarity")) + columns = c("mz", "name", "polarity")) expect_equal(colnames(res), c("polarity", "spectrum_id", "mz", - "compound_name")) + "name")) expect_true(is.numeric(res$mz[[1]])) res <- CompoundDb:::.fetch_data(cmp_spctra_db, - columns = c("compound_name", "spectrum_id", + columns = c("name", "spectrum_id", "compound_id"), filter = ~ compound_id == "HMDB0000001") - expect_equal(colnames(res), c("compound_name", "compound_id", + expect_equal(colnames(res), c("name", "compound_id", "spectrum_id")) expect_true(nrow(res) == 2) }) diff --git a/vignettes/create-compounddb.Rmd b/vignettes/create-compounddb.Rmd index 3073527..8d2d9d5 100644 --- a/vignettes/create-compounddb.Rmd +++ b/vignettes/create-compounddb.Rmd @@ -82,11 +82,11 @@ cmps The `tibble` contains columns - `compound_id`: the resource-specific ID of the compound. -- `compound_name`: the name of the compound, mostly a generic or common name. 
+- `name`: the name of the compound, mostly a generic or common name. - `inchi`: the compound's inchi. -- `inchi_key`: the INCHI key. +- `inchikey`: the INCHI key. - `formula`: the chemical formula of the compound. -- `mass`: the compounds (monoisotopic) mass. +- `exactmass`: the compound's (monoisotopic) mass. - `synonyms`: a `list` of aliases/synonyms for the compound. - `smiles`: the SMILES of the compound. @@ -158,7 +158,7 @@ tables(cmpdb) Below we extract only selected columns from the *compounds* table. ```{r compounds} -compounds(cmpdb, columns = c("compound_name", "formula", "mass")) +compounds(cmpdb, columns = c("name", "formula", "exactmass")) ``` Analogously we can use the `Spectra` function to extract spectrum data from the @@ -196,13 +196,13 @@ mz(sps)[[2]] Note that it is also possible to retrieve specific spectra, e.g. for a provided compound, or add compound annotations to the `Spectra` object. Below we use the filter expression `~ compound_id == "HMDB0000001"`to get only MS/MS spectra for -the specified compound. In addition we ask for the `"compound_name"` and -`"inchi_key"` of the compound. +the specified compound. In addition we ask for the `"name"` and +`"inchikey"` of the compound. ```{r spectra-selected} sps <- Spectra(cmpdb, filter = ~ compound_id == "HMDB0000001", - columns = c(tables(cmpdb)$msms_spectrum, "compound_name", - "inchi_key")) + columns = c(tables(cmpdb)$msms_spectrum, "name", + "inchikey")) sps ``` @@ -216,7 +216,7 @@ The compound's name and INCHI key have thus also been added as spectra variables: ```{r} -sps$inchi_key +sps$inchikey ``` To share or archive the such created `CompDb` database, we can also create a