diff --git a/DESCRIPTION b/DESCRIPTION index 6f710fa..bd34db2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,8 @@ Description: CompoundDb provides functionality to create and use (chemical) LipidMaps, HMDB, ChEBI. Depends: R (>= 3.4), - methods + methods, + AnnotationFilter Imports: ChemmineR, tibble, @@ -36,6 +37,7 @@ biocViews: MassSpectrometry, Metabolomics VignetteBuilder: knitr RoxygenNote: 6.0.1 Collate: + 'AnnotationFilters.R' 'createCompDbPackage.R' 'CompDb.R' 'CompDb-methods.R' diff --git a/NAMESPACE b/NAMESPACE index 782affb..7bcc745 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand export(CompDb) +export(CompoundIdFilter) +export(CompoundNameFilter) export(compound_tbl_lipidblast) export(compound_tbl_sdf) export(compounds) @@ -10,8 +12,15 @@ export(make_metadata) export(src_compdb) export(tables) exportClasses(CompDb) +exportClasses(CompoundIdFilter) +exportClasses(CompoundNameFilter) exportMethods(dbconn) exportMethods(show) +importClassesFrom(AnnotationFilter,AnnotationFilter) +importClassesFrom(AnnotationFilter,AnnotationFilterList) +importClassesFrom(AnnotationFilter,CharacterFilter) +importFrom(AnnotationFilter,AnnotationFilter) +importFrom(AnnotationFilter,AnnotationFilterList) importFrom(Biobase,createPackage) importFrom(ChemmineR,datablock) importFrom(ChemmineR,datablock2ma) @@ -33,4 +42,7 @@ importFrom(methods,show) importFrom(methods,validObject) importFrom(tibble,as_tibble) importFrom(tibble,data_frame) +importMethodsFrom(AnnotationFilter,condition) +importMethodsFrom(AnnotationFilter,logicOp) +importMethodsFrom(AnnotationFilter,value) importMethodsFrom(BiocGenerics,dbconn) diff --git a/R/AnnotationFilters.R b/R/AnnotationFilters.R new file mode 100644 index 0000000..7337fdb --- /dev/null +++ b/R/AnnotationFilters.R @@ -0,0 +1,273 @@ +#' @title Filters supported by CompDb +#' +#' @description +#' +#' A variety of different filters can be applied to the `CompDb` object to +#' retrieve only subsets of the data. These filters extend the +#' [AnnotationFilter::AnnotationFilter] class and support the filtering concepts +#' introduced by Bioconductor's `AnnotationFilter` package. +#' +#' The supported filters are: +#' - `CompoundIdFilter`: filter based on the compound ID. +#' - `CompoundNameFilter`: filter based on the compound name. +#' +#' @param value The value for the filter. For details see +#' [AnnotationFilter::AnnotationFilter()]. +#' +#' @param condition The condition for the filter. For details see +#' [AnnotationFilter::AnnotationFilter()]. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @name Filter-classes +#' +#' @seealso [supportedFilters()] for the method to list all supported filters +#' for a `CompDb` object. +#' +#' @examples +#' library(CompoundDb) +#' +#' ## Create a filter for the compound id +#' cf <- CompoundIdFilter("comp_a") +#' cf +#' +#' ## Create a filter using a formula expression +#' AnnotationFilter(~ compound_id == "comp_b") +#' +#' ## Combine filters +#' AnnotationFilterList(CompoundIdFilter("a"), CompoundNameFilter("b")) +#' +#' ## Using a formula expression +#' AnnotationFilter(~ compound_id == "a" | compound_name != "b") +NULL + +#' @importClassesFrom AnnotationFilter CharacterFilter AnnotationFilter +#' +#' @exportClass CompoundIdFilter +#' +#' @rdname Filter-classes +setClass("CompoundIdFilter", contains = "CharacterFilter", + prototype = list( + condition = "==", + value = "", + field = "compound_id" + )) +#' @export CompoundIdFilter +#' +#' @rdname Filter-classes +CompoundIdFilter <- function(value, condition = "==") { + new("CompoundIdFilter", value = as.character(value), condition = condition) +} + +#' @exportClass CompoundNameFilter +#' +#' @rdname Filter-classes +setClass("CompoundNameFilter", contains = "CharacterFilter", + prototype = list( + condition = "==", + value = "", + field = "compound_name" + )) +#' @export CompoundNameFilter +#' +#' @rdname Filter-classes +CompoundNameFilter <- function(value, condition = "==") { + new("CompoundNameFilter", value = as.character(value), + condition = condition) +} + +#' @description Returns the field (database column name) for the provided +#' `AnnotationFilter`. Returns by default the value from `@field` but can +#' be overwritten if the name differs. +#' +#' @importClassesFrom AnnotationFilter AnnotationFilterList +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.field <- function(x) { + if (is(x, "AnnotationFilterList")) + unlist(lapply(x, .field), use.names = FALSE) + else x@field +} + +#' @description Utility function to map the condition of an AnnotationFilter +#' condition to SQL. +#' +#' @param x `AnnotationFilter`. +#' +#' @return A `character(1)` representing the condition for the SQL call. +#' +#' @importMethodsFrom AnnotationFilter condition value +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.sql_condition <- function(x) { + cond <- condition(x) + if (length(unique(value(x))) > 1) { + if (cond == "==") + cond <- "in" + if (cond == "!=") + cond <- "not in" + } + if (cond == "==") + cond <- "=" + if (cond %in% c("startsWith", "endsWith", "contains")) + cond <- "like" + cond +} + +#' @description Single quote character values, paste multiple values and enclose +#' in quotes. +#' +#' @param x `AnnotationFilter`. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.sql_value <- function(x) { + vals <- unique(value(x)) + if (is(x, "CharacterFilter")) { + vals <- paste0("'", + gsub(unique(vals), pattern = "'", replacement = "''"), + "'") + } + if (length(vals) > 1) + vals <- paste0("(", paste0(vals, collapse = ","), ")") + ## Process the like/startsWith/endsWith + if (condition(x) == "startsWith") + vals <- paste0("'", unique(x@value), "%'") + if (condition(x) == "endsWith") + vals <- paste0("'%", unique(x@value), "'") + if (condition(x) == "contains") + vals <- paste0("'%", unique(x@value), "%'") + vals +} + +#' @description Get the logical operator(s) combining `AnnotationFilter` objects +#' in an `AnnotationFilterList` in SQL format. +#' +#' @param x `AnnotationFilterList` +#' +#' @return `character` with the logical operator(s) in SQL format. +#' +#' @importMethodsFrom AnnotationFilter logicOp +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.sql_logicOp <- function(x) { + vapply(logicOp(x), FUN = function(z) { + if (z == "&") + "and" + else "or" + }, FUN.VALUE = "", USE.NAMES = FALSE) +} + +#' @description Build the where condition from an `AnnotationFilter` or +#' `AnnotationFilterList`. +#' +#' @details The function recursively calls itself if `x` is an +#' `AnnotationFilterList`. +#' @param x `AnnotationFilter` or `AnnotationFilterList`. +#' +#' @return `character(1)` with the *where* condition for a given filter (without +#' `"where"`). +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.where_filter <- function(x) { + if (is(x, "AnnotationFilter")) + paste(.field(x), .sql_condition(x), .sql_value(x)) + else { + whrs <- lapply(x, .where_filter) + log_ops <- .sql_logicOp(x) + res <- whrs[[1]] + if (length(x) > 1) { + ## Combine the elements with the logOp and encapsulate them in () + for (i in 2:length(x)) { + res <- paste(res, log_ops[i-1], whrs[[i]]) + } + res <- paste0("(", res, ")") + } else + res <- whrs[[1]] + res + } +} + +#' @description Process the 'filter' input parameter to ensure that the expected +#' type of objects is provided, the submitted filters are supported by the +#' databse and the result is an `AnnotationFilterList`. +#' +#' @return `AnnotationFilterList` +#' +#' @importFrom AnnotationFilter AnnotationFilterList AnnotationFilter +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.process_filter <- function(x) { + if (is(x, "formula")) + x <- AnnotationFilter(x) + if (is(x, "AnnotationFilter")) + x <- AnnotationFilterList(x) + if (!is(x, "AnnotationFilterList")) + stop("'filter' has to be an object excending 'AnnotationFilter', an ", + "'AnnotationFilterList' or a valid filter expression") + supp_flts <- .supported_filters() + have_flts <- .filter_class(x) + got_it <- have_flts %in% supp_flts$filter + if (any(!got_it)) + stop("Filter(s) ", paste(have_flts[!got_it]), " are not supported") + x +} + + +#' @description List supported filters for the database. +#' +#' @param x `BioCHRIStes` +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.supported_filters <- function(x) { + df <- data.frame(filter = c("CompoundIdFilter", + "CompoundNameFilter"), + field = c("compound_id", + "compound_name"), + stringsAsFactors = FALSE) + df[order(df$filter), ] +} + +#' @description Get an `AnnotationFilter` clss name. +#' +#' @param x `AnnotationFilterList` or `AnnotationFilter`. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.filter_class <- function(x) { + if (is(x, "AnnotationFilterList")) + unlist(lapply(x, .filter_class)) + else class(x)[1] +} diff --git a/R/query-engine.R b/R/query-engine.R index 8d6a7e9..5836eab 100644 --- a/R/query-engine.R +++ b/R/query-engine.R @@ -18,23 +18,28 @@ #' @md #' #' @noRd -.build_query_CompDb <- function(x, columns, filter) { +.build_query_CompDb <- function(x, columns, filter, order) { if (missing(x)) stop("'x' is required") if (missing(columns)) stop("'columns' is required") - if (!missing(filter)) - stop("Not implemented yet") tbls <- .tables(x) col_ok <- columns %in% unique(unlist(tbls, use.names = FALSE)) if (!all(col_ok)) stop("Columns ", paste0(columns[!col_ok], collapse = ", "), " are not present in the database. Use 'tables' to list ", "all tables and their columns.") + ## Depending on 'filter' we might have to add some more tables/columns! + if (!missing(filter)) { + filter <- .process_filter(filter) + columns_flts <- .field(filter) + columns <- unique(c(columns, columns_flts)) + } + ## By default we will return also the filter columns! columns_tbl <- .reduce_tables(tbls, columns) paste0(.select(unlist(.prefix_columns(columns_tbl), use.names = FALSE)), .from(names(columns_tbl)), - .where(filter)) + .where(filter), .order(order)) } #' @description @@ -46,10 +51,13 @@ #' @md #' #' @noRd -.select <- function(columns) { +.select <- function(columns, distinct = TRUE) { if (missing(columns)) stop("No columns provided") - paste0("select ", paste0(columns, collapse = ",")) + if (distinct) + dst <- "distinct " + else dst <- "" + paste0("select ", dst, paste0(columns, collapse = ",")) } #' @description @@ -81,6 +89,7 @@ "synonym.compound_id)"), "left outer join") ) + x <- .add_join_tables(x) q <- x[1] tbls_used <- x[1] tbls <- x[-1] @@ -100,6 +109,28 @@ q } +#' @description Helper function to add additional tables required to join the +#' provided tables. +#' +#' @note This function uses some hard-coded logic based on the database layout +#' to define if, and which, tables are needed for a join. +#' +#' @param x `character` with the names of the tables to be joined. +#' +#' @return `character` with all tables needed to join the tables in `x` +#' (contain `x` plus eventually required additional tables). +#' +#' @md +#' +#' @author Johannes Rainer +#' +#' @noRd +.add_join_tables <- function(x) { + ## So far we don't have to add anything here. + unique(x) +} + + #' @description #' #' Create the *where* condition for the SQL query based on the provided filter. @@ -110,10 +141,24 @@ #' #' @noRd .where <- function(filter) { - if (!missing(filter)) { - stop("Filtering is not yet implemented") - } - NULL + if (!missing(filter)) + paste0(" where ", .where_filter(filter)) + else NULL +} + +#' @description +#' +#' Add a order statement. Thus far we are not testing/checking for correctness. +#' +#' @author Johannes Rainer +#' +#' @md +#' +#' @noRd +.order <- function(order) { + if (!missing(order)) + paste0(" order by ", order) + else NULL } #' @description diff --git a/inst/NEWS b/inst/NEWS index f4c05f8..3e5ff6d 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,7 +1,7 @@ Changes in version 0.0.3 ------------------------ -- Enable parallel processing in `createCompDb`. +- Add CompoundIdFilter and CompoundNameFilter classes and filtering framework. Changes in version 0.0.2 diff --git a/man/Filter-classes.Rd b/man/Filter-classes.Rd new file mode 100644 index 0000000..f9822b7 --- /dev/null +++ b/man/Filter-classes.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AnnotationFilters.R +\docType{class} +\name{Filter-classes} +\alias{Filter-classes} +\alias{CompoundIdFilter-class} +\alias{CompoundIdFilter} +\alias{CompoundNameFilter-class} +\alias{CompoundNameFilter} +\title{Filters supported by CompDb} +\usage{ +CompoundIdFilter(value, condition = "==") + +CompoundNameFilter(value, condition = "==") +} +\arguments{ +\item{value}{The value for the filter. For details see +\code{\link[AnnotationFilter:AnnotationFilter]{AnnotationFilter::AnnotationFilter()}}.} + +\item{condition}{The condition for the filter. For details see +\code{\link[AnnotationFilter:AnnotationFilter]{AnnotationFilter::AnnotationFilter()}}.} +} +\description{ +A variety of different filters can be applied to the \code{CompDb} object to +retrieve only subsets of the data. These filters extend the +\link[AnnotationFilter:AnnotationFilter]{AnnotationFilter::AnnotationFilter} class and support the filtering concepts +introduced by Bioconductor's \code{AnnotationFilter} package. + +The supported filters are: +\itemize{ +\item \code{CompoundIdFilter}: filter based on the compound ID. +\item \code{CompoundNameFilter}: filter based on the compound name. +} +} +\examples{ +library(CompoundDb) + +## Create a filter for the compound id +cf <- CompoundIdFilter("comp_a") +cf + +## Create a filter using a formula expression +AnnotationFilter(~ compound_id == "comp_b") + +## Combine filters +AnnotationFilterList(CompoundIdFilter("a"), CompoundNameFilter("b")) + +## Using a formula expression +AnnotationFilter(~ compound_id == "a" | compound_name != "b") +} +\seealso{ +\code{\link[=supportedFilters]{supportedFilters()}} for the method to list all supported filters +for a \code{CompDb} object. +} +\author{ +Johannes Rainer +} diff --git a/tests/testthat/test_AnnotationFilters.R b/tests/testthat/test_AnnotationFilters.R new file mode 100644 index 0000000..7460247 --- /dev/null +++ b/tests/testthat/test_AnnotationFilters.R @@ -0,0 +1,105 @@ +test_that("CompoundIdFilter, .field, .sql_condition, sql_value work", { + fl <- CompoundIdFilter("samid") + expect_true(is(fl, "CompoundIdFilter")) + expect_true(is(fl, "CharacterFilter")) + expect_true(is(fl, "AnnotationFilter")) + + expect_error(CompoundIdFilter()) + + expect_equal(.field(fl), "compound_id") + expect_equal(.sql_condition(fl), "=") + expect_equal(.sql_value(fl), "'samid'") +}) + +test_that("CompoundNameFilter works", { + fl <- CompoundNameFilter("a") + expect_true(is(fl, "CompoundNameFilter")) + expect_true(is(fl, "CharacterFilter")) + expect_true(is(fl, "AnnotationFilter")) + + expect_error(CompoundNameFilter()) + + expect_equal(.field(fl), "compound_name") + expect_equal(.sql_condition(fl), "=") + expect_equal(.sql_value(fl), "'a'") +}) + +test_that(".field works", { + library(AnnotationFilter) + gif <- GeneIdFilter("a") + sf <- SymbolFilter("b") + tif <- TxIdFilter("c") + expect_equal(.field(gif), "gene_id") + expect_equal(.field(AnnotationFilterList(gif)), "gene_id") + expect_equal(.field(AnnotationFilterList(gif, tif)), c("gene_id", "tx_id")) + expect_equal(.field( + AnnotationFilterList(gif, AnnotationFilterList(tif, sf))), + c("gene_id", "tx_id", "symbol")) +}) + +test_that(".process_filter works", { + library(AnnotationFilter) + gif <- GeneIdFilter("a") + fl <- CompoundIdFilter("d") + + expect_error(.process_filter("3")) + expect_error(.process_filter(gif)) + expect_error(.process_filter(AnnotationFilterList(gif, fl))) + + expect_equal(.process_filter(fl), AnnotationFilterList(fl)) + expect_equal(.process_filter(~compound_id == "d"), AnnotationFilterList(fl)) +}) + +test_that(".sql_condition works", { + fl <- CompoundIdFilter("a") + expect_equal(.sql_condition(fl), "=") + fl <- CompoundIdFilter("a", "!=") + expect_equal(.sql_condition(fl), "!=") + fl <- CompoundIdFilter(c("a", "b"), "!=") + expect_equal(.sql_condition(fl), "not in") + fl <- CompoundIdFilter(c("a", "b"), "==") + expect_equal(.sql_condition(fl), "in") + fl <- CompoundIdFilter("a", "startsWith") + expect_equal(.sql_condition(fl), "like") +}) + +test_that(".sql_value works", { + fl <- CompoundIdFilter("a") + expect_equal(.sql_value(fl), "'a'") + fl <- CompoundIdFilter(c("a", "b")) + expect_equal(.sql_value(fl), "('a','b')") + fl <- CompoundIdFilter("a", condition = "startsWith") + expect_equal(.sql_value(fl), "'a%'") + fl <- CompoundIdFilter("a", condition = "endsWith") + expect_equal(.sql_value(fl), "'%a'") + fl <- CompoundIdFilter("a", condition = "contains") + expect_equal(.sql_value(fl), "'%a%'") +}) + +test_that(".sql_logicOp works", { + afl <- AnnotationFilter(~ compound_id == "a" & compound_name == "2323434") + expect_equal(.sql_logicOp(afl), "and") + afl <- AnnotationFilter(~ compound_id == "a" | compound_name == "2323434") + expect_equal(.sql_logicOp(afl), "or") + afl <- AnnotationFilter(~ compound_id == "a" & compound_name == "2323434" | + gene_id == "123") + expect_equal(.sql_logicOp(afl), c("and", "or")) +}) + +test_that(".where_filter works", { + fl <- CompoundIdFilter("5") + afl <- AnnotationFilter(~ compound_id == "a" & compound_name == "1") + expect_equal(.where_filter(fl), "compound_id = '5'") + expect_equal(.where_filter(afl), + "(compound_id = 'a' and compound_name = '1')") + afl_2 <- AnnotationFilterList(fl, afl, logicOp = "|") + expect_equal(.where_filter(afl_2), + paste0("(compound_id = '5' or (compound_id =", + " 'a' and compound_name = '1'))")) + afl_2 <- AnnotationFilterList(afl_2, afl, logicOp = "&") + expect_equal(.where_filter(afl_2), + paste0("((compound_id = '5' or (compound_id", + " = 'a' and compound_name = '1')", + ") and (compound_id = 'a' and ", + "compound_name = '1'))")) +}) diff --git a/tests/testthat/test_CompDb.R b/tests/testthat/test_CompDb.R index 69e3a13..2150916 100644 --- a/tests/testthat/test_CompDb.R +++ b/tests/testthat/test_CompDb.R @@ -52,6 +52,9 @@ test_that("compounds works", { expect_equal(colnames(cmps_tbl), c("compound_id", "compound_name")) expect_error(compounds(cmp_db, filter = "something")) + + expect_true( + nrow(compounds(cmp_db, filter = ~ compound_id == "HMDB0000005")) == 1) }) test_that("src_compound works", { diff --git a/tests/testthat/test_query-engine.R b/tests/testthat/test_query-engine.R index 3982194..8695459 100644 --- a/tests/testthat/test_query-engine.R +++ b/tests/testthat/test_query-engine.R @@ -29,6 +29,21 @@ test_that(".prefix_columns works", { b = c("b.d", "b.e"))) }) +test_that(".add_join_tables works", { + expect_equal(.add_join_tables(c("a", "b")), c("a", "b")) +}) + +test_that(".where works", { + expect_equal(.where(), NULL) + expect_error(.where("something")) + expect_equal(.where(CompoundIdFilter("a")), " where compound_id = 'a'") +}) + +test_that(".order works", { + expect_equal(.order(), NULL) + expect_equal(.order("a"), " order by a") +}) + test_that(".from works", { expect_equal(.from("compound"), " from compound") expect_error(.from(c("a", "b"))) @@ -46,16 +61,37 @@ test_that(".where works", { }) test_that(".select works", { - expect_equal(.select(c("a", "b")), "select a,b") + expect_equal(.select(c("a", "b"), distinct = FALSE), "select a,b") + expect_equal(.select(c("a", "b")), "select distinct a,b") expect_error(.select()) }) test_that(".build_query_CompDb works", { res <- .build_query_CompDb( cmp_db, columns = c("compound_id", "inchi")) - expect_equal(res, "select compound.compound_id,compound.inchi from compound") + expect_equal(res, paste0("select distinct compound.compound_id,compound.", + "inchi from compound")) expect_error(.build_query_CompDb( cmp_db, columns = c("od", "inchi"))) + res <- .build_query_CompDb( + cmp_db, columns = c("compound_id", "inchi"), order = "something") + expect_equal(res, paste0("select distinct compound.compound_id,compound.", + "inchi from compound order by something")) + res <- .build_query_CompDb( + cmp_db, columns = c("compound_id", "inchi"), + filter = ~ compound_id == "a") + expect_equal(res, paste0("select distinct compound.compound_id,compound.", + "inchi from compound where compound_id = 'a'")) + res <- .build_query_CompDb( + cmp_db, columns = c("compound_id", "inchi"), + filter = ~ compound_id == "a" | compound_name != "b") + expect_equal(res, paste0("select distinct compound.compound_id,compound.", + "inchi,compound.compound_name from compound ", + "where (compound_id = 'a' or compound_name ", + "!= 'b')")) + expect_error(.build_query_CompDb( + cmp_db, columns = c("compound_id", "inchi"), + filter = ~ compound_id == "a" | gene_id != "b")) }) test_that(".join_tables works", {