Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[r] add row/col max stat functions #103

Merged
merged 5 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ export(chrNames)
export(cluster_graph_leiden)
export(cluster_graph_louvain)
export(cluster_graph_seurat)
export(colMaxs)
export(colMaxs.IterableMatrix)
export(colMaxs.default)
export(colMeans)
export(colSums)
export(colVars)
Expand Down Expand Up @@ -84,6 +87,9 @@ export(read_gencode_transcripts)
export(read_gtf)
export(read_ucsc_chrom_sizes)
export(rotate_x_labels)
export(rowMaxs)
export(rowMaxs.IterableMatrix)
export(rowMaxs.default)
export(rowMeans)
export(rowSums)
export(rowVars)
Expand Down
3 changes: 3 additions & 0 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ Contributions welcome :)
- `apply_by_col()` and `apply_by_row()` allow providing custom R functions to compute per row/col summaries.
In initial tests calculating row/col means using R functions is ~2x slower than the C++-based implementation but memory
usage remains low.
- Add `rowMaxs()` and `colMaxs()` functions, which return the maximum value in each row or column of a matrix.
If `matrixStats` or `MatrixGenerics` packages are installed, `BPCells::rowMaxs()` will fall back to their implementations for non-BPCells objects.
Thanks to @immanuelazn for their first contribution as a new lab hire!

## Bug-fixes
- Fixed error message when a matrix is too large to be converted to dgCMatrix (Thanks to @RookieA1 for reporting issue #95)
Expand Down
8 changes: 8 additions & 0 deletions r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,14 @@ apply_matrix_double_cpp <- function(mat_sexp, f, row_major) {
.Call(`_BPCells_apply_matrix_double_cpp`, mat_sexp, f, row_major)
}

matrix_max_per_row_cpp <- function(matrix) {
.Call(`_BPCells_matrix_max_per_row_cpp`, matrix)
}

matrix_max_per_col_cpp <- function(matrix) {
.Call(`_BPCells_matrix_max_per_col_cpp`, matrix)
}

matrix_identical_uint32_t_cpp <- function(mat1, mat2) {
.Call(`_BPCells_matrix_identical_uint32_t_cpp`, mat1, mat2)
}
Expand Down
73 changes: 72 additions & 1 deletion r/R/matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ setClass("IterableMatrix",
)
)


#' Construct an S4 matrix object wrapping another matrix object
#'
#' Helps to avoid duplicate storage of dimnames
Expand Down Expand Up @@ -647,6 +646,78 @@ rlang::on_load({
}
})

#' Get the max of each row in an iterable matrix
#' @param x IterableMatrix object/dgCMatrix object
#' @return * `rowMaxs()`: vector of maxes for every row
#' @describeIn IterableMatrix-methods Calculate rowMaxs (replacement for `matrixStats::rowMaxs()`)
#' @export
rowMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) UseMethod("rowMaxs")
#' @export
rowMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
if (requireNamespace("MatrixGenerics", quietly = TRUE)) {
MatrixGenerics::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
} else if (requireNamespace("matrixStats", quietly = TRUE)) {
matrixStats::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
}
else {
stop("Can't run rowMaxs on a non-BPCells object unless MatrixGenerics or matrixStats are installed.")
}
}
#' @export
rowMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
if(!is.null(rows) || !is.null(cols) || !isFALSE(na.rm)) {
stop("rowMaxs(IterableMatrix) doesn't support extra arguments rows, cols, or na.rm")
}
iter <- iterate_matrix(convert_matrix_type(x, "double"))
if(x@transpose == TRUE) {
res <- matrix_max_per_col_cpp(iter)
} else {
res <- matrix_max_per_row_cpp(iter)
}
names(res) <- rownames(x)
return(res)
}
rlang::on_load({
if (requireNamespace("MatrixGenerics", quietly=TRUE)) {
setMethod(MatrixGenerics::rowMaxs, "IterableMatrix", rowMaxs.IterableMatrix)
}
})

#' Get the max of each col in an interable matrix
#' @param x IterableMatrix/dgCMatrix object
#' @return * `colMaxs()`: vector of column maxes
#' @describeIn IterableMatrix-methods Calculate colMax (replacement for `matrixStats::colMax()`)
#' @export
colMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) UseMethod("colMaxs")
#' @export
colMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
if (requireNamespace("MatrixGenerics", quietly = TRUE)) {
MatrixGenerics::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
} else if (requireNamespace("matrixStats", quietly = TRUE)) {
matrixStats::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
}
else {
stop("Can't run colMaxs on a non-BPCells object unless MatrixGenerics or matrixStats are installed.")
}
}
#' @export
colMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
iter <- iterate_matrix(convert_matrix_type(x, "double"))
if(x@transpose == TRUE) {
res <- matrix_max_per_row_cpp(iter)
} else {
res <- matrix_max_per_col_cpp(iter)
}
names(res) <- colnames(x)
return(res)
}
rlang::on_load({
if (requireNamespace("MatrixGenerics", quietly=TRUE)) {
setMethod(MatrixGenerics::colMaxs, "IterableMatrix", colMaxs.IterableMatrix)
}
})


# Index subsetting
setClass("MatrixSubset",
contains = "IterableMatrix",
Expand Down
20 changes: 19 additions & 1 deletion r/man/IterableMatrix-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions r/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2418,6 +2418,28 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// matrix_max_per_row_cpp
NumericVector matrix_max_per_row_cpp(SEXP matrix);
RcppExport SEXP _BPCells_matrix_max_per_row_cpp(SEXP matrixSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< SEXP >::type matrix(matrixSEXP);
rcpp_result_gen = Rcpp::wrap(matrix_max_per_row_cpp(matrix));
return rcpp_result_gen;
END_RCPP
}
// matrix_max_per_col_cpp
NumericVector matrix_max_per_col_cpp(SEXP matrix);
RcppExport SEXP _BPCells_matrix_max_per_col_cpp(SEXP matrixSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< SEXP >::type matrix(matrixSEXP);
rcpp_result_gen = Rcpp::wrap(matrix_max_per_col_cpp(matrix));
return rcpp_result_gen;
END_RCPP
}
// matrix_identical_uint32_t_cpp
bool matrix_identical_uint32_t_cpp(SEXP mat1, SEXP mat2);
RcppExport SEXP _BPCells_matrix_identical_uint32_t_cpp(SEXP mat1SEXP, SEXP mat2SEXP) {
Expand Down Expand Up @@ -2614,6 +2636,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_BPCells_matrix_value_histogram_cpp", (DL_FUNC) &_BPCells_matrix_value_histogram_cpp, 2},
{"_BPCells_checksum_double_cpp", (DL_FUNC) &_BPCells_checksum_double_cpp, 1},
{"_BPCells_apply_matrix_double_cpp", (DL_FUNC) &_BPCells_apply_matrix_double_cpp, 3},
{"_BPCells_matrix_max_per_row_cpp", (DL_FUNC) &_BPCells_matrix_max_per_row_cpp, 1},
{"_BPCells_matrix_max_per_col_cpp", (DL_FUNC) &_BPCells_matrix_max_per_col_cpp, 1},
{"_BPCells_matrix_identical_uint32_t_cpp", (DL_FUNC) &_BPCells_matrix_identical_uint32_t_cpp, 2},
{NULL, NULL, 0}
};
Expand Down
47 changes: 46 additions & 1 deletion r/src/matrix_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "bpcells-cpp/matrixIterators/SVD.h"
#include "bpcells-cpp/matrixIterators/TSparseMatrixWriter.h"
#include "bpcells-cpp/matrixUtils/WilcoxonRankSum.h"

#include "R_array_io.h"
#include "R_interrupts.h"
#include "R_xptr_wrapper.h"
Expand Down Expand Up @@ -624,6 +623,52 @@ List apply_matrix_double_cpp(SEXP mat_sexp, Function f, bool row_major) {
return ret;
}

// Compute the max of each row
// [[Rcpp::export]]
NumericVector matrix_max_per_row_cpp(SEXP matrix) {
MatrixIterator<double> it(take_unique_xptr<MatrixLoader<double>>(matrix));
std::vector<double> result(it.rows());
std::vector<uint32_t> row_count(it.rows(), 0);
std::fill(result.begin(), result.end(), -INFINITY);
// keep track of the number of times we've seen each row
while (it.nextCol()) {
while (it.nextValue()) {
result[it.row()] = std::max(result[it.row()], it.val());
row_count[it.row()]++;
}}
// If we've seen a row less than the num of cols, we know there's at least one 0
for (size_t i = 0; i < it.rows(); i++) {
if (row_count[i] < it.cols()) {
result[i] = std::max(0.0, result[i]);
}
}
return Rcpp::wrap(result);
}


// Compute the max of each col
// [[Rcpp::export]]
NumericVector matrix_max_per_col_cpp(SEXP matrix) {
MatrixIterator<double> it(take_unique_xptr<MatrixLoader<double>>(matrix));
std::vector<double> result(it.cols(), -INFINITY);
// std::fill(result.begin(), result.end(), 0);
// keep track of the number of times we've seen each col
std::vector<uint32_t> col_count(it.cols(), 0);
while (it.nextCol()) {
while (it.nextValue()) {
result[it.col()] = std::max(result[it.col()], it.val());
col_count[it.col()]++;
}}
// If we've seen a col less than the num of rows, we know there's at least one 0
for (size_t i = 0; i < it.cols(); i++) {
if (col_count[i] < it.rows()) {
result[i] = std::max(0.0, result[i]);
}
}
return Rcpp::wrap(result);
}


// [[Rcpp::export]]
bool matrix_identical_uint32_t_cpp(SEXP mat1, SEXP mat2) {
MatrixIterator<uint32_t> i1(take_unique_xptr<MatrixLoader<uint32_t>>(mat1));
Expand Down
22 changes: 21 additions & 1 deletion r/tests/testthat/test-matrix_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,24 @@ test_that("svds registers generic with RSpectra", {
# Ensure RSpectra works with BPCells and BPCells works with RSpectra
equal_svds(ans, RSpectra::svds(i1, k=5))
equal_svds(ans, BPCells::svds(m1, k=5))
})
})

test_that("rowMaxs and colMaxs works comprehensive", {
withr::local_seed(195123)
m1_neg <- matrix(runif(12, min = -10, max = -1), nrow = 3, ncol = 4)
m2_dense <- generate_dense_matrix(4, 64)
# Make a sparse matrix with half positive and half negative values
m3_sparse <- generate_sparse_matrix(4, 512)
m3_sparse@x <- m3_sparse@x * sample(c(-1,1), length(m3_sparse@x), replace=TRUE)

m_test_cases <- list(m1_neg, m2_dense, m3_sparse)
for (m in m_test_cases) {
i_transpose <- t(m) %>% as("dgCMatrix") %>% as("IterableMatrix")
i <- m %>% as("dgCMatrix") %>% as("IterableMatrix")
m_tranpose_cases <- list(i_transpose, t(i_transpose), i, t(i))
for (i in m_tranpose_cases) {
expect_identical(rowMaxs(i), matrixStats::rowMaxs(as.matrix(i)))
expect_identical(colMaxs(i), matrixStats::colMaxs(as.matrix(i)))
}
}
})