Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[r] add row/col max stat functions #103

Merged
merged 5 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Contributions welcome :)
- `apply_by_col()` and `apply_by_row()` allow providing custom R functions to compute per row/col summaries.
In initial tests calculating row/col means using R functions is ~2x slower than the C++-based implementation but memory
usage remains low.
- Add `row_max()` and `colMaxs()` functions, which return the maximum value in each row or column of a matrix. If `matrixStats` or `MatrixGenerics` packages are installed, `BPCells::row_max()` will fall back to their implementations for non-BPCells objects.
bnprks marked this conversation as resolved.
Show resolved Hide resolved

## Bug-fixes
- Fixed error message when a matrix is too large to be converted to dgCMatrix (Thanks to @RookieA1 for reporting issue #95)
Expand Down
8 changes: 8 additions & 0 deletions r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,14 @@ apply_matrix_double_cpp <- function(mat_sexp, f, row_major) {
.Call(`_BPCells_apply_matrix_double_cpp`, mat_sexp, f, row_major)
}

matrix_max_per_row_cpp <- function(matrix) {
.Call(`_BPCells_matrix_max_per_row_cpp`, matrix)
}

matrix_max_per_col_cpp <- function(matrix) {
.Call(`_BPCells_matrix_max_per_col_cpp`, matrix)
}

matrix_identical_uint32_t_cpp <- function(mat1, mat2) {
.Call(`_BPCells_matrix_identical_uint32_t_cpp`, mat1, mat2)
}
Expand Down
71 changes: 70 additions & 1 deletion r/R/matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ setClass("IterableMatrix",
)
)


#' Construct an S4 matrix object wrapping another matrix object
#'
#' Helps to avoid duplicate storage of dimnames
Expand Down Expand Up @@ -158,6 +157,76 @@ all_matrix_inputs <- function(x) {
x
}

#' Get the max of each row in an iterable matrix
#' @param x IterableMatrix object/dgCMatrix object
#' @return *vector of maxes for every row
#' @describeIn IterableMatrix-methods Calculate rowMax (replacement for `matrixStats::rowMax()`)
bnprks marked this conversation as resolved.
Show resolved Hide resolved
#' @export
rowMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) UseMethod("rowMaxs")
#' @export
rowMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
if (requireNamespace("MatrixGenerics", quietly = TRUE)) {
MatrixGenerics::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
} else if (requireNamespace("matrixStats", quietly = TRUE)) {
matrixStats::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
}
else {
stop("Can't run rowMaxs on a non-BPCells object unless MatrixGenerics or matrixStats are installed.")
}
}
#' @export
rowMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
if(!is.null(rows) || !is.null(cols) || !isFALSE(na.rm)) {
stop("rowMaxs(IterableMatrix) doesn't support extra arguments rows, cols, or na.rm")
}
iter <- iterate_matrix(convert_matrix_type(x, "double"))
if(x@transpose == TRUE) {
res <- matrix_max_per_col_cpp(iter)
} else {
res <- matrix_max_per_row_cpp(iter)
}
names(res) <- rownames(x)
return(res)
}
if (requireNamespace("MatrixGenerics", quietly=TRUE)) {
bnprks marked this conversation as resolved.
Show resolved Hide resolved
setMethod(MatrixGenerics::rowMaxs, "IterableMatrix", rowMaxs.IterableMatrix)
}

#' Get the max of each col in an interable matrix
#' @param x IterableMatrix/dgCMatrix object
#' @return * `colMaxs()`: vector of column maxes
#' @describeIn IterableMatrix-methods Calculate colMax (replacement for `matrixStats::colMax()`)
#' @export
immanuelazn marked this conversation as resolved.
Show resolved Hide resolved
colMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) UseMethod("colMaxs")
#' @export
colMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
if (requireNamespace("MatrixGenerics", quietly = TRUE)) {
MatrixGenerics::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
} else if (requireNamespace("matrixStats", quietly = TRUE)) {
matrixStats::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...)
}
else {
stop("Can't run colMaxs on a non-BPCells object unless MatrixGenerics or matrixStats are installed.")
}
}
#' @export
colMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) {
iter <- iterate_matrix(convert_matrix_type(x, "double"))
if(x@transpose == TRUE) {
res <- matrix_max_per_row_cpp(iter)
} else {
res <- matrix_max_per_col_cpp(iter)
}
names(res) <- colnames(x)
return(res)
}
rlang::on_load({
if (requireNamespace("MatrixGenerics", quietly=TRUE)) {
setMethod(MatrixGenerics::colMaxs, "IterableMatrix", colMaxs.IterableMatrix)
}
})


setMethod("short_description", "IterableMatrix", function(x) {
character(0)
})
Expand Down
24 changes: 24 additions & 0 deletions r/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2418,6 +2418,28 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// matrix_max_per_row_cpp
NumericVector matrix_max_per_row_cpp(SEXP matrix);
RcppExport SEXP _BPCells_matrix_max_per_row_cpp(SEXP matrixSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< SEXP >::type matrix(matrixSEXP);
rcpp_result_gen = Rcpp::wrap(matrix_max_per_row_cpp(matrix));
return rcpp_result_gen;
END_RCPP
}
// matrix_max_per_col_cpp
NumericVector matrix_max_per_col_cpp(SEXP matrix);
RcppExport SEXP _BPCells_matrix_max_per_col_cpp(SEXP matrixSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< SEXP >::type matrix(matrixSEXP);
rcpp_result_gen = Rcpp::wrap(matrix_max_per_col_cpp(matrix));
return rcpp_result_gen;
END_RCPP
}
// matrix_identical_uint32_t_cpp
bool matrix_identical_uint32_t_cpp(SEXP mat1, SEXP mat2);
RcppExport SEXP _BPCells_matrix_identical_uint32_t_cpp(SEXP mat1SEXP, SEXP mat2SEXP) {
Expand Down Expand Up @@ -2614,6 +2636,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_BPCells_matrix_value_histogram_cpp", (DL_FUNC) &_BPCells_matrix_value_histogram_cpp, 2},
{"_BPCells_checksum_double_cpp", (DL_FUNC) &_BPCells_checksum_double_cpp, 1},
{"_BPCells_apply_matrix_double_cpp", (DL_FUNC) &_BPCells_apply_matrix_double_cpp, 3},
{"_BPCells_matrix_max_per_row_cpp", (DL_FUNC) &_BPCells_matrix_max_per_row_cpp, 1},
{"_BPCells_matrix_max_per_col_cpp", (DL_FUNC) &_BPCells_matrix_max_per_col_cpp, 1},
{"_BPCells_matrix_identical_uint32_t_cpp", (DL_FUNC) &_BPCells_matrix_identical_uint32_t_cpp, 2},
{NULL, NULL, 0}
};
Expand Down
47 changes: 46 additions & 1 deletion r/src/matrix_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "bpcells-cpp/matrixIterators/SVD.h"
#include "bpcells-cpp/matrixIterators/TSparseMatrixWriter.h"
#include "bpcells-cpp/matrixUtils/WilcoxonRankSum.h"

#include "R_array_io.h"
#include "R_interrupts.h"
#include "R_xptr_wrapper.h"
Expand Down Expand Up @@ -624,6 +623,52 @@ List apply_matrix_double_cpp(SEXP mat_sexp, Function f, bool row_major) {
return ret;
}

// Compute the max of each row
// [[Rcpp::export]]
NumericVector matrix_max_per_row_cpp(SEXP matrix) {
MatrixIterator<double> it(take_unique_xptr<MatrixLoader<double>>(matrix));
std::vector<double> result(it.rows());
std::vector<uint32_t> row_count(it.rows(), 0);
std::fill(result.begin(), result.end(), -INFINITY);
// keep track of the number of times we've seen each row
while (it.nextCol()) {
while (it.nextValue()) {
result[it.row()] = std::max(result[it.row()], it.val());
row_count[it.row()]++;
}}
// If we've seen a row less than the num of cols, we know there's at least one 0
for (size_t i = 0; i < it.rows(); i++) {
if (row_count[i] < it.cols()) {
result[i] = std::max(0.0, result[i]);
}
}
return Rcpp::wrap(result);
}


// Compute the max of each col
// [[Rcpp::export]]
NumericVector matrix_max_per_col_cpp(SEXP matrix) {
MatrixIterator<double> it(take_unique_xptr<MatrixLoader<double>>(matrix));
std::vector<double> result(it.cols(), -INFINITY);
// std::fill(result.begin(), result.end(), 0);
// keep track of the number of times we've seen each col
std::vector<uint32_t> col_count(it.cols(), 0);
while (it.nextCol()) {
while (it.nextValue()) {
result[it.col()] = std::max(result[it.col()], it.val());
col_count[it.col()]++;
}}
// If we've seen a col less than the num of rows, we know there's at least one 0
for (size_t i = 0; i < it.cols(); i++) {
if (col_count[i] < it.rows()) {
result[i] = std::max(0.0, result[i]);
}
}
return Rcpp::wrap(result);
}


// [[Rcpp::export]]
bool matrix_identical_uint32_t_cpp(SEXP mat1, SEXP mat2) {
MatrixIterator<uint32_t> i1(take_unique_xptr<MatrixLoader<uint32_t>>(mat1));
Expand Down
65 changes: 64 additions & 1 deletion r/tests/testthat/test-matrix_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,67 @@ test_that("svds registers generic with RSpectra", {
# Ensure RSpectra works with BPCells and BPCells works with RSpectra
equal_svds(ans, RSpectra::svds(i1, k=5))
equal_svds(ans, BPCells::svds(m1, k=5))
})
})

test_that("row_max and colMaxs works with negative only matrices", {
withr::local_seed(195123)
m1 <- matrix(runif(12, min=-10, max=-1), nrow = 3, ncol = 4)
i1 <- m1 %>% as("dgCMatrix") %>% as("IterableMatrix")
expect_equal(rowMaxs(i1), matrixStats::rowMaxs(m1))
expect_equal(colMaxs(i1), matrixStats::colMaxs(m1))
})

test_that("row_max and colMaxs works with dense matrices", {
withr::local_seed(195123)
m1_dense <- generate_dense_matrix(4, 64)
i1_dense <- m1_dense %>% as("dgCMatrix") %>% as("IterableMatrix")
expect_equal(rowMaxs(i1_dense), matrixStats::rowMaxs(m1_dense))
expect_equal(colMaxs(i1_dense), matrixStats::colMaxs(m1_dense))
})

test_that("row_max and colMaxs works with sparse matrices", {
withr::local_seed(195123)
m1_sparse <- generate_sparse_matrix(4, 64)
i1_sparse <- m1_sparse %>% as("dgCMatrix") %>% as("IterableMatrix")
expect_equal(rowMaxs(i1_sparse), matrixStats::rowMaxs(as.matrix(m1_sparse)))
expect_equal(colMaxs(i1_sparse), matrixStats::colMaxs(as.matrix(m1_sparse)))
})

test_that("row_max works for iterable matrices", {
withr::local_seed(195123)
m1_dense <- generate_dense_matrix(4, 64)
i1_dense <- m1_dense %>% as("dgCMatrix") %>% as("IterableMatrix")
expect_equal(rowMaxs(i1_dense), matrixStats::rowMaxs(as.matrix(m1_dense)))
})


test_that("row_max and colMaxs tranpose works", {
withr::local_seed(195123)
m1 <- generate_sparse_matrix(4, 64)
i1_t <- t(m1) %>% as("dgCMatrix") %>% as("IterableMatrix")
i2_t <- m1 %>% as("dgCMatrix") %>% as("IterableMatrix") %>% t()
# for the case of pre-tranposed matrics prior to conversion to IterableMatrix
expect_equal(rowMaxs(i1_t), matrixStats::rowMaxs(as.matrix(t(m1))))
expect_equal(colMaxs(i1_t), matrixStats::colMaxs(as.matrix(t(m1))))
# case of transposing after conversion to IterableMatrix
expect_equal(rowMaxs(i2_t), matrixStats::rowMaxs(as.matrix(t(m1))))
expect_equal(colMaxs(i2_t), matrixStats::colMaxs(as.matrix(t(m1))))
})

bnprks marked this conversation as resolved.
Show resolved Hide resolved
test_that("row_max and colMaxs works comprehensive", {
withr::local_seed(195123)
m1_neg <- matrix(runif(12, min = -10, max = -1), nrow = 3, ncol = 4)
m2_dense <- generate_dense_matrix(4, 64)
m3_sparse <- generate_sparse_matrix(4, 64)
m_test_cases <- list(m1_neg, m2_dense, m3_sparse)
for (m in m_test_cases) {
i_tranpose_pre <- t(m) %>% as("dgCMatrix") %>% as("IterableMatrix")
i <- m %>% as("dgCMatrix") %>% as("IterableMatrix")
i_transpose_post <- t(i)
m_tranpose_cases <- list(i_tranpose_pre, i_transpose_post, i)
for (i in m_tranpose_cases) {
expect_equal(rowMaxs(i), matrixStats::rowMaxs(as.matrix(i)))
expect_equal(colMaxs(i), matrixStats::colMaxs(as.matrix(i)))
}
}
})