[cpp] Fix svds for row-major matrices #55

bnprks · Nov 15, 2023 · 6597d79 · 6597d79
1 parent f88d60c
commit 6597d79
Show file tree

Hide file tree

Showing 7 changed files with 25 additions and 6 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -29,6 +29,7 @@ Contributions welcome :)
   compression. This is generally much slower than bitpacking compression, but it adds improved storage options for
   files that must be read by outside programs. Thanks to @ycli1995 for submitting this improvement in pull #42.
 - AnnData export now supported via `write_matrix_anndata_hdf5()` (issue #49)
+- Re-licensed code base to use dual-licensed Apache V2 or MIT instead of GPLv3
 
 ## Improvements
 - Merging fragments with `c()` now handles inputs with mismatched chromosome names.
@@ -60,6 +61,8 @@ Contributions welcome :)
 - Fixed possible memory safety bug where wrapped R objects (such as dgCMatrix) could be potentially garbage collected
   while C++ was still trying to access the data in rare circumstances.
 - Fixed case when dimnames were not preserved when calling `convert_matrix_type()` twice in a row such that it cancels out (e.g. double -> uint32_t -> double). Thanks to @brgrew reporting issue #43
+- Caused and fixed issue resulting in unusably slow performance reading matrices from HDF5 files. Broken versions range from commit  21f8dcf until the fix in 3711a40 (October 18-November 3, 2023). Thanks to @abhiachoudhary for reporting this in issue #53
+- Fixed error with `svds()` not handling row-major matrices correctly. Thanks to @ycli1995 for reporting this in issue #55
 
 # BPCells 0.1.0
 

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -693,8 +693,8 @@ wilcoxon_rank_sum_pval_double_cpp <- function(matrix, groups) {
     .Call(`_BPCells_wilcoxon_rank_sum_pval_double_cpp`, matrix, groups)
 }
 
-svds_cpp <- function(matrix, k, n_cv, maxit, tol) {
-    .Call(`_BPCells_svds_cpp`, matrix, k, n_cv, maxit, tol)
+svds_cpp <- function(matrix, k, n_cv, maxit, tol, transpose) {
+    .Call(`_BPCells_svds_cpp`, matrix, k, n_cv, maxit, tol, transpose)
 }
 
 matrix_value_histogram_cpp <- function(matrix, max_value) {

diff --git a/docs/news/index.html b/docs/news/index.html
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -2298,8 +2298,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // svds_cpp
-SEXP svds_cpp(SEXP matrix, int k, int n_cv, int maxit, double tol);
-RcppExport SEXP _BPCells_svds_cpp(SEXP matrixSEXP, SEXP kSEXP, SEXP n_cvSEXP, SEXP maxitSEXP, SEXP tolSEXP) {
+SEXP svds_cpp(SEXP matrix, int k, int n_cv, int maxit, double tol, bool transpose);
+RcppExport SEXP _BPCells_svds_cpp(SEXP matrixSEXP, SEXP kSEXP, SEXP n_cvSEXP, SEXP maxitSEXP, SEXP tolSEXP, SEXP transposeSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -2308,7 +2308,8 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< int >::type n_cv(n_cvSEXP);
     Rcpp::traits::input_parameter< int >::type maxit(maxitSEXP);
     Rcpp::traits::input_parameter< double >::type tol(tolSEXP);
-    rcpp_result_gen = Rcpp::wrap(svds_cpp(matrix, k, n_cv, maxit, tol));
+    Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP);
+    rcpp_result_gen = Rcpp::wrap(svds_cpp(matrix, k, n_cv, maxit, tol, transpose));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -2511,7 +2512,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_BPCells_wilcoxon_rank_sum_pval_uint32_t_cpp", (DL_FUNC) &_BPCells_wilcoxon_rank_sum_pval_uint32_t_cpp, 2},
     {"_BPCells_wilcoxon_rank_sum_pval_float_cpp", (DL_FUNC) &_BPCells_wilcoxon_rank_sum_pval_float_cpp, 2},
     {"_BPCells_wilcoxon_rank_sum_pval_double_cpp", (DL_FUNC) &_BPCells_wilcoxon_rank_sum_pval_double_cpp, 2},
-    {"_BPCells_svds_cpp", (DL_FUNC) &_BPCells_svds_cpp, 5},
+    {"_BPCells_svds_cpp", (DL_FUNC) &_BPCells_svds_cpp, 6},
     {"_BPCells_matrix_value_histogram_cpp", (DL_FUNC) &_BPCells_matrix_value_histogram_cpp, 2},
     {"_BPCells_matrix_identical_uint32_t_cpp", (DL_FUNC) &_BPCells_matrix_identical_uint32_t_cpp, 2},
     {NULL, NULL, 0}

diff --git a/src/matrixIterators/SVD.cpp b/src/matrixIterators/SVD.cpp
@@ -1,5 +1,7 @@
 #include "SVD.h"
 
+#include <algorithm>
+
 #include "../lib/Spectra/SymEigsSolver.h"
 #include <Eigen/Core>
 namespace BPCells {
@@ -39,13 +41,15 @@ void SpectraMatOp::perform_op(const double *x_in, double *y_out) const {
 // n_cv - convergence speed parameter. Higher values use more memory, and more
 //        operations per iteration, but converge faster
 // maxit - maximum iterations
+// transpose - if true, treat mat as transposed
 // tol - precision for eigenvalues
 SVDResult
 svd(MatrixLoader<double> *mat,
     int k,
     int n_cv,
     int maxit,
     double tol,
+    bool transpose,
     std::atomic<bool> *user_interrupt) {
 
     SVDResult res;
@@ -87,6 +91,10 @@ svd(MatrixLoader<double> *mat,
     }
     res.num_operations += k;
 
+    if (transpose) {
+        std::swap(res.u, res.v);
+    }
+
     return res;
 }
 

diff --git a/src/matrixIterators/SVD.h b/src/matrixIterators/SVD.h
@@ -42,13 +42,15 @@ class SVDResult {
 // n_cv - convergence speed parameter. Higher values use more memory, and more
 //        operations per iteration, but converge faster
 // maxit - maximum iterations
+// transpose - if true, treat mat as transposed
 // tol - precision for eigenvalues
 SVDResult
 svd(MatrixLoader<double> *mat,
     int k,
     int n_cv,
     int maxit,
     double tol,
+    bool transpose,
     std::atomic<bool> *user_interrupt);
 
 } // end namespace BPCells
diff --git a/tests/testthat/test-matrix_stats.R b/tests/testthat/test-matrix_stats.R
@@ -151,4 +151,6 @@ test_that("svds works", {
 
   equal_svds(ans, mine_threaded)
   equal_svds(ans_t, mine_t_threaded)
+
+  equal_svds(ans_t, svds(t(i1), k=5))
 })