-
Notifications
You must be signed in to change notification settings - Fork 19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add AnnData dense matrix read support #146
Merged
Merged
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
fdf4de9
[cpp] Implement dense AnnData read support
bnprks fea0a16
[cpp] Split 10x and AnnData import code files
bnprks 81008f6
[cpp] Move non-interface headers to cpp file
bnprks 0b00853
[r] Update docs for AnnData dense support
bnprks 29b0a26
[cpp] Add FilterZeros
bnprks 6d0675a
[cpp] Clean up readAnnDataDims() interface
bnprks edd311a
[cpp] Code review changes for dense matrix read
bnprks 7bbd8e3
[cpp] Remove intermediate code from test-matrixIterators
bnprks 330e664
[r] Adjust `dims_matrix` parameter typing
bnprks 3ed0bbf
Merge branch 'main' into bp/dense-anndata-read
bnprks eae371e
Merge branch 'main' into bp/dense-anndata-read
bnprks File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
// Copyright 2024 BPCells contributors | ||
// | ||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | ||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license | ||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your | ||
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
#include <random> | ||
#include <sstream> | ||
|
||
#include <gmock/gmock.h> | ||
#include <gtest/gtest.h> | ||
|
||
#include <matrixIterators/CSparseMatrix.h> | ||
#include <matrixIterators/FilterZeros.h> | ||
|
||
#include <Eigen/Core> | ||
|
||
using namespace BPCells; | ||
using namespace ::testing; | ||
using namespace Eigen; | ||
|
||
// Build a pseudo-random n_row x n_col sparse matrix for use in tests.
//
// Cells are visited row by row. Each cell becomes a stored entry with
// probability 1/5, and every stored value is an integer drawn uniformly from
// [1, 20], so the result holds no explicitly stored zeros.
//
// Args:
//   n_row, n_col: matrix dimensions
//   seed: seed for the mersenne twister engine; the sequence of draws is fully
//         determined by it (for a given standard library implementation)
// Returns: the populated matrix, assembled via setFromTriplets()
SparseMatrix<double> generate_mat(uint32_t n_row, uint32_t n_col, uint32_t seed = 125124) {
    std::mt19937 gen(seed); // Standard mersenne_twister_engine
    std::uniform_int_distribution<> distrib(1, 20);
    std::uniform_int_distribution<> nonzero(0, 4); // 1/5 chance of being non-zero

    std::vector<Triplet<double>> triplets;

    // Counters share the unsigned type of the bounds, so there is no
    // signed/unsigned comparison in the loop conditions.
    for (uint32_t i = 0; i < n_row; i++) {
        for (uint32_t j = 0; j < n_col; j++) {
            // Draw order is part of the contract: one `nonzero` draw per cell,
            // followed by a `distrib` draw only for the cells that are kept.
            if (0 == nonzero(gen)) {
                triplets.emplace_back(
                    static_cast<int>(i), static_cast<int>(j), static_cast<double>(distrib(gen))
                );
            }
        }
    }

    SparseMatrix<double> mat(n_row, n_col);
    mat.setFromTriplets(triplets.begin(), triplets.end());
    return mat;
}
|
||
// Build an Eigen::Map over the arrays held by `mat`.
//
// The Map is constructed directly from mat's outer-index, inner-index and
// value pointers together with its dimensions and non-zero count.
// NOTE(review): the Map presumably does not own this memory, so it should not
// be used after `mat` is destroyed or reallocated -- confirm against Eigen docs.
Map<SparseMatrix<double>> get_map(SparseMatrix<double> &mat) {
    int *outer_index = mat.outerIndexPtr();
    int *inner_index = mat.innerIndexPtr();
    double *values = mat.valuePtr();

    return Map<SparseMatrix<double>>(
        mat.rows(), mat.cols(), mat.nonZeros(), outer_index, inner_index, values
    );
}
|
||
// Check whether two sparse matrices have exactly the same stored representation.
//
// Returns false as soon as any of the following differ: row count, column
// count, number of stored entries, any inner index or value (compared
// position by position), the outer size, or any of the first outerSize()
// entries of the outer index array. Returns true otherwise.
//
// Once the shapes are known to match, a gtest expectation is also recorded
// that both matrices are in compressed form; that expectation does not
// influence the return value.
bool matrices_are_identical(SparseMatrix<double> a, SparseMatrix<double> b) {
    const bool same_shape = a.rows() == b.rows() && a.cols() == b.cols();
    if (!same_shape) return false;

    EXPECT_TRUE(a.isCompressed());
    EXPECT_TRUE(b.isCompressed());

    // Stored entries: inner index and value must agree at every position
    const Eigen::Index nnz = a.nonZeros();
    if (nnz != b.nonZeros()) return false;

    const int *a_inner = a.innerIndexPtr();
    const int *b_inner = b.innerIndexPtr();
    const double *a_vals = a.valuePtr();
    const double *b_vals = b.valuePtr();
    for (Eigen::Index k = 0; k < nnz; k++) {
        const bool same_entry = a_inner[k] == b_inner[k] && a_vals[k] == b_vals[k];
        if (!same_entry) return false;
    }

    // Outer index array: start offset of each outer vector
    const Eigen::Index n_outer = a.outerSize();
    if (n_outer != b.outerSize()) return false;

    const int *a_outer = a.outerIndexPtr();
    const int *b_outer = b.outerIndexPtr();
    for (Eigen::Index k = 0; k < n_outer; k++) {
        if (a_outer[k] != b_outer[k]) return false;
    }

    return true;
}
|
||
// Verifies FilterZeros on two inputs:
//   1. a matrix whose stored values are all non-zero must round-trip unchanged
//   2. a matrix with explicitly stored zeros must come out equal to its
//      pruned() counterpart (and therefore differ from the unpruned input)
TEST(MatrixIterators, FilterZeros) {
    // Wrap a matrix's storage in a CSparseMatrix loader with empty row/col name readers.
    // NOTE(review): the trailing `5` is presumably a chunk/load size -- confirm
    // against the CSparseMatrix constructor.
    auto make_loader = [](SparseMatrix<double> &mat) {
        return std::make_unique<CSparseMatrix<double>>(
            get_map(mat), std::unique_ptr<StringReader>(), std::unique_ptr<StringReader>(), 5
        );
    };

    // Case 1: generate_mat() only stores values in [1, 20], so nothing should be dropped
    SparseMatrix<double> m1 = generate_mat(100, 50, 125123);
    FilterZeros<double> m1_filt(make_loader(m1));

    CSparseMatrixWriter<double> res_m1;
    res_m1.write(m1_filt);
    EXPECT_TRUE(matrices_are_identical(m1, res_m1.getMat()));

    // Case 2: overwrite every stored value below 10 with an explicit zero
    SparseMatrix<double> m2 = m1;
    for (auto &stored_val : m2.coeffs()) {
        if (stored_val < 10) stored_val = 0;
    }
    SparseMatrix<double> m2_pruned = m2.pruned();

    // The filtered output must match the pruned matrix, not the zero-laden input
    FilterZeros<double> m2_filt(make_loader(m2));
    CSparseMatrixWriter<double> res_m2;
    res_m2.write(m2_filt);
    EXPECT_FALSE(matrices_are_identical(m2, res_m2.getMat()));
    EXPECT_TRUE(matrices_are_identical(m2_pruned, res_m2.getMat()));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// Copyright 2024 BPCells contributors | ||
// | ||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | ||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license | ||
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your | ||
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
#include <atomic> | ||
|
||
#include "../arrayIO/array_interfaces.h" | ||
#include "MatrixIterator.h" | ||
|
||
namespace BPCells { | ||
|
||
// Filter out zero values from a MatrixLoader | ||
// This is useful when reading dense matrices that have many zero values, | ||
// or when performing operations that will cause new zeros to be created (e.g. multiplying a row by zero) | ||
template<typename T> | ||
class FilterZeros : public MatrixLoaderWrapper<T> { | ||
private: | ||
size_t capacity_ = 0; | ||
public: | ||
FilterZeros(std::unique_ptr<MatrixLoader<T>> &&loader) : MatrixLoaderWrapper<T>(std::move(loader)) {} | ||
|
||
// Return false if there are no more entries to load | ||
bool load() override { | ||
capacity_ = 0; | ||
|
||
while (capacity_ == 0) { | ||
if (!this->loader->load()) return false; | ||
|
||
uint32_t *row_data = this->loader->rowData(); | ||
T* val_data = this->loader->valData(); | ||
size_t cap = this->loader->capacity(); | ||
|
||
for (size_t i = 0; i < cap; i++) { | ||
row_data[capacity_] = row_data[i]; | ||
val_data[capacity_] = val_data[i]; | ||
capacity_ += val_data[capacity_] != 0; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
// Number of loaded entries available | ||
uint32_t capacity() const override {return capacity_;} | ||
}; | ||
|
||
} // end namespace BPCells |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very cool and elegant! I can already see this being used in a handful of derived MatrixLoader types.