Skip to content

Commit

Permalink
[r] Fix perf subsetting large conatenated matrices
Browse files Browse the repository at this point in the history
The logic for subsetting rbind or cbind matrices was taking time
O(length(selection)* # sub-matrices), which could be quite slow for large
datasets. Now, rather than performing a linear search through the
selection indices to find the indices in range for each sub-matrix,
we just do a binary search which eliminates the performance issue.
  • Loading branch information
bnprks committed Dec 23, 2024
1 parent e755135 commit e0100cd
Showing 1 changed file with 30 additions and 16 deletions.
46 changes: 30 additions & 16 deletions r/R/matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -1257,14 +1257,23 @@ setMethod("[", "RowBindMatrices", function(x, i, j, ...) {

x <- selection_fix_dims(x, rlang::maybe_missing(i$subset), rlang::maybe_missing(j$subset))

last_row <- 0L
new_mats <- list()
for (mat in x@matrix_list) {
row_start <- last_row + 1L
row_end <- last_row + nrow(mat)
# Calculate helper variables to extract and transform the relevant parts of i$subset for each sub-matrix
if (!rlang::is_missing(i$subset)) {
rows <- vapply(x@matrix_list, nrow, integer(1))
local_i_offset <- cumsum(c(0, rows))
# Find the range of indices in j$subset that correspond to each matrix in x@matrix_list
local_i_range <- findInterval(cumsum(c(0, rows)), i$subset)
}

new_mats <- list()
for (k in seq_along(x@matrix_list)) {
mat <- x@matrix_list[[k]]
if (!rlang::is_missing(i$subset)) {
local_i <- i$subset[i$subset >= row_start & i$subset <= row_end] - last_row
if (local_i_range[k] == local_i_range[k+1]) {
local_i <- integer(0)
} else {
local_i <- i$subset[(local_i_range[k]+1):(local_i_range[k+1])] - local_i_offset[k]
}
mat <- mat[local_i,]
}
if (!rlang::is_missing(j$subset)) {
Expand All @@ -1274,8 +1283,6 @@ setMethod("[", "RowBindMatrices", function(x, i, j, ...) {
if (nrow(mat) > 0) {
new_mats <- c(new_mats, mat)
}

last_row <- row_end
}
if (length(new_mats) > 1) {
x@matrix_list <- new_mats
Expand Down Expand Up @@ -1320,14 +1327,23 @@ setMethod("[", "ColBindMatrices", function(x, i, j, ...) {

x <- selection_fix_dims(x, rlang::maybe_missing(i$subset), rlang::maybe_missing(j$subset))

last_col <- 0L
new_mats <- list()
for (mat in x@matrix_list) {
col_start <- last_col + 1L
col_end <- last_col + ncol(mat)
# Calculate helper variables to extract and transform the relevant parts of j$subset for each sub-matrix
if (!rlang::is_missing(j$subset)) {
cols <- vapply(x@matrix_list, ncol, integer(1))
local_j_offset <- cumsum(c(0, cols))
# Find the range of indices in j$subset that correspond to each matrix in x@matrix_list
local_j_range <- findInterval(cumsum(c(0, cols)), j$subset)
}

new_mats <- list()
for (k in seq_along(x@matrix_list)) {
mat <- x@matrix_list[[k]]
if (!rlang::is_missing(j$subset)) {
local_j <- j$subset[j$subset >= col_start & j$subset <= col_end] - last_col
if (local_j_range[k] == local_j_range[k+1]) {
local_j <- integer(0)
} else {
local_j <- j$subset[(local_j_range[k]+1):(local_j_range[k+1])] - local_j_offset[k]
}
mat <- mat[,local_j]
}
if (!rlang::is_missing(i$subset)) {
Expand All @@ -1337,8 +1353,6 @@ setMethod("[", "ColBindMatrices", function(x, i, j, ...) {
if (ncol(mat) > 0) {
new_mats <- c(new_mats, mat)
}

last_col <- col_end
}
if (length(new_mats) > 1) {
x@matrix_list <- new_mats
Expand Down

0 comments on commit e0100cd

Please sign in to comment.