From 7d4d1031808889e4df927b32fc2f48c59c61b6cb Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Sun, 23 Apr 2023 20:12:11 -0500 Subject: [PATCH 01/18] add: iterator classes experiment --- apis/r/R/ReadIter.R | 71 ++++++++++++++++++++++++++++++++++++++++ apis/r/R/SOMAArrayBase.R | 10 ++++++ apis/r/R/SOMADataFrame.R | 21 ++---------- apis/r/R/TableReadIter.R | 35 ++++++++++++++++++++ 4 files changed, 118 insertions(+), 19 deletions(-) create mode 100644 apis/r/R/ReadIter.R create mode 100644 apis/r/R/TableReadIter.R diff --git a/apis/r/R/ReadIter.R b/apis/r/R/ReadIter.R new file mode 100644 index 0000000000..7dd223b0c8 --- /dev/null +++ b/apis/r/R/ReadIter.R @@ -0,0 +1,71 @@ +#' SOMA Read Iterator Base class +#' +#' Class that allows for read iteration of SOMA reads. + +ReadIter <- R6::R6Class( + classname = "ReadIter", + + public = list( + + uri = NULL, + tiledbsoma_ctx = NULL, + + #' @description Create (lifecycle: experimental) + initialize = function(uri, tiledbsoma_ctx) { + self$uri <- uri + self$tiledbsoma_ctx <- tiledbsoma_ctx + private$soma_reader_setup() + }, + + #' @description Check if iterated read is complete or not. (lifecycle: experimental) + read_complete = function() { + if (is.null(private$soma_reader_pointer)) { + TRUE + } else { + sr_complete(private$soma_reader_pointer) + } + }, + + #' @description Read the next chunk of an iterated read. 
(lifecycle: experimental) + read_next = function() { + if (is.null(private$soma_reader_pointer)) { + NULL + } else { + if (sr_complete(private$soma_reader_pointer)) { + warning("Iteration complete, returning NULL") + NULL + } else { + rl <- sr_next(private$soma_reader_pointer) + private$soma_reader_transform(rl) + } + } + }, + + #' @description TODO + # to be refined in derived classes + concat = function() { + NULL + } + + ), + + private = list( + + # Internal 'external pointer' object used for iterated reads + soma_reader_pointer = NULL, + + # Instantiate soma_reader_pointer with a soma_array_reader object + soma_reader_setup = function() { + private$soma_reader_pointer <- sr_setup( + self$uri, + config=as.character(tiledb::config(self$tiledbsoma_ctx$context())) + ) + }, + + ## to be refined in derived classes + soma_reader_transform = function(x) { + x + } + + ) +) diff --git a/apis/r/R/SOMAArrayBase.R b/apis/r/R/SOMAArrayBase.R index 624994f2b7..a476950268 100644 --- a/apis/r/R/SOMAArrayBase.R +++ b/apis/r/R/SOMAArrayBase.R @@ -20,6 +20,8 @@ SOMAArrayBase <- R6::R6Class( public = list( #' @description Check if iterated read is complete or not. (lifecycle: experimental) + # SHOULD REMOVE IF iterator functionality moves to ReatIter class for + # all read functions, currently only implemented for SOMADataFrame read_complete = function() { if (is.null(private$soma_reader_pointer)) { TRUE @@ -29,6 +31,8 @@ SOMAArrayBase <- R6::R6Class( }, #' @description Read the next chunk of an iterated read. 
(lifecycle: experimental) + # SHOULD REMOVE IF iterator functionality moves to ReatIter class for + # all read functions, currently only implemented for SOMADataFrame read_next = function() { if (is.null(private$soma_reader_pointer)) { NULL @@ -61,9 +65,13 @@ SOMAArrayBase <- R6::R6Class( }, # Internal 'external pointer' object used for iterated reads + # SHOULD REMOVE IF iterator functionality moves to ReatIter class for + # all read functions, currently only implemented for SOMADataFrame soma_reader_pointer = NULL, # Instantiate soma_reader_pointer with a soma_array_reader object + # SHOULD REMOVE IF iterator functionality moves to ReatIter class for + # all read functions, currently only implemented for SOMADataFrame soma_reader_setup = function() { private$soma_reader_pointer <- sr_setup( self$uri, @@ -72,6 +80,8 @@ SOMAArrayBase <- R6::R6Class( }, ## to be refined in derived classes + # SHOULD REMOVE IF iterator functionality moves to ReatIter class for + # all read functions, currently only implemented for SOMADataFrame soma_reader_transform = function(x) { x } diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index ed21b43e62..01a96b378b 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -215,19 +215,7 @@ SOMADataFrame <- R6::R6Class( config = cfg) private$soma_reader_transform(rl) } else { - ## should we error if this isn't null? 
- if (!is.null(private$soma_reader_pointer)) { - warning("Reader pointer not null, skipping") - rl <- NULL - } else { - private$soma_reader_setup() - rl <- list() - while (!self$read_complete()) { - ## soma_reader_transform() applied inside read_next() - rl <- c(rl, self$read_next()) - } - } - invisible(rl) + TableReadIter$new(self$uri, self$tiledbsoma_ctx) } } @@ -265,12 +253,7 @@ SOMADataFrame <- R6::R6Class( } schema - }, - - ## refined from base class - soma_reader_transform = function(x) { - arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) } - + ) ) diff --git a/apis/r/R/TableReadIter.R b/apis/r/R/TableReadIter.R new file mode 100644 index 0000000000..0e714e610d --- /dev/null +++ b/apis/r/R/TableReadIter.R @@ -0,0 +1,35 @@ +#' TableReadIter +#' +#' @description +#' `TableReadIter` is a class that allows for iteration over +# the results of a read operation from SOMA objects#' @importFrom stats setNames +#' @export + +TableReadIter <- R6::R6Class( + classname = "TableReadIter", + inherit = ReadIter, + + public = list( + + ## refined from base class + read_concat = function(){ + + rl <- list() + + while (!self$read_complete()) { + rl <- c(rl, self$read_next()) + } + + do.call(arrow::concat_tables, rl) + + }), + + private = list( + + ## refined from base class + soma_reader_transform = function(x) { + arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) + } + + ) +) From bb82a0beee517f89de690eb2344fe6db4eacdfe8 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Sun, 7 May 2023 13:53:18 -0700 Subject: [PATCH 02/18] Add iter classes --- apis/r/R/ReadFull.R | 41 ++++++++++++++++++++++++++++++++++++++++ apis/r/R/ReadIter.R | 33 ++++++++++++++------------------ apis/r/R/SOMADataFrame.R | 28 ++++++++++++++++++--------- apis/r/R/TableReadFull.R | 17 +++++++++++++++++ 4 files changed, 91 insertions(+), 28 deletions(-) create mode 100644 apis/r/R/ReadFull.R create mode 100644 apis/r/R/TableReadFull.R diff --git 
a/apis/r/R/ReadFull.R b/apis/r/R/ReadFull.R new file mode 100644 index 0000000000..7f53860c66 --- /dev/null +++ b/apis/r/R/ReadFull.R @@ -0,0 +1,41 @@ +#' SOMA Read Iterator Base class +#' +#' Dispatcher class to read full SOMAArray + +ReadFull <- R6::R6Class( + classname = "ReadFull", + + public = list( + + #' @description Create (lifecycle: experimental) + initialize = function(uri, config, colnames = NULL, qc = NULL, dim_points = NULL, loglevel = "auto") { + # Instantiate soma_reader_pointer with a soma_array_reader object + private$rl <- soma_array_reader( + uri = uri, + config = config, + colnames = colnames, + qc = qc, + dim_points = dim_points, + loglevel = loglevel + ) + }, + + #' @description Check if iterated read is complete or not. (lifecycle: experimental) + read = function() { + return(private$soma_reader_transform(private$rl)) + }, + + ), + + private = list( + + # Internal 'external pointer' object used for iterated reads + rl = NULL, + + ## to be refined in derived classes + soma_reader_transform = function(x) { + .NotYetImplemented() + } + + ) +) diff --git a/apis/r/R/ReadIter.R b/apis/r/R/ReadIter.R index 7dd223b0c8..b35533648b 100644 --- a/apis/r/R/ReadIter.R +++ b/apis/r/R/ReadIter.R @@ -7,14 +7,17 @@ ReadIter <- R6::R6Class( public = list( - uri = NULL, - tiledbsoma_ctx = NULL, - #' @description Create (lifecycle: experimental) - initialize = function(uri, tiledbsoma_ctx) { - self$uri <- uri - self$tiledbsoma_ctx <- tiledbsoma_ctx - private$soma_reader_setup() + initialize = function(uri, config, colnames = NULL, qc = NULL, dim_points = NULL, loglevel = "auto") { + # Instantiate soma_reader_pointer with a soma_array_reader object + private$soma_reader_pointer <- sr_setup( + uri = uri, + config = config, + colnames = colnames, + qc = qc, + dim_points = dim_points, + loglevel = loglevel + ) }, #' @description Check if iterated read is complete or not. 
(lifecycle: experimental) @@ -31,12 +34,12 @@ ReadIter <- R6::R6Class( if (is.null(private$soma_reader_pointer)) { NULL } else { - if (sr_complete(private$soma_reader_pointer)) { + if (self$read_complete()) { warning("Iteration complete, returning NULL") NULL } else { rl <- sr_next(private$soma_reader_pointer) - private$soma_reader_transform(rl) + return(private$soma_reader_transform(rl)) } } }, @@ -44,7 +47,7 @@ ReadIter <- R6::R6Class( #' @description TODO # to be refined in derived classes concat = function() { - NULL + .NotYetImplemented() } ), @@ -54,17 +57,9 @@ ReadIter <- R6::R6Class( # Internal 'external pointer' object used for iterated reads soma_reader_pointer = NULL, - # Instantiate soma_reader_pointer with a soma_array_reader object - soma_reader_setup = function() { - private$soma_reader_pointer <- sr_setup( - self$uri, - config=as.character(tiledb::config(self$tiledbsoma_ctx$context())) - ) - }, - ## to be refined in derived classes soma_reader_transform = function(x) { - x + .NotYetImplemented() } ) diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index 01a96b378b..62924a94ad 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -205,17 +205,27 @@ SOMADataFrame <- R6::R6Class( value_filter <- parsed@ptr } + cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) if (isFALSE(iterated)) { - cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) - rl <- soma_array_reader(uri = uri, - colnames = column_names, # NULL dealt with by soma_array_reader() - qc = value_filter, # idem - dim_points = coords, # idem - loglevel = log_level, # idem - config = cfg) - private$soma_reader_transform(rl) + read_full <- TableReadFull$new(uri = self$uri, + config = cfg, + colnames = column_names, # NULL dealt with by sr_setup() + qc = value_filter, # idem + dim_points = coords, # idem + loglevel = log_level # idem + ) + + return(read_full$read()) + } else { - TableReadIter$new(self$uri, self$tiledbsoma_ctx) + 
read_iter <- TableReadIter$new(uri = self$uri, + config = cfg, + colnames = column_names, # NULL dealt with by sr_setup() + qc = value_filter, # idem + dim_points = coords, # idem + loglevel = log_level # idem + ) + return(read_iter) } } diff --git a/apis/r/R/TableReadFull.R b/apis/r/R/TableReadFull.R new file mode 100644 index 0000000000..dc9b5e581f --- /dev/null +++ b/apis/r/R/TableReadFull.R @@ -0,0 +1,17 @@ +#' TableReadFull +#' +#' @description TODO +#' @export + +TableReadFull <- R6::R6Class( + classname = "TableReadFull", + inherit = ReadFull, + + private = list( + ## refined from base class + soma_reader_transform = function(x) { + arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) + } + + ) +) From 46c4b1e28f2e565e5f1dffcb14c8b3966bfda90c Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Sun, 7 May 2023 15:33:22 -0700 Subject: [PATCH 03/18] Improve classes --- apis/r/R/ReadFull.R | 2 +- apis/r/R/SOMADataFrame.R | 1 - apis/r/R/SOMASparseNDArray.R | 36 +++++++++++++++--------------------- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/apis/r/R/ReadFull.R b/apis/r/R/ReadFull.R index 7f53860c66..0bd97dfd89 100644 --- a/apis/r/R/ReadFull.R +++ b/apis/r/R/ReadFull.R @@ -23,7 +23,7 @@ ReadFull <- R6::R6Class( #' @description Check if iterated read is complete or not. 
(lifecycle: experimental) read = function() { return(private$soma_reader_transform(private$rl)) - }, + } ), diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index 62924a94ad..7996b3eefa 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -216,7 +216,6 @@ SOMADataFrame <- R6::R6Class( ) return(read_full$read()) - } else { read_iter <- TableReadIter$new(uri = self$uri, config = cfg, diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index 12e9088e07..dc66a44ce2 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -133,29 +133,23 @@ SOMASparseNDArray <- R6::R6Class( coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x) } + cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) if (isFALSE(iterated)) { - cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) - rl <- soma_array_reader(uri = uri, - dim_points = coords, # NULL dealt with by soma_array_reader() - result_order = result_order, - loglevel = log_level, # idem - config = cfg) - private$soma_reader_transform(rl) + read_full <- TableReadFull$new(uri = self$uri, + config = cfg, + qc = value_filter, # idem + dim_points = coords, # idem + loglevel = log_level # idem + ) + return(read_full$read()) } else { - ## should we error if this isn't null? 
- if (!is.null(self$soma_reader_pointer)) { - warning("Reader pointer not null, skipping") - rl <- NULL - } else { - private$soma_reader_setup() - private$sparse_repr <- "" # no sparse matrix transformation - rl <- list() - while (!self$read_complete()) { - ## soma_reader_transform() applied inside read_next() - rl <- c(rl, self$read_next()) - } - } - invisible(rl) + read_iter <- TableReadIter$new(uri = self$uri, + config = cfg, + qc = value_filter, # idem + dim_points = coords, # idem + loglevel = log_level # idem + ) + return(read_iter) } }, From ff25eee63b1afeea16d756de51a8e37524150cc8 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Tue, 9 May 2023 12:54:05 -0700 Subject: [PATCH 04/18] Add iters --- apis/r/R/SOMASparseNDArray.R | 13 +++++------ apis/r/R/SparseReadIter.R | 35 +++++++++++++++++++++++++++++ apis/r/R/TableReadIter.R | 7 +++--- apis/r/R/utils-readerTransformers.R | 22 ++++++++++++++++++ 4 files changed, 66 insertions(+), 11 deletions(-) create mode 100644 apis/r/R/SparseReadIter.R create mode 100644 apis/r/R/utils-readerTransformers.R diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index dc66a44ce2..617f948bc7 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -137,7 +137,6 @@ SOMASparseNDArray <- R6::R6Class( if (isFALSE(iterated)) { read_full <- TableReadFull$new(uri = self$uri, config = cfg, - qc = value_filter, # idem dim_points = coords, # idem loglevel = log_level # idem ) @@ -145,7 +144,6 @@ SOMASparseNDArray <- R6::R6Class( } else { read_iter <- TableReadIter$new(uri = self$uri, config = cfg, - qc = value_filter, # idem dim_points = coords, # idem loglevel = log_level # idem ) @@ -181,20 +179,19 @@ SOMASparseNDArray <- R6::R6Class( "Array must contain columns 'soma_dim_0' and 'soma_dim_1'" = all.equal(c("soma_dim_0", "soma_dim_1"), names(dims)), "Array must contain column 'soma_data'" = all.equal("soma_data", names(attr))) + if (isFALSE(iterated)) { - tbl <- 
self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) + # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the - # zero-based soma_dim_0 and soma_dim_1. But, because these dimensions are + # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). But, because these dimensions are # usually populated with soma_joinid, users will need to access the matrix # using the original, possibly-zero IDs. Therefore, we'll wrap the one-based # sparseMatrix with a shim providing basic access with zero-based indexes. # If needed, user can then explicitly ask the shim for the underlying # sparseMatrix using `as.one.based()`. - mat <- Matrix::sparseMatrix(i = 1 + as.numeric(tbl$GetColumnByName("soma_dim_0")), - j = 1 + as.numeric(tbl$GetColumnByName("soma_dim_1")), - x = as.numeric(tbl$GetColumnByName("soma_data")), - dims = as.integer(self$shape()), repr = repr) + mat <- arrow_table_to_sparse(tbl) + matrixZeroBasedView(mat) } else { ## should we error if this isn't null? 
diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R new file mode 100644 index 0000000000..a2522fadda --- /dev/null +++ b/apis/r/R/SparseReadIter.R @@ -0,0 +1,35 @@ +#' SparseReadIter +#' +#' @description +#' `SparseReadIter` is a class that allows for iteration over +#' the results of a read operation from SOMA objects#' +#' @export + +SparseReadIter <- R6::R6Class( + classname = "SparseReadIter", + inherit = ReadIter, + + public = list( + + ## refined from base class + concat = function(){ + + rl <- list() + + while (!self$read_complete()) { + rl <- c(rl, self$read_next()) + } + + do.call(arrow::concat_tables, rl) + + }), + + private = list( + + ## refined from base class + soma_reader_transform = function(x) { + soma_array_to_arrow(x) + } + + ) +) diff --git a/apis/r/R/TableReadIter.R b/apis/r/R/TableReadIter.R index 0e714e610d..1d48ee68ae 100644 --- a/apis/r/R/TableReadIter.R +++ b/apis/r/R/TableReadIter.R @@ -2,7 +2,8 @@ #' #' @description #' `TableReadIter` is a class that allows for iteration over -# the results of a read operation from SOMA objects#' @importFrom stats setNames +#' the results of a read operation from SOMA objects#' +#' @importFrom stats setNames #' @export TableReadIter <- R6::R6Class( @@ -12,7 +13,7 @@ TableReadIter <- R6::R6Class( public = list( ## refined from base class - read_concat = function(){ + concat = function(){ rl <- list() @@ -28,7 +29,7 @@ TableReadIter <- R6::R6Class( ## refined from base class soma_reader_transform = function(x) { - arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) + soma_array_to_arrow(x) } ) diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R new file mode 100644 index 0000000000..1a90fa8a6b --- /dev/null +++ b/apis/r/R/utils-readerTransformers.R @@ -0,0 +1,22 @@ +soma_array_to_arrow <- function(x) { + arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) +} + +arrow_table_to_sparse <- function(tbl) { + + 
soma_dim_0_one_based <- 1 + as.numeric(tbl$GetColumnByName("soma_dim_0")) + soma_dim_1_one_based <- 1 + as.numeric(tbl$GetColumnByName("soma_dim_1")) + soma_data <- as.numeric(tbl$GetColumnByName("soma_data")) + dims <- c(max(soma_dim_0_one_based), max(soma_dim_1_one_based)) + + if(any(dims > .Machine$integer.max)) { + error("The dimensions of the array are larger than supported by Matrix::sparseMatrix") + } + + Matrix::sparseMatrix(i = soma_dim_0_one_based, + j = soma_dim_1_one_based, + x = soma_data, + dims = dims, repr = repr) + +} + From 34c8d417ab149808a94c157a5301dc01c7880c46 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Tue, 9 May 2023 21:17:02 -0700 Subject: [PATCH 05/18] Improve iterators --- apis/r/R/SOMASparseNDArray.R | 19 +++++++++---------- apis/r/R/SparseReadIter.R | 23 ++++++++++++++++++++++- apis/r/R/utils-readerTransformers.R | 8 ++++++-- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index 617f948bc7..48ed289cbd 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -182,7 +182,7 @@ SOMASparseNDArray <- R6::R6Class( if (isFALSE(iterated)) { - + tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). But, because these dimensions are # usually populated with soma_joinid, users will need to access the matrix @@ -190,18 +190,17 @@ SOMASparseNDArray <- R6::R6Class( # sparseMatrix with a shim providing basic access with zero-based indexes. # If needed, user can then explicitly ask the shim for the underlying # sparseMatrix using `as.one.based()`. - mat <- arrow_table_to_sparse(tbl) + mat <- arrow_table_to_sparse(tbl, repr = repr) matrixZeroBasedView(mat) } else { - ## should we error if this isn't null? 
- if (!is.null(self$soma_reader_pointer)) { - warning("pointer not null, skipping") - } else { - private$soma_reader_setup() - private$sparse_repr <- repr - } - invisible(NULL) + cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) + SparseReadIter$new(uri = self$uri, + config = cfg, + dim_points = coords, + loglevel = log_level, + repr = repr) + } }, diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index a2522fadda..3caf6322ac 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -10,6 +10,24 @@ SparseReadIter <- R6::R6Class( inherit = ReadIter, public = list( + + #' @description Create (lifecycle: experimental) + initialize = function(uri, config, colnames = NULL, qc = NULL, dim_points = NULL, loglevel = "auto", repr) { + # Initiate super class + super$initialize (uri = uri, config = config, colnames = colnames, qc = qc, + dim_points = dim_points, loglevel = loglevel) + + private$repr <- repr + + # Get max soma dims for indeces + tiledb_array <- tiledb::tiledb_array(uri) + tiledb::tiledb_array_open(tiledb_array, type = "READ") + max_soma_dim_0 <- as.integer(max(tiledb::tiledb_array_get_non_empty_domain_from_index(tiledb_array, 1))) + max_soma_dim_1 <- as.integer(max(tiledb::tiledb_array_get_non_empty_domain_from_index(tiledb_array, 2))) + tiledb::tiledb_array_close(tiledb_array) + + private$dims <- c(max_soma_dim_0, max_soma_dim_1) + }, ## refined from base class concat = function(){ @@ -26,9 +44,12 @@ SparseReadIter <- R6::R6Class( private = list( + repr=NULL, + dims=NULL, + ## refined from base class soma_reader_transform = function(x) { - soma_array_to_arrow(x) + arrow_table_to_sparse(soma_array_to_arrow(x), repr = private$repr, dims = private$dims) } ) diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R index 1a90fa8a6b..9c94864382 100644 --- a/apis/r/R/utils-readerTransformers.R +++ b/apis/r/R/utils-readerTransformers.R @@ -2,12 +2,16 @@ soma_array_to_arrow <- function(x) 
{ arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) } -arrow_table_to_sparse <- function(tbl) { +arrow_table_to_sparse <- function(tbl, repr, dims = NULL) { soma_dim_0_one_based <- 1 + as.numeric(tbl$GetColumnByName("soma_dim_0")) soma_dim_1_one_based <- 1 + as.numeric(tbl$GetColumnByName("soma_dim_1")) + soma_data <- as.numeric(tbl$GetColumnByName("soma_data")) - dims <- c(max(soma_dim_0_one_based), max(soma_dim_1_one_based)) + + if (is.null(dims)) { + dims <- c(max(soma_dim_0_one_based), max(soma_dim_1_one_based)) + } if(any(dims > .Machine$integer.max)) { error("The dimensions of the array are larger than supported by Matrix::sparseMatrix") From b210ce2b31607167be5a449c6d451b019d91b442 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 11 May 2023 10:45:34 -0700 Subject: [PATCH 06/18] First iterators MVP --- apis/r/R/ReadFull.R | 41 ------------ apis/r/R/ReadIter.R | 26 ++++++-- apis/r/R/SOMAArrayBase.R | 74 +++++++-------------- apis/r/R/SOMADataFrame.R | 20 +++--- apis/r/R/SOMASparseNDArray.R | 96 ++++++++++++---------------- apis/r/R/SparseReadIter.R | 52 +++++++++++---- apis/r/R/TableReadFull.R | 17 ----- apis/r/R/TableReadIter.R | 22 ++++--- apis/r/R/utils-matrixZeroBasedView.R | 36 ++++++++++- apis/r/R/utils-readerTransformers.R | 63 +++++++++++++++--- 10 files changed, 235 insertions(+), 212 deletions(-) delete mode 100644 apis/r/R/ReadFull.R delete mode 100644 apis/r/R/TableReadFull.R diff --git a/apis/r/R/ReadFull.R b/apis/r/R/ReadFull.R deleted file mode 100644 index 0bd97dfd89..0000000000 --- a/apis/r/R/ReadFull.R +++ /dev/null @@ -1,41 +0,0 @@ -#' SOMA Read Iterator Base class -#' -#' Dispatcher class to read full SOMAArray - -ReadFull <- R6::R6Class( - classname = "ReadFull", - - public = list( - - #' @description Create (lifecycle: experimental) - initialize = function(uri, config, colnames = NULL, qc = NULL, dim_points = NULL, loglevel = "auto") { - # Instantiate soma_reader_pointer with a soma_array_reader object 
- private$rl <- soma_array_reader( - uri = uri, - config = config, - colnames = colnames, - qc = qc, - dim_points = dim_points, - loglevel = loglevel - ) - }, - - #' @description Check if iterated read is complete or not. (lifecycle: experimental) - read = function() { - return(private$soma_reader_transform(private$rl)) - } - - ), - - private = list( - - # Internal 'external pointer' object used for iterated reads - rl = NULL, - - ## to be refined in derived classes - soma_reader_transform = function(x) { - .NotYetImplemented() - } - - ) -) diff --git a/apis/r/R/ReadIter.R b/apis/r/R/ReadIter.R index b35533648b..067afd1b43 100644 --- a/apis/r/R/ReadIter.R +++ b/apis/r/R/ReadIter.R @@ -8,7 +8,22 @@ ReadIter <- R6::R6Class( public = list( #' @description Create (lifecycle: experimental) - initialize = function(uri, config, colnames = NULL, qc = NULL, dim_points = NULL, loglevel = "auto") { + #' @param uri Character value with URI path to a SOMADataFrame or SOMASparseNDArray + #' @param config character vector containing TileDB config. + #' @param colnames Optional vector of character value with the name of the columns to retrieve + #' @param qc Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. + #' no query condition + #' @param dim_points Optional named list with vector of data points to select on the given + #' dimension(s). Each dimension can be one entry in the list. + #' @param loglevel Character value with the desired logging level, defaults to \sQuote{auto} + #' which lets prior setting prevail, any other value is set as new logging level. + initialize = function(uri, + config, + colnames = NULL, + qc = NULL, + dim_points = NULL, + loglevel = "auto") { + # Instantiate soma_reader_pointer with a soma_array_reader object private$soma_reader_pointer <- sr_setup( uri = uri, @@ -21,6 +36,7 @@ ReadIter <- R6::R6Class( }, #' @description Check if iterated read is complete or not. 
(lifecycle: experimental) + #' @return logical read_complete = function() { if (is.null(private$soma_reader_pointer)) { TRUE @@ -29,7 +45,9 @@ ReadIter <- R6::R6Class( } }, - #' @description Read the next chunk of an iterated read. (lifecycle: experimental) + #' @description Read the next chunk of an iterated read. (lifecycle: experimental). + #' If read is complete, retunrs `NULL` and raises warning. + #' @return \code{NULL} or one of \link[Arrow]{Table}, \link{matrixZeroBasedView} read_next = function() { if (is.null(private$soma_reader_pointer)) { NULL @@ -44,7 +62,7 @@ ReadIter <- R6::R6Class( } }, - #' @description TODO + #' @description Concatenate remainder of iterator # to be refined in derived classes concat = function() { .NotYetImplemented() @@ -57,7 +75,7 @@ ReadIter <- R6::R6Class( # Internal 'external pointer' object used for iterated reads soma_reader_pointer = NULL, - ## to be refined in derived classes + # to be refined in derived classes soma_reader_transform = function(x) { .NotYetImplemented() } diff --git a/apis/r/R/SOMAArrayBase.R b/apis/r/R/SOMAArrayBase.R index a476950268..82007bce6b 100644 --- a/apis/r/R/SOMAArrayBase.R +++ b/apis/r/R/SOMAArrayBase.R @@ -17,37 +17,6 @@ SOMAArrayBase <- R6::R6Class( } ), - public = list( - - #' @description Check if iterated read is complete or not. (lifecycle: experimental) - # SHOULD REMOVE IF iterator functionality moves to ReatIter class for - # all read functions, currently only implemented for SOMADataFrame - read_complete = function() { - if (is.null(private$soma_reader_pointer)) { - TRUE - } else { - sr_complete(private$soma_reader_pointer) - } - }, - - #' @description Read the next chunk of an iterated read. 
(lifecycle: experimental) - # SHOULD REMOVE IF iterator functionality moves to ReatIter class for - # all read functions, currently only implemented for SOMADataFrame - read_next = function() { - if (is.null(private$soma_reader_pointer)) { - NULL - } else { - if (sr_complete(private$soma_reader_pointer)) { - invisible(NULL) - } else { - rl <- sr_next(private$soma_reader_pointer) - private$soma_reader_transform(rl) - } - } - } - - ), - private = list( # Cache object's SOMA_OBJECT_TYPE_METADATA_KEY @@ -63,27 +32,28 @@ SOMAArrayBase <- R6::R6Class( meta[[SOMA_ENCODING_VERSION_METADATA_KEY]] <- SOMA_ENCODING_VERSION self$set_metadata(meta) }, - - # Internal 'external pointer' object used for iterated reads - # SHOULD REMOVE IF iterator functionality moves to ReatIter class for - # all read functions, currently only implemented for SOMADataFrame - soma_reader_pointer = NULL, - - # Instantiate soma_reader_pointer with a soma_array_reader object - # SHOULD REMOVE IF iterator functionality moves to ReatIter class for - # all read functions, currently only implemented for SOMADataFrame - soma_reader_setup = function() { - private$soma_reader_pointer <- sr_setup( - self$uri, - config=as.character(tiledb::config(self$tiledbsoma_ctx$context())) - ) - }, - - ## to be refined in derived classes - # SHOULD REMOVE IF iterator functionality moves to ReatIter class for - # all read functions, currently only implemented for SOMADataFrame - soma_reader_transform = function(x) { - x + + #' @description Converts a list of vectors corresponding to coords to a + #' format acceptable for sr_setup and soma_array_reader + convert_coords = function(coords) { + + ## ensure coords is a named list, use to select dim points + stopifnot("'coords' must be a list" = is.list(coords), + "'coords' must be a list of vectors or integer64" = + all(vapply_lgl(coords, is_vector_or_int64)), + "'coords' if unnamed must have length of dim names, else if named names must match dim names" = + 
(is.null(names(coords)) && length(coords) == length(self$dimnames())) || + (!is.null(names(coords)) && all(names(coords) %in% self$dimnames())) + ) + + ## if unnamed (and test for length has passed in previous statement) set names + if (is.null(names(coords))) names(coords) <- self$dimnames() + + ## convert integer to integer64 to match dimension type + coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x) + + coords + } ) diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index 7996b3eefa..ec5f34f740 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -181,7 +181,7 @@ SOMADataFrame <- R6::R6Class( result_order <- match_query_layout(result_order) uri <- self$uri arr <- self$object # need array (schema) to properly parse query condition - + ## if unnamed set names if (!is.null(coords)) { if (!is.list(coords)) @@ -207,15 +207,15 @@ SOMADataFrame <- R6::R6Class( cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) if (isFALSE(iterated)) { - read_full <- TableReadFull$new(uri = self$uri, - config = cfg, - colnames = column_names, # NULL dealt with by sr_setup() - qc = value_filter, # idem - dim_points = coords, # idem - loglevel = log_level # idem - ) - - return(read_full$read()) + rl <- soma_array_reader(uri = self$uri, + config = cfg, + colnames = column_names, # NULL dealt with by sr_setup() + qc = value_filter, # idem + dim_points = coords, + loglevel = log_level + ) + + soma_array_to_arrow_table(rl) } else { read_iter <- TableReadIter$new(uri = self$uri, config = cfg, diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index 48ed289cbd..f490f46b7e 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -117,37 +117,25 @@ SOMASparseNDArray <- R6::R6Class( result_order <- map_query_layout(match_query_layout(result_order)) if (!is.null(coords)) { - ## ensure coords is a named list, use to select dim points - stopifnot("'coords' must 
be a list" = is.list(coords), - "'coords' must be a list of vectors or integer64" = - all(vapply_lgl(coords, is_vector_or_int64)), - "'coords' if unnamed must have length of dim names, else if named names must match dim names" = - (is.null(names(coords)) && length(coords) == length(self$dimnames())) || - (!is.null(names(coords)) && all(names(coords) %in% self$dimnames())) - ) - - ## if unnamed (and test for length has passed in previous statement) set names - if (is.null(names(coords))) names(coords) <- self$dimnames() - - ## convert integer to integer64 to match dimension type - coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x) + coords <- private$convert_coords(coords) } cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) if (isFALSE(iterated)) { - read_full <- TableReadFull$new(uri = self$uri, - config = cfg, - dim_points = coords, # idem - loglevel = log_level # idem - ) - return(read_full$read()) + rl <- soma_array_reader(uri = self$uri, + config = cfg, + dim_points = coords, + loglevel = log_level + ) + + soma_array_to_arrow_table(rl) } else { read_iter <- TableReadIter$new(uri = self$uri, config = cfg, - dim_points = coords, # idem - loglevel = log_level # idem + dim_points = coords, + loglevel = log_level ) - return(read_iter) + read_iter } }, @@ -180,27 +168,20 @@ SOMASparseNDArray <- R6::R6Class( all.equal(c("soma_dim_0", "soma_dim_1"), names(dims)), "Array must contain column 'soma_data'" = all.equal("soma_data", names(attr))) + if (!is.null(coords)) { + coords <- private$convert_coords(coords) + } if (isFALSE(iterated)) { - tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) - # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the - # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). 
But, because these dimensions are - # usually populated with soma_joinid, users will need to access the matrix - # using the original, possibly-zero IDs. Therefore, we'll wrap the one-based - # sparseMatrix with a shim providing basic access with zero-based indexes. - # If needed, user can then explicitly ask the shim for the underlying - # sparseMatrix using `as.one.based()`. - mat <- arrow_table_to_sparse(tbl, repr = repr) - - matrixZeroBasedView(mat) + tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) + arrow_table_to_sparse(tbl, repr = repr) } else { - cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) - SparseReadIter$new(uri = self$uri, - config = cfg, - dim_points = coords, - loglevel = log_level, - repr = repr) - + cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) + SparseReadIter$new(uri = self$uri, + config = cfg, + dim_points = coords, + loglevel = log_level, + repr = repr) } }, @@ -245,20 +226,27 @@ SOMASparseNDArray <- R6::R6Class( arr <- self$object arr[] <- values }, + + #' @description Converts a list of vectors corresponding to coords to a + #' format acceptable for sr_setup and soma_array_reader + convert_coords = function(coords) { + + ## ensure coords is a named list, use to select dim points + stopifnot("'coords' must be a list" = is.list(coords), + "'coords' must be a list of vectors or integer64" = + all(vapply_lgl(coords, is_vector_or_int64)), + "'coords' if unnamed must have length of dim names, else if named names must match dim names" = + (is.null(names(coords)) && length(coords) == length(self$dimnames())) || + (!is.null(names(coords)) && all(names(coords) %in% self$dimnames())) + ) - ## refined from base class - soma_reader_transform = function(x) { - tbl <- as_arrow_table(x) - if (private$sparse_repr == "") { - tbl - } else { - mat <- Matrix::sparseMatrix(i = 1 + as.numeric(tbl$GetColumnByName("soma_dim_0")), - j = 1 + 
as.numeric(tbl$GetColumnByName("soma_dim_1")), - x = as.numeric(tbl$GetColumnByName("soma_data")), - dims = as.integer(self$shape()), repr = private$sparse_repr) - # see read_sparse_matrix_zero_based() abave - matrixZeroBasedView(mat) - } + ## if unnamed (and test for length has passed in previous statement) set names + if (is.null(names(coords))) names(coords) <- self$dimnames() + + ## convert integer to integer64 to match dimension type + coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x) + + coords }, ## internal 'repr' state variable, by default 'unset' diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index 3caf6322ac..2bf2721554 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -1,8 +1,9 @@ #' SparseReadIter #' #' @description -#' `SparseReadIter` is a class that allows for iteration over -#' the results of a read operation from SOMA objects#' +#' \code{SparseReadIter} is a class that allows for iteration over +#' a reads on \link{SOMASparseNDArray}. +#' Iteration chunks are retrieved as 0-based Views of \link[Matrix]{SparseMatrix}. #' @export SparseReadIter <- R6::R6Class( @@ -12,45 +13,68 @@ SparseReadIter <- R6::R6Class( public = list( #' @description Create (lifecycle: experimental) - initialize = function(uri, config, colnames = NULL, qc = NULL, dim_points = NULL, loglevel = "auto", repr) { + #' @param uri Character value with URI path to a SOMADataFrame or SOMASparseNDArray + #' @param config character vector containing TileDB config. + #' @param colnames Optional vector of character value with the name of the columns to retrieve + #' @param qc Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. + #' no query condition + #' @param dim_points Optional named list with vector of data points to select on the given + #' dimension(s). Each dimension can be one entry in the list. 
+ #' @param loglevel Character value with the desired logging level, defaults to \sQuote{auto} + #' @param repr Optional one-character code for sparse matrix representation type + #' which lets prior setting prevail, any other value is set as new logging level. + initialize = function(uri, + config, + colnames = NULL, + qc = NULL, + dim_points = NULL, + loglevel = "auto", + repr = c("C", "T", "R")) { + # Initiate super class super$initialize (uri = uri, config = config, colnames = colnames, qc = qc, dim_points = dim_points, loglevel = loglevel) private$repr <- repr - # Get max soma dims for indeces + # Get max soma dims for indeces via tiledb tiledb_array <- tiledb::tiledb_array(uri) tiledb::tiledb_array_open(tiledb_array, type = "READ") max_soma_dim_0 <- as.integer(max(tiledb::tiledb_array_get_non_empty_domain_from_index(tiledb_array, 1))) max_soma_dim_1 <- as.integer(max(tiledb::tiledb_array_get_non_empty_domain_from_index(tiledb_array, 2))) tiledb::tiledb_array_close(tiledb_array) - private$dims <- c(max_soma_dim_0, max_soma_dim_1) + private$dims_one_based <- c(max_soma_dim_0 + 1, max_soma_dim_1 + 1) }, - - ## refined from base class + + #' @description Concatenate remainder of iterator + #' @return \link{matrixZeroBasedView} concat = function(){ - - rl <- list() + + if(self$read_complete()) { + warning("Iteration complete, returning NULL") + return(NULL) + } + + mat <- self$read_next() while (!self$read_complete()) { - rl <- c(rl, self$read_next()) + mat <- mat + self$read_next() } - do.call(arrow::concat_tables, rl) + mat }), private = list( repr=NULL, - dims=NULL, + dims_one_based=NULL, ## refined from base class soma_reader_transform = function(x) { - arrow_table_to_sparse(soma_array_to_arrow(x), repr = private$repr, dims = private$dims) - } + arrow_table_to_sparse(soma_array_to_arrow_table(x), repr = private$repr, dims_one_based = private$dims_one_based) + } ) ) diff --git a/apis/r/R/TableReadFull.R b/apis/r/R/TableReadFull.R deleted file mode 100644 index 
dc9b5e581f..0000000000 --- a/apis/r/R/TableReadFull.R +++ /dev/null @@ -1,17 +0,0 @@ -#' TableReadFull -#' -#' @description TODO -#' @export - -TableReadFull <- R6::R6Class( - classname = "TableReadFull", - inherit = ReadFull, - - private = list( - ## refined from base class - soma_reader_transform = function(x) { - arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) - } - - ) -) diff --git a/apis/r/R/TableReadIter.R b/apis/r/R/TableReadIter.R index 1d48ee68ae..6e1a9959c9 100644 --- a/apis/r/R/TableReadIter.R +++ b/apis/r/R/TableReadIter.R @@ -1,9 +1,9 @@ -#' TableReadIter +#' SOMA Read Iterator over Arrow Table #' #' @description #' `TableReadIter` is a class that allows for iteration over -#' the results of a read operation from SOMA objects#' -#' @importFrom stats setNames +#' a reads on \link{SOMASparseNDArray} and \link{SOMADataFrame}. +#' Iteration chunks are retrieved as Arrow Tables #' @export TableReadIter <- R6::R6Class( @@ -12,16 +12,22 @@ TableReadIter <- R6::R6Class( public = list( - ## refined from base class + #' @description Concatenate remainder of iterator + #' @return \link[Arrow]{Table} concat = function(){ - rl <- list() + if(self$read_complete()) { + warning("Iteration complete, returning NULL") + return(NULL) + } + + tbl <- self$read_next() while (!self$read_complete()) { - rl <- c(rl, self$read_next()) + tbl <- arrow::concat_tables(tbl, self$read_next()) } - do.call(arrow::concat_tables, rl) + tbl }), @@ -29,7 +35,7 @@ TableReadIter <- R6::R6Class( ## refined from base class soma_reader_transform = function(x) { - soma_array_to_arrow(x) + soma_array_to_arrow_table(x) } ) diff --git a/apis/r/R/utils-matrixZeroBasedView.R b/apis/r/R/utils-matrixZeroBasedView.R index 7467972746..2222e202a2 100644 --- a/apis/r/R/utils-matrixZeroBasedView.R +++ b/apis/r/R/utils-matrixZeroBasedView.R @@ -15,8 +15,8 @@ #' @return Shim providing elemental access to the matrix using zero-based indexes. 
#' @export matrixZeroBasedView <- function(one_based_matrix) { - if (!inherits(one_based_matrix, "matrix") && !inherits(one_based_matrix, "sparseMatrix")) { - stop("Matrix object must inherit class matrix or Matrix::sparseMatrix.") + if (!inherits(one_based_matrix, "matrix") && !inherits(one_based_matrix, "sparseMatrix") && !inherits(one_based_matrix, "denseMatrix")) { + stop("Matrix object must inherit class matrix or Matrix::sparseMatrix or Matrix:denseMatrix") } structure(list(one_based_matrix = one_based_matrix), class = "matrixZeroBasedView") } @@ -43,6 +43,27 @@ matrixZeroBasedView <- function(one_based_matrix) { } } +#' + +#' +#' @param e1 left side +#' @param e2 right sidet +#' +#' @return results of sum +#' @export +Ops.matrixZeroBasedView <- function(e1, e2 = NULL) { + + if(inherits(e1, "matrixZeroBasedView")) { + e1 <- e1$one_based_matrix + } + + if(inherits(e2, "matrixZeroBasedView")) { + e2 <- e2$one_based_matrix + } + + matrixZeroBasedView(NextMethod()) + +} + #' dim #' #' @param x The zero-based matrix view. @@ -73,6 +94,17 @@ ncol.matrixZeroBasedView <- function(x) { ncol(x$one_based_matrix) } +#' print +#' +#' @param x The zero-based matrix view. +#' +#' @return Matrix column count. +#' @export +print.matrixZeroBasedView <- function(x) { + cat("Non-mutable 0-based 'view' class for matrices.\n", + "To get 1-based matrix use `x$one_based_matrix` or `as.one.based(x)`\n") +} + #' Get one-based object #' #' @param x The object. 
diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R index 9c94864382..fddf56cd7e 100644 --- a/apis/r/R/utils-readerTransformers.R +++ b/apis/r/R/utils-readerTransformers.R @@ -1,26 +1,69 @@ -soma_array_to_arrow <- function(x) { +#' @description Converts the results of a \link{soma_array_reader} or \link{sr_next} to +#' an \link[Arrow]{Table} +#' @return \link[Arrow]{Table} +soma_array_to_arrow_table <- function(x) { arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) } -arrow_table_to_sparse <- function(tbl, repr, dims = NULL) { - +#' @description Converts a \link[Arrow]{Table} of sparse format (columns: "soma_dim_0", +#' "soma_dim_1", "soma_data") to a \link{matrixZeroBasedView} +#' @param tbl \link[Arrow]{Table} with columns "soma_dim_0", "soma_dim_1", and "soma_datah" +#' @param repr Optional one-character code for sparse matrix representation type +#' @param dims_one_based Numerical vectors with two elements, one for each dimension. If +#' \code{NULL}, then the following is used \code{c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} +#' @return \link{matrixZeroBasedView} +arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), dims_one_based = NULL) { + + # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the + # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). But, because these dimensions are + # usually populated with soma_joinid, users will need to access the matrix + # using the original, possibly-zero IDs. Therefore, we'll wrap the one-based + # sparseMatrix with a shim providing basic access with zero-based indexes. + # If needed, user can then explicitly ask the shim for the underlying + # sparseMatrix using `as.one.based()`. 
+ soma_dim_0_one_based <- 1 + as.numeric(tbl$GetColumnByName("soma_dim_0")) soma_dim_1_one_based <- 1 + as.numeric(tbl$GetColumnByName("soma_dim_1")) soma_data <- as.numeric(tbl$GetColumnByName("soma_data")) - if (is.null(dims)) { - dims <- c(max(soma_dim_0_one_based), max(soma_dim_1_one_based)) + if (is.null(dims_one_based)) { + dims_one_based <- c(max(soma_dim_0_one_based), max(soma_dim_1_one_based)) } - if(any(dims > .Machine$integer.max)) { + if(any(dims_one_based > .Machine$integer.max)) { error("The dimensions of the array are larger than supported by Matrix::sparseMatrix") } - Matrix::sparseMatrix(i = soma_dim_0_one_based, - j = soma_dim_1_one_based, - x = soma_data, - dims = dims, repr = repr) + mat <- Matrix::sparseMatrix(i = soma_dim_0_one_based, + j = soma_dim_1_one_based, + x = soma_data, + dims = dims_one_based, repr = repr) + matrixZeroBasedView(mat) +} + +#' @description Converts a \link[Arrow]{Table} of sparse format (columns: "soma_dim_0", +#' "soma_dim_1", "soma_data") to a \link{matrixZeroBasedView} +#' @param tbl \link[Arrow]{Table} with columns "soma_dim_0", "soma_dim_1", and "soma_datah" +#' @param repr Optional one-character code for sparse matrix representation type +#' @param dims_one_based Numerical vectors with two elements, one for each dimension. If +#' \code{NULL}, then the following is used \code{c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} +#' @return \link{matrixZeroBasedView} +arrow_table_to_dense <- function(tbl, byrow) { + + # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the + # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). But, because these dimensions are + # usually populated with soma_joinid, users will need to access the matrix + # using the original, possibly-zero IDs. Therefore, we'll wrap the one-based + # sparseMatrix with a shim providing basic access with zero-based indexes. 
+ # If needed, user can then explicitly ask the shim for the underlying + # sparseMatrix using `as.one.based()`. + + nrows <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0"))) + ncols <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1"))) + soma_data <- as.numeric(tbl$GetColumnByName("soma_data")) + mat <- matrix(soma_data, nrow = nrows, ncol = ncols, byrow = byrow) + matrixZeroBasedView(mat) } From 155585c6d89bf207d29fc584162dffef54023f02 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 11 May 2023 10:57:47 -0700 Subject: [PATCH 07/18] Update doc strings --- apis/r/NAMESPACE | 5 ++ apis/r/R/utils-readerTransformers.R | 4 +- apis/r/man/DenseReadIter.Rd | 94 +++++++++++++++++++ apis/r/man/Ops.matrixZeroBasedView.Rd | 23 +++++ apis/r/man/ReadIter.Rd | 115 ++++++++++++++++++++++++ apis/r/man/SOMAArrayBase.Rd | 22 ----- apis/r/man/SOMADataFrame.Rd | 2 - apis/r/man/SOMADenseNDArray.Rd | 2 - apis/r/man/SOMASparseNDArray.Rd | 18 ++-- apis/r/man/SparseReadIter.Rd | 100 +++++++++++++++++++++ apis/r/man/TableReadIter.Rd | 60 +++++++++++++ apis/r/man/example-datasets.Rd | 2 +- apis/r/man/print.matrixZeroBasedView.Rd | 17 ++++ apis/r/man/sr_setup.Rd | 8 +- 14 files changed, 431 insertions(+), 41 deletions(-) create mode 100644 apis/r/man/DenseReadIter.Rd create mode 100644 apis/r/man/Ops.matrixZeroBasedView.Rd create mode 100644 apis/r/man/ReadIter.Rd create mode 100644 apis/r/man/SparseReadIter.Rd create mode 100644 apis/r/man/TableReadIter.Rd create mode 100644 apis/r/man/print.matrixZeroBasedView.Rd diff --git a/apis/r/NAMESPACE b/apis/r/NAMESPACE index d6f312117c..76adcc03a7 100644 --- a/apis/r/NAMESPACE +++ b/apis/r/NAMESPACE @@ -4,6 +4,7 @@ S3method("[",matrixZeroBasedView) S3method("[<-",matrixZeroBasedView) S3method("[[",MappingBase) S3method("[[<-",MappingBase) +S3method(Ops,matrixZeroBasedView) S3method(as.list,MappingBase) S3method(as.one.based,matrixZeroBasedView) S3method(dim,matrixZeroBasedView) @@ -11,6 +12,7 @@ 
S3method(length,MappingBase) S3method(names,MappingBase) S3method(pad_matrix,default) S3method(pad_matrix,matrix) +S3method(print,matrixZeroBasedView) S3method(write_soma,Assay) S3method(write_soma,DimReduc) S3method(write_soma,Graph) @@ -20,6 +22,7 @@ S3method(write_soma,TsparseMatrix) S3method(write_soma,data.frame) S3method(write_soma,matrix) export(ConfigList) +export(DenseReadIter) export(EphemeralCollection) export(EphemeralExperiment) export(EphemeralMeasurement) @@ -50,6 +53,8 @@ export(SOMASparseNDArrayCreate) export(SOMASparseNDArrayOpen) export(SOMATileDBContext) export(ScalarMap) +export(SparseReadIter) +export(TableReadIter) export(TileDBArray) export(TileDBGroup) export(TileDBObject) diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R index fddf56cd7e..37a42e689b 100644 --- a/apis/r/R/utils-readerTransformers.R +++ b/apis/r/R/utils-readerTransformers.R @@ -59,8 +59,8 @@ arrow_table_to_dense <- function(tbl, byrow) { # If needed, user can then explicitly ask the shim for the underlying # sparseMatrix using `as.one.based()`. - nrows <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0"))) - ncols <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1"))) + nrows <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0")))) + ncols <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1")))) soma_data <- as.numeric(tbl$GetColumnByName("soma_data")) mat <- matrix(soma_data, nrow = nrows, ncol = ncols, byrow = byrow) diff --git a/apis/r/man/DenseReadIter.Rd b/apis/r/man/DenseReadIter.Rd new file mode 100644 index 0000000000..ee6bc1bf6b --- /dev/null +++ b/apis/r/man/DenseReadIter.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DenseReadIter.R +\name{DenseReadIter} +\alias{DenseReadIter} +\title{DenseReadIter} +\description{ +\code{DenseReadIter} is a class that allows for iteration over +a reads on \link{SOMADenseNDArray}. 
+Iteration chunks are retrieved as 0-based Views of \link[base]{matrix}. +} +\section{Super class}{ +\code{tiledbsoma::ReadIter} -> \code{DenseReadIter} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-DenseReadIter-new}{\code{DenseReadIter$new()}} +\item \href{#method-DenseReadIter-concat}{\code{DenseReadIter$concat()}} +\item \href{#method-DenseReadIter-clone}{\code{DenseReadIter$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-DenseReadIter-new}{}}} +\subsection{Method \code{new()}}{ +Create (lifecycle: experimental) +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DenseReadIter$new( + uri, + config, + dim_points = NULL, + result_order = NULL, + loglevel = "auto" +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{uri}}{Character value with URI path to a SOMADataFrame or SOMASparseNDArray} + +\item{\code{config}}{character vector containing TileDB config. +no query condition} + +\item{\code{dim_points}}{Optional named list with vector of data points to select on the given +dimension(s). Each dimension can be one entry in the list.} + +\item{\code{loglevel}}{Character value with the desired logging level, defaults to \sQuote{auto}} + +\item{\code{repr}}{Optional one-character code for sparse matrix representation type +which lets prior setting prevail, any other value is set as new logging level.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-DenseReadIter-concat}{}}} +\subsection{Method \code{concat()}}{ +Concatenate remainder of iterator +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DenseReadIter$concat()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +\link{matrixZeroBasedView} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-DenseReadIter-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DenseReadIter$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/apis/r/man/Ops.matrixZeroBasedView.Rd b/apis/r/man/Ops.matrixZeroBasedView.Rd new file mode 100644 index 0000000000..554414fd38 --- /dev/null +++ b/apis/r/man/Ops.matrixZeroBasedView.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-matrixZeroBasedView.R +\name{Ops.matrixZeroBasedView} +\alias{Ops.matrixZeroBasedView} +\title{\itemize{ +\item +}} +\usage{ +\method{Ops}{matrixZeroBasedView}(e1, e2 = NULL) +} +\arguments{ +\item{e1}{left side} + +\item{e2}{right sidet} +} +\value{ +results of sum +} +\description{ +\itemize{ +\item +} +} diff --git a/apis/r/man/ReadIter.Rd b/apis/r/man/ReadIter.Rd new file mode 100644 index 0000000000..e43c230ccf --- /dev/null +++ b/apis/r/man/ReadIter.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ReadIter.R +\name{ReadIter} +\alias{ReadIter} +\title{SOMA Read Iterator Base class} +\description{ +SOMA Read Iterator Base class + +SOMA Read Iterator Base class +} +\details{ +Class that allows for read iteration of SOMA reads. +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ReadIter-new}{\code{ReadIter$new()}} +\item \href{#method-ReadIter-read_complete}{\code{ReadIter$read_complete()}} +\item \href{#method-ReadIter-read_next}{\code{ReadIter$read_next()}} +\item \href{#method-ReadIter-concat}{\code{ReadIter$concat()}} +\item \href{#method-ReadIter-clone}{\code{ReadIter$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ReadIter-new}{}}} +\subsection{Method \code{new()}}{ +Create (lifecycle: experimental) +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ReadIter$new( + uri, + config, + colnames = NULL, + qc = NULL, + dim_points = NULL, + loglevel = "auto" +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{uri}}{Character value with URI path to a SOMADataFrame or SOMASparseNDArray} + +\item{\code{config}}{character vector containing TileDB config.} + +\item{\code{colnames}}{Optional vector of character value with the name of the columns to retrieve} + +\item{\code{qc}}{Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. +no query condition} + +\item{\code{dim_points}}{Optional named list with vector of data points to select on the given +dimension(s). Each dimension can be one entry in the list.} + +\item{\code{loglevel}}{Character value with the desired logging level, defaults to \sQuote{auto} +which lets prior setting prevail, any other value is set as new logging level.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ReadIter-read_complete}{}}} +\subsection{Method \code{read_complete()}}{ +Check if iterated read is complete or not. (lifecycle: experimental) +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ReadIter$read_complete()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +logical +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ReadIter-read_next}{}}} +\subsection{Method \code{read_next()}}{ +Read the next chunk of an iterated read. (lifecycle: experimental). +If read is complete, retunrs \code{NULL} and raises warning. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ReadIter$read_next()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +\code{NULL} or one of \link[Arrow]{Table}, \link{matrixZeroBasedView} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ReadIter-concat}{}}} +\subsection{Method \code{concat()}}{ +Concatenate remainder of iterator +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ReadIter$concat()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ReadIter-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ReadIter$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/apis/r/man/SOMAArrayBase.Rd b/apis/r/man/SOMAArrayBase.Rd index 01c3a74d63..072c87686c 100644 --- a/apis/r/man/SOMAArrayBase.Rd +++ b/apis/r/man/SOMAArrayBase.Rd @@ -24,8 +24,6 @@ Adds SOMA-specific functionality to the \code{\link{TileDBArray}} class. (lifec \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-SOMAArrayBase-read_complete}{\code{SOMAArrayBase$read_complete()}} -\item \href{#method-SOMAArrayBase-read_next}{\code{SOMAArrayBase$read_next()}} \item \href{#method-SOMAArrayBase-clone}{\code{SOMAArrayBase$clone()}} } } @@ -58,26 +56,6 @@ Adds SOMA-specific functionality to the \code{\link{TileDBArray}} class. (lifec }} \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMAArrayBase-read_complete}{}}} -\subsection{Method \code{read_complete()}}{ -Check if iterated read is complete or not. (lifecycle: experimental) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{SOMAArrayBase$read_complete()}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMAArrayBase-read_next}{}}} -\subsection{Method \code{read_next()}}{ -Read the next chunk of an iterated read. (lifecycle: experimental) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{SOMAArrayBase$read_next()}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-SOMAArrayBase-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/apis/r/man/SOMADataFrame.Rd b/apis/r/man/SOMADataFrame.Rd index c05ec68350..2119c7b3a9 100644 --- a/apis/r/man/SOMADataFrame.Rd +++ b/apis/r/man/SOMADataFrame.Rd @@ -46,8 +46,6 @@ row and is intended to act as a join key for other objects, such as
  • tiledbsoma::TileDBArray$shape()
  • tiledbsoma::TileDBArray$tiledb_array()
  • tiledbsoma::TileDBArray$tiledb_schema()
  • -
  • tiledbsoma::SOMAArrayBase$read_complete()
  • -
  • tiledbsoma::SOMAArrayBase$read_next()
  • }} diff --git a/apis/r/man/SOMADenseNDArray.Rd b/apis/r/man/SOMADenseNDArray.Rd index d35d4ef4b8..15b09ce65c 100644 --- a/apis/r/man/SOMADenseNDArray.Rd +++ b/apis/r/man/SOMADenseNDArray.Rd @@ -62,8 +62,6 @@ The \code{write} method is currently limited to writing from 2-d matrices.
  • tiledbsoma::TileDBArray$shape()
  • tiledbsoma::TileDBArray$tiledb_array()
  • tiledbsoma::TileDBArray$tiledb_schema()
  • -
  • tiledbsoma::SOMAArrayBase$read_complete()
  • -
  • tiledbsoma::SOMAArrayBase$read_next()
  • }} diff --git a/apis/r/man/SOMASparseNDArray.Rd b/apis/r/man/SOMASparseNDArray.Rd index f49ca36731..e2eb88af1e 100644 --- a/apis/r/man/SOMASparseNDArray.Rd +++ b/apis/r/man/SOMASparseNDArray.Rd @@ -30,7 +30,7 @@ the object are overwritten and new index values are added. (lifecycle: experimen \itemize{ \item \href{#method-SOMASparseNDArray-create}{\code{SOMASparseNDArray$create()}} \item \href{#method-SOMASparseNDArray-read_arrow_table}{\code{SOMASparseNDArray$read_arrow_table()}} -\item \href{#method-SOMASparseNDArray-read_sparse_matrix}{\code{SOMASparseNDArray$read_sparse_matrix()}} +\item \href{#method-SOMASparseNDArray-read_sparse_matrix_zero_based}{\code{SOMASparseNDArray$read_sparse_matrix_zero_based()}} \item \href{#method-SOMASparseNDArray-write}{\code{SOMASparseNDArray$write()}} \item \href{#method-SOMASparseNDArray-nnz}{\code{SOMASparseNDArray$nnz()}} \item \href{#method-SOMASparseNDArray-clone}{\code{SOMASparseNDArray$clone()}} @@ -61,8 +61,6 @@ the object are overwritten and new index values are added. (lifecycle: experimen
  • tiledbsoma::TileDBArray$shape()
  • tiledbsoma::TileDBArray$tiledb_array()
  • tiledbsoma::TileDBArray$tiledb_schema()
  • -
  • tiledbsoma::SOMAArrayBase$read_complete()
  • -
  • tiledbsoma::SOMAArrayBase$read_next()
  • }} @@ -124,12 +122,12 @@ An \code{\link[arrow:Table]{arrow::Table}}. } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read_sparse_matrix}{}}} -\subsection{Method \code{read_sparse_matrix()}}{ +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read_sparse_matrix_zero_based}{}}} +\subsection{Method \code{read_sparse_matrix_zero_based()}}{ Read as a sparse matrix (lifecycle: experimental) \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_sparse_matrix( +\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_sparse_matrix_zero_based( coords = NULL, result_order = "auto", repr = c("C", "T", "R"), @@ -161,7 +159,11 @@ read. List elements can be named when specifying a subset of dimensions.} \if{html}{\out{
    }} } \subsection{Returns}{ -A \code{matrix} object +A \code{matrix}-like object accessed using zero-based indexes. It supports +only basic access operations with zero-based indexes as well as \code{dim()}, +\code{nrow()}, and \code{ncol()}. Use \code{as.one.based()} to get a fully-featured +sparse matrix object supporting more advanced operations (with one-based +indexing). } } \if{html}{\out{
    }} diff --git a/apis/r/man/SparseReadIter.Rd b/apis/r/man/SparseReadIter.Rd new file mode 100644 index 0000000000..af6a971c13 --- /dev/null +++ b/apis/r/man/SparseReadIter.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SparseReadIter.R +\name{SparseReadIter} +\alias{SparseReadIter} +\title{SparseReadIter} +\description{ +\code{SparseReadIter} is a class that allows for iteration over +a reads on \link{SOMASparseNDArray}. +Iteration chunks are retrieved as 0-based Views of \link[Matrix]{SparseMatrix}. +} +\section{Super class}{ +\code{tiledbsoma::ReadIter} -> \code{SparseReadIter} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-SparseReadIter-new}{\code{SparseReadIter$new()}} +\item \href{#method-SparseReadIter-concat}{\code{SparseReadIter$concat()}} +\item \href{#method-SparseReadIter-clone}{\code{SparseReadIter$clone()}} +} +} +\if{html}{\out{ +
    Inherited methods + +
    +}} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SparseReadIter-new}{}}} +\subsection{Method \code{new()}}{ +Create (lifecycle: experimental) +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SparseReadIter$new( + uri, + config, + colnames = NULL, + qc = NULL, + dim_points = NULL, + loglevel = "auto", + repr = c("C", "T", "R") +)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{uri}}{Character value with URI path to a SOMADataFrame or SOMASparseNDArray} + +\item{\code{config}}{character vector containing TileDB config.} + +\item{\code{colnames}}{Optional vector of character value with the name of the columns to retrieve} + +\item{\code{qc}}{Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. +no query condition} + +\item{\code{dim_points}}{Optional named list with vector of data points to select on the given +dimension(s). Each dimension can be one entry in the list.} + +\item{\code{loglevel}}{Character value with the desired logging level, defaults to \sQuote{auto}} + +\item{\code{repr}}{Optional one-character code for sparse matrix representation type +which lets prior setting prevail, any other value is set as new logging level.} +} +\if{html}{\out{
    }} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SparseReadIter-concat}{}}} +\subsection{Method \code{concat()}}{ +Concatenate remainder of iterator +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SparseReadIter$concat()}\if{html}{\out{
    }} +} + +\subsection{Returns}{ +\link{matrixZeroBasedView} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SparseReadIter-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SparseReadIter$clone(deep = FALSE)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
    }} +} +} +} diff --git a/apis/r/man/TableReadIter.Rd b/apis/r/man/TableReadIter.Rd new file mode 100644 index 0000000000..5562161a3c --- /dev/null +++ b/apis/r/man/TableReadIter.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TableReadIter.R +\name{TableReadIter} +\alias{TableReadIter} +\title{SOMA Read Iterator over Arrow Table} +\description{ +\code{TableReadIter} is a class that allows for iteration over +a reads on \link{SOMASparseNDArray} and \link{SOMADataFrame}. +Iteration chunks are retrieved as Arrow Tables +} +\section{Super class}{ +\code{tiledbsoma::ReadIter} -> \code{TableReadIter} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-TableReadIter-concat}{\code{TableReadIter$concat()}} +\item \href{#method-TableReadIter-clone}{\code{TableReadIter$clone()}} +} +} +\if{html}{\out{ +
    Inherited methods + +
    +}} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableReadIter-concat}{}}} +\subsection{Method \code{concat()}}{ +Concatenate remainder of iterator +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{TableReadIter$concat()}\if{html}{\out{
    }} +} + +\subsection{Returns}{ +\link[Arrow]{Table} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableReadIter-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{TableReadIter$clone(deep = FALSE)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
    }} +} +} +} diff --git a/apis/r/man/example-datasets.Rd b/apis/r/man/example-datasets.Rd index b6e4cb12da..683e381cf8 100644 --- a/apis/r/man/example-datasets.Rd +++ b/apis/r/man/example-datasets.Rd @@ -29,7 +29,7 @@ load_dataset(name, dir = tempdir()) } \itemize{ -\item \code{load_dataset()} returns an SOMA object. +\item \code{load_dataset()} returns a SOMA object. } } \description{ diff --git a/apis/r/man/print.matrixZeroBasedView.Rd b/apis/r/man/print.matrixZeroBasedView.Rd new file mode 100644 index 0000000000..bece76ed93 --- /dev/null +++ b/apis/r/man/print.matrixZeroBasedView.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-matrixZeroBasedView.R +\name{print.matrixZeroBasedView} +\alias{print.matrixZeroBasedView} +\title{print} +\usage{ +\method{print}{matrixZeroBasedView}(x) +} +\arguments{ +\item{x}{The zero-based matrix view.} +} +\value{ +Matrix column count. +} +\description{ +print +} diff --git a/apis/r/man/sr_setup.Rd b/apis/r/man/sr_setup.Rd index e9a9f8c658..9bae6aaa05 100644 --- a/apis/r/man/sr_setup.Rd +++ b/apis/r/man/sr_setup.Rd @@ -4,7 +4,7 @@ \alias{sr_setup} \alias{sr_complete} \alias{sr_next} -\title{Iterator-Style Access to SOMA Array via SOMAArrayReader} +\title{Iterator-Style Access to SOMA Array via SOMAArray} \usage{ sr_setup( uri, @@ -40,14 +40,14 @@ for the given dimension. Each dimension can be one entry in the list.} \item{loglevel}{Character value with the desired logging level, defaults to \sQuote{auto} which lets prior setting prevail, any other value is set as new logging level.} -\item{sr}{An external pointer to a TileDB SOMAArrayReader object} +\item{sr}{An external pointer to a TileDB SOMAArray object} } \value{ -\code{sr_setup} returns an external pointer to a SOMAArrayReader. \code{sr_complete} +\code{sr_setup} returns an external pointer to a SOMAArray. \code{sr_complete} returns a boolean, and \code{sr_next} returns an Arrow array helper object. 
} \description{ -The \verb{sr_*} functions provide low-level access to an instance of the SOMAArrayReader +The \verb{sr_*} functions provide low-level access to an instance of the SOMAArray class so that iterative access over parts of a (large) array is possible. \describe{ \item{\code{sr_setup}}{instantiates and by default also submits a query} From a415191d25f645cdac0cfe89929c6c191a608426 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 11 May 2023 11:44:42 -0700 Subject: [PATCH 08/18] update docs --- apis/r/R/ReadIter.R | 2 +- apis/r/R/SOMADataFrame.R | 2 +- apis/r/R/SOMASparseNDArray.R | 17 ++++++++++------- apis/r/R/SparseReadIter.R | 6 +++--- apis/r/R/TableReadIter.R | 6 +++--- apis/r/R/utils-matrixZeroBasedView.R | 3 ++- apis/r/R/utils-readerTransformers.R | 8 ++++---- apis/r/man/ConfigList.Rd | 2 +- apis/r/man/DenseReadIter.Rd | 2 +- apis/r/man/EphemeralCollection.Rd | 2 +- apis/r/man/EphemeralCollectionBase.Rd | 2 +- apis/r/man/EphemeralExperiment.Rd | 2 +- apis/r/man/EphemeralMeasurement.Rd | 2 +- apis/r/man/PlatformConfig.Rd | 2 +- apis/r/man/ReadIter.Rd | 2 +- apis/r/man/SOMAArrayBase.Rd | 2 +- apis/r/man/SOMACollection.Rd | 2 +- apis/r/man/SOMACollectionBase.Rd | 2 +- apis/r/man/SOMAContextBase.Rd | 2 +- apis/r/man/SOMADataFrame.Rd | 4 ++-- apis/r/man/SOMADenseNDArray.Rd | 2 +- apis/r/man/SOMAExperiment.Rd | 2 +- apis/r/man/SOMAMeasurement.Rd | 2 +- apis/r/man/SOMASparseNDArray.Rd | 19 +++++++++++-------- apis/r/man/SOMATileDBContext.Rd | 2 +- apis/r/man/ScalarMap.Rd | 2 +- apis/r/man/SparseReadIter.Rd | 8 ++++---- apis/r/man/TableReadIter.Rd | 8 ++++---- apis/r/man/TileDBArray.Rd | 2 +- apis/r/man/TileDBCreateOptions.Rd | 2 +- apis/r/man/TileDBGroup.Rd | 2 +- 31 files changed, 65 insertions(+), 58 deletions(-) diff --git a/apis/r/R/ReadIter.R b/apis/r/R/ReadIter.R index 067afd1b43..c412889922 100644 --- a/apis/r/R/ReadIter.R +++ b/apis/r/R/ReadIter.R @@ -47,7 +47,7 @@ ReadIter <- R6::R6Class( #' @description Read the next chunk of an 
iterated read. (lifecycle: experimental). #' If read is complete, retunrs `NULL` and raises warning. - #' @return \code{NULL} or one of \link[Arrow]{Table}, \link{matrixZeroBasedView} + #' @return \code{NULL} or one of arrow::\link[arrow]{Table}, \link{matrixZeroBasedView} read_next = function() { if (is.null(private$soma_reader_pointer)) { NULL diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index ec5f34f740..da8cb91803 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -170,7 +170,7 @@ SOMADataFrame <- R6::R6Class( #' @param iterated Option boolean indicated whether data is read in call (when #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. - #' @return An [`arrow::Table`]. + #' @return arrow::\link[arrow]{Table} or \link{TableReadIter} read = function(coords = NULL, column_names = NULL, value_filter = NULL, diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index f490f46b7e..260a093f11 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -105,7 +105,7 @@ SOMASparseNDArray <- R6::R6Class( #' @param iterated Option boolean indicated whether data is read in call (when #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. - #' @return An [`arrow::Table`]. + #' @return arrow::\link[arrow]{Table} or \link{TableReadIter} read_arrow_table = function( coords = NULL, result_order = "auto", @@ -139,7 +139,13 @@ SOMASparseNDArray <- R6::R6Class( } }, - #' @description Read as a sparse matrix (lifecycle: experimental) + #' @description Read as a sparse matrix (lifecycle: experimental). Returns + #' a `matrix`-like object accessed using zero-based indexes or an iterator + #' of those. 
The matrix-like objects support only basic access operations + #' with zero-based indexes as well as `dim()`, `nrow()`, and `ncol()` and + #' arithmetic operations as defined in \link[base]{groupGeneric}. + #' Use `as.one.based()` to get a fully-featured sparse matrix object supporting + #' more advanced operations (with one-based indexing). #' @param coords Optional `list` of integer vectors, one for each dimension, with a #' length equal to the number of values to read. If `NULL`, all values are #' read. List elements can be named when specifying a subset of dimensions. @@ -148,11 +154,8 @@ SOMASparseNDArray <- R6::R6Class( #' @param iterated Option boolean indicated whether data is read in call (when #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. - #' @return A `matrix`-like object accessed using zero-based indexes. It supports - #' only basic access operations with zero-based indexes as well as `dim()`, - #' `nrow()`, and `ncol()`. Use `as.one.based()` to get a fully-featured - #' sparse matrix object supporting more advanced operations (with one-based - #' indexing). + #' @return \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} or + #' \link{SparseReadIter} read_sparse_matrix_zero_based = function( coords = NULL, result_order = "auto", diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index 2bf2721554..742cfb5ea2 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -3,7 +3,7 @@ #' @description #' \code{SparseReadIter} is a class that allows for iteration over #' a reads on \link{SOMASparseNDArray}. -#' Iteration chunks are retrieved as 0-based Views of \link[Matrix]{SparseMatrix}. +#' Iteration chunks are retrieved as 0-based Views \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix}. 
#' @export SparseReadIter <- R6::R6Class( @@ -47,8 +47,8 @@ SparseReadIter <- R6::R6Class( private$dims_one_based <- c(max_soma_dim_0 + 1, max_soma_dim_1 + 1) }, - #' @description Concatenate remainder of iterator - #' @return \link{matrixZeroBasedView} + #' @description Concatenate remainder of iterator. + #' @return \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} concat = function(){ if(self$read_complete()) { diff --git a/apis/r/R/TableReadIter.R b/apis/r/R/TableReadIter.R index 6e1a9959c9..d739c10f64 100644 --- a/apis/r/R/TableReadIter.R +++ b/apis/r/R/TableReadIter.R @@ -3,7 +3,7 @@ #' @description #' `TableReadIter` is a class that allows for iteration over #' a reads on \link{SOMASparseNDArray} and \link{SOMADataFrame}. -#' Iteration chunks are retrieved as Arrow Tables +#' Iteration chunks are retrieved as arrow::\link[arrow]{Table} #' @export TableReadIter <- R6::R6Class( @@ -12,8 +12,8 @@ TableReadIter <- R6::R6Class( public = list( - #' @description Concatenate remainder of iterator - #' @return \link[Arrow]{Table} + #' @description Concatenate remainder of iterator. + #' @return arrow::\link[arrow]{Table} concat = function(){ if(self$read_complete()) { diff --git a/apis/r/R/utils-matrixZeroBasedView.R b/apis/r/R/utils-matrixZeroBasedView.R index 2222e202a2..895427e722 100644 --- a/apis/r/R/utils-matrixZeroBasedView.R +++ b/apis/r/R/utils-matrixZeroBasedView.R @@ -6,7 +6,8 @@ # - In particular, if M0[i,j] is vector- or matrix-valued, then the returned # vector/matrix is ONE-based. # - as.one.based(M0) returns M1. -# - The only other supported operations are: dim(M0), nrow(M0), ncol(M0). 
+# - The only other supported operations are: dim(M0), nrow(M0), ncol(M0), and arithmetic +# operations as defined in \link[base]{groupGeneric} #' Zero-based matrix shim #' diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R index 37a42e689b..ca7b020fb2 100644 --- a/apis/r/R/utils-readerTransformers.R +++ b/apis/r/R/utils-readerTransformers.R @@ -1,6 +1,6 @@ #' @description Converts the results of a \link{soma_array_reader} or \link{sr_next} to -#' an \link[Arrow]{Table} -#' @return \link[Arrow]{Table} +#' an arrow::\link[arrow]{Table} +#' @return arrow::\link[arrow]{Table} soma_array_to_arrow_table <- function(x) { arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) } @@ -11,7 +11,7 @@ soma_array_to_arrow_table <- function(x) { #' @param repr Optional one-character code for sparse matrix representation type #' @param dims_one_based Numerical vectors with two elements, one for each dimension. If #' \code{NULL}, then the following is used \code{c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} -#' @return \link{matrixZeroBasedView} +#' @return \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), dims_one_based = NULL) { # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the @@ -48,7 +48,7 @@ arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), dims_one_based = #' @param repr Optional one-character code for sparse matrix representation type #' @param dims_one_based Numerical vectors with two elements, one for each dimension. 
If #' \code{NULL}, then the following is used \code{c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} -#' @return \link{matrixZeroBasedView} +#' @return \link{matrixZeroBasedView} of \link[base]{matrix} arrow_table_to_dense <- function(tbl, byrow) { # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the diff --git a/apis/r/man/ConfigList.Rd b/apis/r/man/ConfigList.Rd index 7e9c9dcf84..52ba1f9a1d 100644 --- a/apis/r/man/ConfigList.Rd +++ b/apis/r/man/ConfigList.Rd @@ -10,7 +10,7 @@ Essentially, serves as a nested map where the inner map is a \code{\{: \link[tiledbsoma:ScalarMap]{\{: \}}\}} } \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{ConfigList} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{ConfigList} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/DenseReadIter.Rd b/apis/r/man/DenseReadIter.Rd index ee6bc1bf6b..33603c5a75 100644 --- a/apis/r/man/DenseReadIter.Rd +++ b/apis/r/man/DenseReadIter.Rd @@ -9,7 +9,7 @@ a reads on \link{SOMADenseNDArray}. Iteration chunks are retrieved as 0-based Views of \link[base]{matrix}. 
} \section{Super class}{ -\code{tiledbsoma::ReadIter} -> \code{DenseReadIter} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{DenseReadIter} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/EphemeralCollection.Rd b/apis/r/man/EphemeralCollection.Rd index a49b32d8db..17e25f8acb 100644 --- a/apis/r/man/EphemeralCollection.Rd +++ b/apis/r/man/EphemeralCollection.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{tiledbsoma::EphemeralCollectionBase} -> \code{EphemeralCollection} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{\link[tiledbsoma:EphemeralCollectionBase]{tiledbsoma::EphemeralCollectionBase}} -> \code{EphemeralCollection} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/EphemeralCollectionBase.Rd b/apis/r/man/EphemeralCollectionBase.Rd index 57c57b46ca..e5042fd38a 100644 --- a/apis/r/man/EphemeralCollectionBase.Rd +++ b/apis/r/man/EphemeralCollectionBase.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{EphemeralCollectionBase} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{EphemeralCollectionBase} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/EphemeralExperiment.Rd b/apis/r/man/EphemeralExperiment.Rd index 02b0ddfd88..0a1c7ad026 100644 --- a/apis/r/man/EphemeralExperiment.Rd +++ b/apis/r/man/EphemeralExperiment.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{tiledbsoma::EphemeralCollectionBase} -> \code{EphemeralExperiment} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{\link[tiledbsoma:EphemeralCollectionBase]{tiledbsoma::EphemeralCollectionBase}} -> \code{EphemeralExperiment} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/EphemeralMeasurement.Rd b/apis/r/man/EphemeralMeasurement.Rd index befbe6157f..a89590e560 100644 --- a/apis/r/man/EphemeralMeasurement.Rd +++ b/apis/r/man/EphemeralMeasurement.Rd @@ -8,7 +8,7 @@ } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{tiledbsoma::EphemeralCollectionBase} -> \code{EphemeralMeasurement} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{\link[tiledbsoma:EphemeralCollectionBase]{tiledbsoma::EphemeralCollectionBase}} -> \code{EphemeralMeasurement} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/PlatformConfig.Rd b/apis/r/man/PlatformConfig.Rd index b8a7b426d8..af1fc2558c 100644 --- a/apis/r/man/PlatformConfig.Rd +++ b/apis/r/man/PlatformConfig.Rd @@ -11,7 +11,7 @@ map is a \code{\link{ScalarMap}} contained within a \code{\link{ConfigList}} \code{\{platform: \{param: \{key: value\}\}\}} } \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{PlatformConfig} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{PlatformConfig} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/ReadIter.Rd b/apis/r/man/ReadIter.Rd index e43c230ccf..9070af48a8 100644 --- a/apis/r/man/ReadIter.Rd +++ b/apis/r/man/ReadIter.Rd @@ -82,7 +82,7 @@ If read is complete, retunrs \code{NULL} and raises warning. } \subsection{Returns}{ -\code{NULL} or one of \link[Arrow]{Table}, \link{matrixZeroBasedView} +\code{NULL} or one of arrow::\link[arrow]{Table}, \link{matrixZeroBasedView} } } \if{html}{\out{
    }} diff --git a/apis/r/man/SOMAArrayBase.Rd b/apis/r/man/SOMAArrayBase.Rd index 072c87686c..319c044cc4 100644 --- a/apis/r/man/SOMAArrayBase.Rd +++ b/apis/r/man/SOMAArrayBase.Rd @@ -12,7 +12,7 @@ SOMA Array Base Class Adds SOMA-specific functionality to the \code{\link{TileDBArray}} class. (lifecycle: experimental) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{SOMAArrayBase} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{SOMAArrayBase} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMACollection.Rd b/apis/r/man/SOMACollection.Rd index ef75e65766..1b358d9eac 100644 --- a/apis/r/man/SOMACollection.Rd +++ b/apis/r/man/SOMACollection.Rd @@ -10,7 +10,7 @@ and the values are any SOMA-defined foundational or composed type, including or \code{SOMAExperiment}. (lifecycle: experimental) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{SOMACollection} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{SOMACollection} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/SOMACollectionBase.Rd b/apis/r/man/SOMACollectionBase.Rd index 67cb2ebd75..790dfe0231 100644 --- a/apis/r/man/SOMACollectionBase.Rd +++ b/apis/r/man/SOMACollectionBase.Rd @@ -11,7 +11,7 @@ Base class for objects containing persistent collection of SOMA objects, mapping string keys to any SOMA object. (lifecycle: experimental) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{SOMACollectionBase} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{SOMACollectionBase} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMAContextBase.Rd b/apis/r/man/SOMAContextBase.Rd index 8d62a0e93e..01ba86a9f2 100644 --- a/apis/r/man/SOMAContextBase.Rd +++ b/apis/r/man/SOMAContextBase.Rd @@ -10,7 +10,7 @@ context options } \keyword{internal} \section{Super classes}{ -\code{tiledbsoma::MappingBase} -> \code{tiledbsoma::ScalarMap} -> \code{SOMAContextBase} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{\link[tiledbsoma:ScalarMap]{tiledbsoma::ScalarMap}} -> \code{SOMAContextBase} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/SOMADataFrame.Rd b/apis/r/man/SOMADataFrame.Rd index 2119c7b3a9..a8a3d18de3 100644 --- a/apis/r/man/SOMADataFrame.Rd +++ b/apis/r/man/SOMADataFrame.Rd @@ -10,7 +10,7 @@ row and is intended to act as a join key for other objects, such as \code{\link{SOMASparseNDArray}}. (lifecycle: experimental) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{tiledbsoma::SOMAArrayBase} -> \code{SOMADataFrame} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{\link[tiledbsoma:SOMAArrayBase]{tiledbsoma::SOMAArrayBase}} -> \code{SOMADataFrame} } \section{Methods}{ \subsection{Public methods}{ @@ -130,7 +130,7 @@ more information.} \if{html}{\out{
    }} } \subsection{Returns}{ -An \code{\link[arrow:Table]{arrow::Table}}. +arrow::\link[arrow]{Table} or \link{TableReadIter} } } \if{html}{\out{
    }} diff --git a/apis/r/man/SOMADenseNDArray.Rd b/apis/r/man/SOMADenseNDArray.Rd index 15b09ce65c..8bb6ae0152 100644 --- a/apis/r/man/SOMADenseNDArray.Rd +++ b/apis/r/man/SOMADenseNDArray.Rd @@ -25,7 +25,7 @@ The \code{write} method is currently limited to writing from 2-d matrices. (lifecycle: experimental) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{tiledbsoma::SOMAArrayBase} -> \code{SOMADenseNDArray} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{\link[tiledbsoma:SOMAArrayBase]{tiledbsoma::SOMAArrayBase}} -> \code{SOMADenseNDArray} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/SOMAExperiment.Rd b/apis/r/man/SOMAExperiment.Rd index 9c6b271245..8c36713eed 100644 --- a/apis/r/man/SOMAExperiment.Rd +++ b/apis/r/man/SOMAExperiment.Rd @@ -10,7 +10,7 @@ cells (aka a "multimodal dataset") with pre-defined fields: \code{obs} and \code (see \emph{Active Bindings} below for details). (lifecycle: experimental) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{SOMAExperiment} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{SOMAExperiment} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMAMeasurement.Rd b/apis/r/man/SOMAMeasurement.Rd index c08147f97e..05db0b0f13 100644 --- a/apis/r/man/SOMAMeasurement.Rd +++ b/apis/r/man/SOMAMeasurement.Rd @@ -10,7 +10,7 @@ and is otherwise a specialized \code{\link{SOMACollection}} with pre-defined fie details). (lifecycle: experimental) } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBGroup} -> \code{tiledbsoma::SOMACollectionBase} -> \code{SOMAMeasurement} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBGroup]{tiledbsoma::TileDBGroup}} -> \code{\link[tiledbsoma:SOMACollectionBase]{tiledbsoma::SOMACollectionBase}} -> \code{SOMAMeasurement} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SOMASparseNDArray.Rd b/apis/r/man/SOMASparseNDArray.Rd index e2eb88af1e..74d3dfdc46 100644 --- a/apis/r/man/SOMASparseNDArray.Rd +++ b/apis/r/man/SOMASparseNDArray.Rd @@ -23,7 +23,7 @@ the object are overwritten and new index values are added. (lifecycle: experimen } } \section{Super classes}{ -\code{tiledbsoma::TileDBObject} -> \code{tiledbsoma::TileDBArray} -> \code{tiledbsoma::SOMAArrayBase} -> \code{SOMASparseNDArray} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{\link[tiledbsoma:TileDBArray]{tiledbsoma::TileDBArray}} -> \code{\link[tiledbsoma:SOMAArrayBase]{tiledbsoma::SOMAArrayBase}} -> \code{SOMASparseNDArray} } \section{Methods}{ \subsection{Public methods}{ @@ -118,14 +118,20 @@ read. List elements can be named when specifying a subset of dimensions.} \if{html}{\out{
    }} } \subsection{Returns}{ -An \code{\link[arrow:Table]{arrow::Table}}. +arrow::\link[arrow]{Table} or \link{TableReadIter} } } \if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read_sparse_matrix_zero_based}{}}} \subsection{Method \code{read_sparse_matrix_zero_based()}}{ -Read as a sparse matrix (lifecycle: experimental) +Read as a sparse matrix (lifecycle: experimental). Returns +a \code{matrix}-like object accessed using zero-based indexes or an iterator +of those. The matrix-like objects support only basic access operations +with zero-based indexes as well as \code{dim()}, \code{nrow()}, and \code{ncol()} and +arithmetic operations as defined in \link[base]{groupGeneric}. +Use \code{as.one.based()} to get a fully-featured sparse matrix object supporting +more advanced operations (with one-based indexing). \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_sparse_matrix_zero_based( coords = NULL, @@ -159,11 +165,8 @@ read. List elements can be named when specifying a subset of dimensions.} \if{html}{\out{
    }} } \subsection{Returns}{ -A \code{matrix}-like object accessed using zero-based indexes. It supports -only basic access operations with zero-based indexes as well as \code{dim()}, -\code{nrow()}, and \code{ncol()}. Use \code{as.one.based()} to get a fully-featured -sparse matrix object supporting more advanced operations (with one-based -indexing). +\link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} or +\link{SparseReadIter} } } \if{html}{\out{
    }} diff --git a/apis/r/man/SOMATileDBContext.Rd b/apis/r/man/SOMATileDBContext.Rd index dffe768b62..e5a606b138 100644 --- a/apis/r/man/SOMATileDBContext.Rd +++ b/apis/r/man/SOMATileDBContext.Rd @@ -7,7 +7,7 @@ Context map for TileDB-backed SOMA objects } \section{Super classes}{ -\code{tiledbsoma::MappingBase} -> \code{tiledbsoma::ScalarMap} -> \code{tiledbsoma::SOMAContextBase} -> \code{SOMATileDBContext} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{\link[tiledbsoma:ScalarMap]{tiledbsoma::ScalarMap}} -> \code{\link[tiledbsoma:SOMAContextBase]{tiledbsoma::SOMAContextBase}} -> \code{SOMATileDBContext} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/ScalarMap.Rd b/apis/r/man/ScalarMap.Rd index 4089120f4e..055c5300b8 100644 --- a/apis/r/man/ScalarMap.Rd +++ b/apis/r/man/ScalarMap.Rd @@ -9,7 +9,7 @@ optionally be limited further to a specific atomic vector type (eg. \dQuote{\code{logical}}) } \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{ScalarMap} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{ScalarMap} } \section{Active bindings}{ \if{html}{\out{
    }} diff --git a/apis/r/man/SparseReadIter.Rd b/apis/r/man/SparseReadIter.Rd index af6a971c13..5beaf8eac4 100644 --- a/apis/r/man/SparseReadIter.Rd +++ b/apis/r/man/SparseReadIter.Rd @@ -6,10 +6,10 @@ \description{ \code{SparseReadIter} is a class that allows for iteration over a reads on \link{SOMASparseNDArray}. -Iteration chunks are retrieved as 0-based Views of \link[Matrix]{SparseMatrix}. +Iteration chunks are retrieved as 0-based Views \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix}. } \section{Super class}{ -\code{tiledbsoma::ReadIter} -> \code{SparseReadIter} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{SparseReadIter} } \section{Methods}{ \subsection{Public methods}{ @@ -71,13 +71,13 @@ which lets prior setting prevail, any other value is set as new logging level.} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-SparseReadIter-concat}{}}} \subsection{Method \code{concat()}}{ -Concatenate remainder of iterator +Concatenate remainder of iterator. \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{SparseReadIter$concat()}\if{html}{\out{
    }} } \subsection{Returns}{ -\link{matrixZeroBasedView} +\link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} } } \if{html}{\out{
    }} diff --git a/apis/r/man/TableReadIter.Rd b/apis/r/man/TableReadIter.Rd index 5562161a3c..2dfd2cad17 100644 --- a/apis/r/man/TableReadIter.Rd +++ b/apis/r/man/TableReadIter.Rd @@ -6,10 +6,10 @@ \description{ \code{TableReadIter} is a class that allows for iteration over a reads on \link{SOMASparseNDArray} and \link{SOMADataFrame}. -Iteration chunks are retrieved as Arrow Tables +Iteration chunks are retrieved as arrow::\link[arrow]{Table} } \section{Super class}{ -\code{tiledbsoma::ReadIter} -> \code{TableReadIter} +\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{TableReadIter} } \section{Methods}{ \subsection{Public methods}{ @@ -31,13 +31,13 @@ Iteration chunks are retrieved as Arrow Tables \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TableReadIter-concat}{}}} \subsection{Method \code{concat()}}{ -Concatenate remainder of iterator +Concatenate remainder of iterator. \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{TableReadIter$concat()}\if{html}{\out{
    }} } \subsection{Returns}{ -\link[Arrow]{Table} +arrow::\link[arrow]{Table} } } \if{html}{\out{
    }} diff --git a/apis/r/man/TileDBArray.Rd b/apis/r/man/TileDBArray.Rd index 4fb3200122..2abf98162b 100644 --- a/apis/r/man/TileDBArray.Rd +++ b/apis/r/man/TileDBArray.Rd @@ -8,7 +8,7 @@ Base class for representing an individual TileDB array. (lifecycle: experimental) } \section{Super class}{ -\code{tiledbsoma::TileDBObject} -> \code{TileDBArray} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{TileDBArray} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/TileDBCreateOptions.Rd b/apis/r/man/TileDBCreateOptions.Rd index b6ec492716..85d2f91051 100644 --- a/apis/r/man/TileDBCreateOptions.Rd +++ b/apis/r/man/TileDBCreateOptions.Rd @@ -11,7 +11,7 @@ Provides strongly-typed access and default values for `platform_config` options Intended for internal use only. } \section{Super class}{ -\code{tiledbsoma::MappingBase} -> \code{TileDBCreateOptions} +\code{\link[tiledbsoma:MappingBase]{tiledbsoma::MappingBase}} -> \code{TileDBCreateOptions} } \section{Methods}{ \subsection{Public methods}{ diff --git a/apis/r/man/TileDBGroup.Rd b/apis/r/man/TileDBGroup.Rd index 2f2b310c73..65f4573f72 100644 --- a/apis/r/man/TileDBGroup.Rd +++ b/apis/r/man/TileDBGroup.Rd @@ -10,7 +10,7 @@ A \code{TileDBArray} or \code{TileDBGroup}. 
Base class for interacting with TileDB groups (lifecycle: experimental) } \section{Super class}{ -\code{tiledbsoma::TileDBObject} -> \code{TileDBGroup} +\code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{TileDBGroup} } \section{Methods}{ \subsection{Public methods}{ From 4534226c3b6c1dc885928f1224bab0da7a4b6d60 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 11 May 2023 12:15:28 -0700 Subject: [PATCH 09/18] remove dup function --- apis/r/R/SOMAArrayBase.R | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/apis/r/R/SOMAArrayBase.R b/apis/r/R/SOMAArrayBase.R index 82007bce6b..1ee767029f 100644 --- a/apis/r/R/SOMAArrayBase.R +++ b/apis/r/R/SOMAArrayBase.R @@ -31,30 +31,6 @@ SOMAArrayBase <- R6::R6Class( meta[[SOMA_OBJECT_TYPE_METADATA_KEY]] <- self$class() meta[[SOMA_ENCODING_VERSION_METADATA_KEY]] <- SOMA_ENCODING_VERSION self$set_metadata(meta) - }, - - #' @description Converts a list of vectors corresponding to coords to a - #' format acceptable for sr_setup and soma_array_reader - convert_coords = function(coords) { - - ## ensure coords is a named list, use to select dim points - stopifnot("'coords' must be a list" = is.list(coords), - "'coords' must be a list of vectors or integer64" = - all(vapply_lgl(coords, is_vector_or_int64)), - "'coords' if unnamed must have length of dim names, else if named names must match dim names" = - (is.null(names(coords)) && length(coords) == length(self$dimnames())) || - (!is.null(names(coords)) && all(names(coords) %in% self$dimnames())) - ) - - ## if unnamed (and test for length has passed in previous statement) set names - if (is.null(names(coords))) names(coords) <- self$dimnames() - - ## convert integer to integer64 to match dimension type - coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x) - - coords - } - ) ) From 2d22be8ba4001bc67361b0e47cd11208f655176f Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Tue, 16 
May 2023 16:32:47 -0700 Subject: [PATCH 10/18] update iterators --- apis/r/NAMESPACE | 1 - apis/r/R/SOMADenseNDArray.R | 118 ++++++++++---------------------- apis/r/R/SOMASparseNDArray.R | 14 ++-- apis/r/R/SparseReadIter.R | 13 +--- apis/r/R/datasets.R | 6 +- apis/r/man/DenseReadIter.Rd | 94 ------------------------- apis/r/man/SOMAArrayBase.Rd | 5 -- apis/r/man/SOMADenseNDArray.Rd | 8 --- apis/r/man/SOMASparseNDArray.Rd | 3 - apis/r/man/SparseReadIter.Rd | 3 +- 10 files changed, 55 insertions(+), 210 deletions(-) delete mode 100644 apis/r/man/DenseReadIter.Rd diff --git a/apis/r/NAMESPACE b/apis/r/NAMESPACE index 76adcc03a7..127df0b130 100644 --- a/apis/r/NAMESPACE +++ b/apis/r/NAMESPACE @@ -22,7 +22,6 @@ S3method(write_soma,TsparseMatrix) S3method(write_soma,data.frame) S3method(write_soma,matrix) export(ConfigList) -export(DenseReadIter) export(EphemeralCollection) export(EphemeralExperiment) export(EphemeralMeasurement) diff --git a/apis/r/R/SOMADenseNDArray.R b/apis/r/R/SOMADenseNDArray.R index 7bdc3e32b6..929d22d534 100644 --- a/apis/r/R/SOMADenseNDArray.R +++ b/apis/r/R/SOMADenseNDArray.R @@ -106,62 +106,29 @@ SOMADenseNDArray <- R6::R6Class( #' length equal to the number of values to read. If `NULL`, all values are #' read. List elements can be named when specifying a subset of dimensions. #' @template param-result-order - #' @param iterated Option boolean indicated whether data is read in call (when - #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. #' @return An [`arrow::Table`]. 
read_arrow_table = function( coords = NULL, result_order = "auto", - iterated = FALSE, log_level = "warn" ) { uri <- self$uri result_order <- map_query_layout(match_query_layout(result_order)) - + if (!is.null(coords)) { - ## ensure coords is a named list, use to select dim points - stopifnot("'coords' must be a list" = is.list(coords), - "'coords' must be a list of vectors or integer64" = - all(vapply_lgl(coords, is_vector_or_int64)), - "'coords' if unnamed must have length of dim names, else if named names must match dim names" = - (is.null(names(coords)) && length(coords) == length(self$dimnames())) || - (!is.null(names(coords)) && all(names(coords) %in% self$dimnames())) - ) - - ## if unnamed (and test for length has passed in previous statement) set names - if (is.null(names(coords))) names(coords) <- self$dimnames() - - ## convert integer to integer64 to match dimension type - coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x) + coords <- private$convert_coords(coords) } - private$dense_matrix <- FALSE - - if (isFALSE(iterated)) { - cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) - rl <- soma_array_reader(uri = uri, - dim_points = coords, # NULL dealt with by soma_array_reader() - result_order = result_order, - loglevel = log_level, # idem - config = cfg) - private$soma_reader_transform(rl) - } else { - ## should we error if this isn't null? 
- if (!is.null(self$soma_reader_pointer)) { - warning("pointer not null, skipping") - rl <- NULL - } else { - private$soma_reader_setup() - rl <- list() - while (!self$read_complete()) { - ## soma_reader_transform() applied inside read_next() - rl <- c(rl, self$read_next()) - } - } - invisible(rl) - } + cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) + rl <- soma_array_reader(uri = uri, + dim_points = coords, # NULL dealt with by soma_array_reader() + result_order = result_order, + loglevel = log_level, # idem + config = cfg) + + soma_array_to_arrow_table(rl) }, #' @description Read as a dense matrix (lifecycle: experimental) @@ -169,14 +136,11 @@ SOMADenseNDArray <- R6::R6Class( #' length equal to the number of values to read. If `NULL`, all values are #' read. List elements can be named when specifying a subset of dimensions. #' @template param-result-order - #' @param iterated Option boolean indicated whether data is read in call (when - #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. #' @return A `matrix` object read_dense_matrix = function( coords = NULL, result_order = "ROW_MAJOR", - iterated = FALSE, log_level = "warn" ) { dims <- self$dimensions() @@ -186,23 +150,12 @@ SOMADenseNDArray <- R6::R6Class( all.equal(c("soma_dim_0", "soma_dim_1"), names(dims)), "Array must contain column 'soma_data'" = all.equal("soma_data", names(attr))) - if (isFALSE(iterated)) { - tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) - m <- matrix(as.numeric(tbl$GetColumnByName("soma_data")), - nrow = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0")))), - ncol = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1")))), - byrow = result_order == "ROW_MAJOR") - } else { - ## should we error if this isn't null? 
- if (!is.null(self$soma_reader_pointer)) { - warning("pointer not null, skipping") - } else { - private$soma_reader_setup() - private$dense_matrix <- TRUE - private$result_order <- result_order - } - invisible(NULL) - } + tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) + m <- matrix(as.numeric(tbl$GetColumnByName("soma_data")), + nrow = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0")))), + ncol = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1")))), + byrow = result_order == "ROW_MAJOR") + }, #' @description Write matrix data to the array. (lifecycle: experimental) @@ -239,22 +192,27 @@ SOMADenseNDArray <- R6::R6Class( ), private = list( - - ## refined from base class - soma_reader_transform = function(x) { - tbl <- arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) - if (isTRUE(private$dense_matrix)) { - m <- matrix(as.numeric(tbl$GetColumnByName("soma_data")), - nrow = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0")))), - ncol = length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1")))), - byrow = private$result_order == "ROW_MAJOR") - } else { - tbl - } - }, - - ## internal state variable for dense matrix vs arrow table return - dense_matrix = TRUE, - result_order = "ROW_MAJOR" + + #' @description Converts a list of vectors corresponding to coords to a + #' format acceptable for sr_setup and soma_array_reader + convert_coords = function(coords) { + + ## ensure coords is a named list, use to select dim points + stopifnot("'coords' must be a list" = is.list(coords), + "'coords' must be a list of vectors or integer64" = + all(vapply_lgl(coords, is_vector_or_int64)), + "'coords' if unnamed must have length of dim names, else if named names must match dim names" = + (is.null(names(coords)) && length(coords) == length(self$dimnames())) || + (!is.null(names(coords)) && all(names(coords) %in% self$dimnames())) + ) + + ## if unnamed (and test for length has passed in 
previous statement) set names + if (is.null(names(coords))) names(coords) <- self$dimnames() + + ## convert integer to integer64 to match dimension type + coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x) + + coords + } ) ) diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index 260a093f11..acf642655f 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -113,6 +113,8 @@ SOMASparseNDArray <- R6::R6Class( log_level = "warn" ) { uri <- self$uri + + stopifnot("Array must have non-zero elements less than '.Machine$integer.max'" = self$nnz() < .Machine$integer.max) result_order <- map_query_layout(match_query_layout(result_order)) @@ -150,7 +152,6 @@ SOMASparseNDArray <- R6::R6Class( #' length equal to the number of values to read. If `NULL`, all values are #' read. List elements can be named when specifying a subset of dimensions. #' @template param-result-order - #' @param repr Optional one-character code for sparse matrix representation type #' @param iterated Option boolean indicated whether data is read in call (when #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. 
@@ -159,16 +160,18 @@ SOMASparseNDArray <- R6::R6Class( read_sparse_matrix_zero_based = function( coords = NULL, result_order = "auto", - repr = c("C", "T", "R"), iterated = FALSE, log_level = "warn" ) { - repr <- match.arg(repr) dims <- self$dimensions() attr <- self$attributes() + shape <- self$shape() + stopifnot("Array must have two dimensions" = length(dims) == 2, "Array must contain columns 'soma_dim_0' and 'soma_dim_1'" = all.equal(c("soma_dim_0", "soma_dim_1"), names(dims)), + "Array must have non-zero elements less than '.Machine$integer.max'" = self$nnz() < .Machine$integer.max, + "Array dimensions must not exceed '.Machine$integer.max'" = any(shape < .Machine$integer.max), "Array must contain column 'soma_data'" = all.equal("soma_data", names(attr))) if (!is.null(coords)) { @@ -177,14 +180,15 @@ SOMASparseNDArray <- R6::R6Class( if (isFALSE(iterated)) { tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) - arrow_table_to_sparse(tbl, repr = repr) + arrow_table_to_sparse(tbl, repr = "T", dims_one_based = shape + 1) } else { cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) SparseReadIter$new(uri = self$uri, config = cfg, dim_points = coords, loglevel = log_level, - repr = repr) + repr = "T", + shape = shape) } }, diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index 742cfb5ea2..1c521fdc5a 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -29,22 +29,15 @@ SparseReadIter <- R6::R6Class( qc = NULL, dim_points = NULL, loglevel = "auto", - repr = c("C", "T", "R")) { + repr = c("C", "T", "R"), + shape) { # Initiate super class super$initialize (uri = uri, config = config, colnames = colnames, qc = qc, dim_points = dim_points, loglevel = loglevel) private$repr <- repr - - # Get max soma dims for indeces via tiledb - tiledb_array <- tiledb::tiledb_array(uri) - tiledb::tiledb_array_open(tiledb_array, type = "READ") - max_soma_dim_0 <- 
as.integer(max(tiledb::tiledb_array_get_non_empty_domain_from_index(tiledb_array, 1))) - max_soma_dim_1 <- as.integer(max(tiledb::tiledb_array_get_non_empty_domain_from_index(tiledb_array, 2))) - tiledb::tiledb_array_close(tiledb_array) - - private$dims_one_based <- c(max_soma_dim_0 + 1, max_soma_dim_1 + 1) + private$dims_one_based <- shape + 1 }, #' @description Concatenate remainder of iterator. diff --git a/apis/r/R/datasets.R b/apis/r/R/datasets.R index ec47823c3d..34d6c215b2 100644 --- a/apis/r/R/datasets.R +++ b/apis/r/R/datasets.R @@ -61,7 +61,7 @@ extract_dataset <- function(name, dir = tempdir()) { #' @return #' - `load_dataset()` returns a SOMA object. #' @export -load_dataset <- function(name, dir = tempdir()) { +load_dataset <- function(name, dir = tempdir(), tiledbsoma_ctx = NULL) { dataset_uri <- extract_dataset(name, dir) # Inspect the object's metadata @@ -75,8 +75,8 @@ load_dataset <- function(name, dir = tempdir()) { # Instantiate the proper SOMA object switch( object$get_metadata("soma_object_type"), - "SOMAExperiment" = SOMAExperimentOpen(dataset_uri), - "SOMADataFrame" = SOMADataFrameOpen(dataset_uri), + "SOMAExperiment" = SOMAExperimentOpen(dataset_uri, tiledbsoma_ctx = tiledbsoma_ctx), + "SOMADataFrame" = SOMADataFrameOpen(dataset_uri, tiledbsoma_ctx = tiledbsoma_ctx), stop("The dataset is an unsupported SOMA object", call. = FALSE) ) } diff --git a/apis/r/man/DenseReadIter.Rd b/apis/r/man/DenseReadIter.Rd deleted file mode 100644 index 33603c5a75..0000000000 --- a/apis/r/man/DenseReadIter.Rd +++ /dev/null @@ -1,94 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DenseReadIter.R -\name{DenseReadIter} -\alias{DenseReadIter} -\title{DenseReadIter} -\description{ -\code{DenseReadIter} is a class that allows for iteration over -a reads on \link{SOMADenseNDArray}. -Iteration chunks are retrieved as 0-based Views of \link[base]{matrix}. 
-} -\section{Super class}{ -\code{\link[tiledbsoma:ReadIter]{tiledbsoma::ReadIter}} -> \code{DenseReadIter} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-DenseReadIter-new}{\code{DenseReadIter$new()}} -\item \href{#method-DenseReadIter-concat}{\code{DenseReadIter$concat()}} -\item \href{#method-DenseReadIter-clone}{\code{DenseReadIter$clone()}} -} -} -\if{html}{\out{ -
    Inherited methods - -
    -}} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-DenseReadIter-new}{}}} -\subsection{Method \code{new()}}{ -Create (lifecycle: experimental) -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{DenseReadIter$new( - uri, - config, - dim_points = NULL, - result_order = NULL, - loglevel = "auto" -)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{uri}}{Character value with URI path to a SOMADataFrame or SOMASparseNDArray} - -\item{\code{config}}{character vector containing TileDB config. -no query condition} - -\item{\code{dim_points}}{Optional named list with vector of data points to select on the given -dimension(s). Each dimension can be one entry in the list.} - -\item{\code{loglevel}}{Character value with the desired logging level, defaults to \sQuote{auto}} - -\item{\code{repr}}{Optional one-character code for sparse matrix representation type -which lets prior setting prevail, any other value is set as new logging level.} -} -\if{html}{\out{
    }} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-DenseReadIter-concat}{}}} -\subsection{Method \code{concat()}}{ -Concatenate remainder of iterator -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{DenseReadIter$concat()}\if{html}{\out{
    }} -} - -\subsection{Returns}{ -\link{matrixZeroBasedView} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-DenseReadIter-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{DenseReadIter$clone(deep = FALSE)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
    }} -} -} -} diff --git a/apis/r/man/SOMAArrayBase.Rd b/apis/r/man/SOMAArrayBase.Rd index 319c044cc4..6c07bf6323 100644 --- a/apis/r/man/SOMAArrayBase.Rd +++ b/apis/r/man/SOMAArrayBase.Rd @@ -4,11 +4,6 @@ \alias{SOMAArrayBase} \title{SOMA Array Base Class} \description{ -SOMA Array Base Class - -SOMA Array Base Class -} -\details{ Adds SOMA-specific functionality to the \code{\link{TileDBArray}} class. (lifecycle: experimental) } \section{Super classes}{ diff --git a/apis/r/man/SOMADenseNDArray.Rd b/apis/r/man/SOMADenseNDArray.Rd index 8bb6ae0152..af4cb41583 100644 --- a/apis/r/man/SOMADenseNDArray.Rd +++ b/apis/r/man/SOMADenseNDArray.Rd @@ -94,7 +94,6 @@ Read as an 'arrow::Table' (lifecycle: experimental) \if{html}{\out{
    }}\preformatted{SOMADenseNDArray$read_arrow_table( coords = NULL, result_order = "auto", - iterated = FALSE, log_level = "warn" )}\if{html}{\out{
    }} } @@ -112,9 +111,6 @@ read. List elements can be named when specifying a subset of dimensions.} \item{\code{result_order}}{Optional order of read results. This can be one of either \verb{"ROW_MAJOR, }"COL_MAJOR"\verb{, or }"auto"` (default).} -\item{\code{iterated}}{Option boolean indicated whether data is read in call (when -\code{FALSE}, the default value) or in several iterated steps.} - \item{\code{log_level}}{Optional logging level with default value of \code{"warn"}.} } \if{html}{\out{
    }} @@ -132,7 +128,6 @@ Read as a dense matrix (lifecycle: experimental) \if{html}{\out{
    }}\preformatted{SOMADenseNDArray$read_dense_matrix( coords = NULL, result_order = "ROW_MAJOR", - iterated = FALSE, log_level = "warn" )}\if{html}{\out{
    }} } @@ -150,9 +145,6 @@ read. List elements can be named when specifying a subset of dimensions.} \item{\code{result_order}}{Optional order of read results. This can be one of either \verb{"ROW_MAJOR, }"COL_MAJOR"\verb{, or }"auto"` (default).} -\item{\code{iterated}}{Option boolean indicated whether data is read in call (when -\code{FALSE}, the default value) or in several iterated steps.} - \item{\code{log_level}}{Optional logging level with default value of \code{"warn"}.} } \if{html}{\out{
    }} diff --git a/apis/r/man/SOMASparseNDArray.Rd b/apis/r/man/SOMASparseNDArray.Rd index 74d3dfdc46..5c554c9c35 100644 --- a/apis/r/man/SOMASparseNDArray.Rd +++ b/apis/r/man/SOMASparseNDArray.Rd @@ -136,7 +136,6 @@ more advanced operations (with one-based indexing). \if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_sparse_matrix_zero_based( coords = NULL, result_order = "auto", - repr = c("C", "T", "R"), iterated = FALSE, log_level = "warn" )}\if{html}{\out{
    }} @@ -155,8 +154,6 @@ read. List elements can be named when specifying a subset of dimensions.} \item{\code{result_order}}{Optional order of read results. This can be one of either \verb{"ROW_MAJOR, }"COL_MAJOR"\verb{, or }"auto"` (default).} -\item{\code{repr}}{Optional one-character code for sparse matrix representation type} - \item{\code{iterated}}{Option boolean indicated whether data is read in call (when \code{FALSE}, the default value) or in several iterated steps.} diff --git a/apis/r/man/SparseReadIter.Rd b/apis/r/man/SparseReadIter.Rd index 5beaf8eac4..de29f70aab 100644 --- a/apis/r/man/SparseReadIter.Rd +++ b/apis/r/man/SparseReadIter.Rd @@ -40,7 +40,8 @@ Create (lifecycle: experimental) qc = NULL, dim_points = NULL, loglevel = "auto", - repr = c("C", "T", "R") + repr = c("C", "T", "R"), + shape )}\if{html}{\out{
    }} } From 7906eec5674677eaf901b1920f82fbdf2d981160 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Wed, 17 May 2023 16:44:29 -0700 Subject: [PATCH 11/18] Update tests --- apis/r/R/SOMAExperimentAxisQuery.R | 4 +- apis/r/R/SOMASparseNDArray.R | 19 ++- apis/r/man/example-datasets.Rd | 2 +- .../testthat/test-SOMAArrayReader-Iterated.R | 143 ++++++++---------- .../r/tests/testthat/test-SOMASparseNDArray.R | 16 +- .../testthat/test-utils-matrixZeroBasedView.R | 7 +- 6 files changed, 96 insertions(+), 95 deletions(-) diff --git a/apis/r/R/SOMAExperimentAxisQuery.R b/apis/r/R/SOMAExperimentAxisQuery.R index e2e8c33db4..adc5500710 100644 --- a/apis/r/R/SOMAExperimentAxisQuery.R +++ b/apis/r/R/SOMAExperimentAxisQuery.R @@ -590,7 +590,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( dims = seq_len(as.integer(embed$shape()[2L])) - 1L ) embed_mat <- if (inherits(embed, 'SOMASparseNDArray')) { - as.matrix(embed$read_sparse_matrix_zero_based()[coords$cells, coords$dims]) + as.matrix(embed$read_sparse_matrix_zero_based(repr = "C")[coords$cells, coords$dims]) } else if (inherits(embed, 'SOMADenseNDArray')) { warning( paste( @@ -639,7 +639,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( dims = seq_len(as.integer(loads$shape()[2L])) - 1L ) load_mat <- if (inherits(loads, 'SOMASparseNDArray')) { - as.matrix(loads$read_sparse_matrix_zero_based()[coords$features, coords$dims]) + as.matrix(loads$read_sparse_matrix_zero_based(repr = "C")[coords$features, coords$dims]) } else if (inherits(loads, 'SOMADenseNDArray')) { warning( paste( diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index acf642655f..8b9b41ee97 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -127,6 +127,7 @@ SOMASparseNDArray <- R6::R6Class( rl <- soma_array_reader(uri = self$uri, config = cfg, dim_points = coords, + result_order = result_order, loglevel = log_level ) @@ -160,6 +161,7 @@ SOMASparseNDArray <- R6::R6Class( read_sparse_matrix_zero_based = 
function( coords = NULL, result_order = "auto", + repr = "T", iterated = FALSE, log_level = "warn" ) { @@ -167,12 +169,22 @@ SOMASparseNDArray <- R6::R6Class( attr <- self$attributes() shape <- self$shape() + stopifnot("'repr' must be a sinlge character string" = length(repr) == 1 | mode(repr) == "character", + "'repr' can only be one of 'C', 'R', or 'T', for dgCMatrix, dgRMatrix, or dgTMatrix, respectively" = + repr == "C" | repr == "R" | repr == "T") + + if (repr %in% c("C", "R") & iterated) { + stop("When `repr` is 'C' (dgCMatrix) or 'R' (dgRMatrix), iteration mode is not possible") + } + + stopifnot("Array must have two dimensions" = length(dims) == 2, "Array must contain columns 'soma_dim_0' and 'soma_dim_1'" = all.equal(c("soma_dim_0", "soma_dim_1"), names(dims)), "Array must have non-zero elements less than '.Machine$integer.max'" = self$nnz() < .Machine$integer.max, "Array dimensions must not exceed '.Machine$integer.max'" = any(shape < .Machine$integer.max), - "Array must contain column 'soma_data'" = all.equal("soma_data", names(attr))) + "Array must contain column 'soma_data'" = all.equal("soma_data", names(attr)) + ) if (!is.null(coords)) { coords <- private$convert_coords(coords) @@ -180,14 +192,15 @@ SOMASparseNDArray <- R6::R6Class( if (isFALSE(iterated)) { tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) - arrow_table_to_sparse(tbl, repr = "T", dims_one_based = shape + 1) + arrow_table_to_sparse(tbl, repr = repr, dims_one_based = shape + 1) } else { + stopifnot(repr == "T") cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) SparseReadIter$new(uri = self$uri, config = cfg, dim_points = coords, loglevel = log_level, - repr = "T", + repr = repr, shape = shape) } }, diff --git a/apis/r/man/example-datasets.Rd b/apis/r/man/example-datasets.Rd index 683e381cf8..d41e23f89d 100644 --- a/apis/r/man/example-datasets.Rd +++ b/apis/r/man/example-datasets.Rd @@ -11,7 +11,7 @@ list_datasets() 
extract_dataset(name, dir = tempdir()) -load_dataset(name, dir = tempdir()) +load_dataset(name, dir = tempdir(), tiledbsoma_ctx = NULL) } \arguments{ \item{name}{The name of the dataset.} diff --git a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R index e4ad04dce0..dcbcb78659 100644 --- a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R +++ b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R @@ -66,16 +66,17 @@ test_that("Iterated Interface from SOMAArrayReader", { expect_true(tiledbsoma:::sr_complete(sr)) }) + test_that("Iterated Interface from SOMA Classes", { skip_if_not_installed("pbmc3k.tiledb") # a Suggests: pre-package 3k PBMC data tdir <- tempfile() tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") untar(tarfile = tgzfile, exdir = tdir) - uri <- file.path(tdir, "soco", "pbmc3k_processed", "ms", "RNA", "X", "data") + uri <- file.path(tdir, "soco", "pbmc3k_processed", "ms", "raw", "X", "data") ## parameterize test - test_cases <- c("data.frame", "sparse", "dense") + test_cases <- c("data.frame", "sparse") for (tc in test_cases) { sdf <- switch(tc, @@ -83,21 +84,49 @@ test_that("Iterated Interface from SOMA Classes", { sparse = SOMASparseNDArray$new(uri, internal_use_only = "allowed_use"), dense = SOMADenseNDArray$new(uri, internal_use_only = "allowed_use")) expect_true(inherits(sdf, "SOMAArrayBase")) - - rl <- switch(tc, - data.frame = sdf$read(iterated = TRUE), - sparse = sdf$read_arrow_table(iterated = TRUE), - dense = sdf$read_arrow_table(iterated = TRUE)) - expect_true(is.list(rl)) - expect_true(sdf$read_complete()) - n <- length(rl) - expect_true(n > 0) - - dat <- do.call(rbind, rl) + + iterator <- switch(tc, + data.frame = sdf$read(iterated = TRUE), + sparse = sdf$read_arrow_table(iterated = TRUE), + dense = sdf$read_arrow_table(iterated = TRUE)) + expect_true(inherits(iterator, "ReadIter")) + expect_true(inherits(iterator, "TableReadIter")) + + # 
Test $concat() + expect_false(iterator$read_complete()) + dat <- iterator$concat() + expect_true(iterator$read_complete()) expect_true(inherits(dat, "Table")) expect_equal(dat$num_columns, 3) - expect_equal(dat$num_rows, 4848644) - + expect_equal(dat$num_rows, 2238732) + + rm(iterator) + + # Test $read_next() + iterator <- switch(tc, + data.frame = sdf$read(iterated = TRUE), + sparse = sdf$read_arrow_table(iterated = TRUE), + dense = sdf$read_arrow_table(iterated = TRUE)) + + expect_false(iterator$read_complete()) + for (i in 1:2) { + + expect_false(iterator$read_complete()) + dat_slice <- iterator$read_next() + expect_true(inherits(dat_slice, "Table")) + expect_equal(dat_slice$num_columns, 3) + + if (i < 2) { + expect_equal(dat_slice$num_rows, 2097152) + } else { + expect_equal(dat_slice$num_rows, 141580) + } + } + + expect_true(iterator$read_complete()) + expect_null(iterator$read_next()) + expect_warning(iterator$read_next()) + rm(sdf) } @@ -109,84 +138,38 @@ test_that("Iterated Interface from SOMA Sparse Matrix", { tdir <- tempfile() tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") untar(tarfile = tgzfile, exdir = tdir) - uri <- file.path(tdir, "soco", "pbmc3k_processed", "ms", "RNA", "X", "data") + uri <- file.path(tdir, "soco", "pbmc3k_processed", "ms", "raw", "X", "data") sdf <- SOMASparseNDArray$new(uri, internal_use_only = "allowed_use") expect_true(inherits(sdf, "SOMAArrayBase")) + + expect_error(sdf$read_sparse_matrix_zero_based(repr = "x")) + expect_error(sdf$read_sparse_matrix_zero_based(iterated = TRUE, repr = "C")) + expect_error(sdf$read_sparse_matrix_zero_based(iterated = TRUE, repr = "R")) - sdf$read_sparse_matrix_zero_based(iterated = TRUE) + iterator <- sdf$read_sparse_matrix_zero_based(iterated = TRUE) - nnzRows <- function(m) { sum(Matrix::rowSums(m != 0) > 0) } nnzTotal <- 0 rowsTotal <- 0 - for (i in 1:4) { - expect_false(sdf$read_complete()) - dat <- as.one.based(sdf$read_next()) + for (i in 1:2) { + 
expect_false(iterator$read_complete()) + dat <- as.one.based(iterator$read_next()) nnz <- Matrix::nnzero(dat) expect_gt(nnz, 0) nnzTotal <- nnzTotal + nnz - rowsTotal <- rowsTotal + nnzRows(dat) - # the shard dims always match the shape of the whole sparse matrix - expect_equal(dim(dat), as.integer(sdf$shape())) + # the shard dims always match the shape of the whole sparse matrix + 1 + # + 1 beacause the shape is 0-based and dim() here returns 1-based + expect_equal(dim(dat), as.integer(sdf$shape()) + 1) } - expect_true(sdf$read_complete()) - - # FIXME: TileDB-SOMA issue #1111 - # expect_equal(rowsTotal, nnzRows(sdf$read_sparse_matrix())) - # expect_equal(nnzTotal, Matrix::nnzero(sdf$read_sparse_matrix())) - # in fact however, the test array is dense 2638x1838 with all nonzero entries. - expect_equal(rowsTotal, 2638) - expect_equal(nnzTotal, 4848644) - expect_equal(nnzTotal, prod(as.integer(sdf$shape()))) + + expect_true(iterator$read_complete()) + expect_null(iterator$read_next()) + expect_warning(iterator$read_next()) + + expect_equal(nnzTotal, Matrix::nnzero(as.one.based(sdf$read_sparse_matrix_zero_based(iterated=T)$concat()))) + expect_equal(nnzTotal, 2238732) rm(sdf) }) -test_that("Iterated Interface from SOMA Dense Matrix", { - skip_if_not_installed("pbmc3k.tiledb") # a Suggests: pre-package 3k PBMC data - - tdir <- tempfile() - tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") - untar(tarfile = tgzfile, exdir = tdir) - uri <- file.path(tdir, "soco", "pbmc3k_processed", "ms", "RNA", "X", "data") - - sdf <- SOMADenseNDArray$new(uri, internal_use_only = "allowed_use") - expect_true(inherits(sdf, "SOMAArrayBase")) - - sdf$read_dense_matrix(iterated = TRUE) - - expect_false(sdf$read_complete()) - dat <- sdf$read_next() - d <- dim(dat) - expect_equal(d[2], 1838) - n <- d[1] - expect_true(n > 0) - - expect_false(sdf$read_complete()) - dat <- sdf$read_next() - d <- dim(dat) - expect_equal(d[2], 1838) - n <- n + d[1] - expect_true(n > 
0) - - expect_false(sdf$read_complete()) - dat <- sdf$read_next() - d <- dim(dat) - expect_equal(d[2], 1838) - n <- n + d[1] - expect_true(n > 0) - - expect_false(sdf$read_complete()) - dat <- sdf$read_next() - d <- dim(dat) - expect_equal(d[2], 1838) - n <- n + d[1] - expect_true(n > 0) - - expect_equal(n, 2638) - expect_true(sdf$read_complete()) - - rm(sdf) - -}) diff --git a/apis/r/tests/testthat/test-SOMASparseNDArray.R b/apis/r/tests/testthat/test-SOMASparseNDArray.R index c41d3bf917..319aa29ead 100644 --- a/apis/r/tests/testthat/test-SOMASparseNDArray.R +++ b/apis/r/tests/testthat/test-SOMASparseNDArray.R @@ -83,9 +83,11 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", { mat2 <- ndarray$read_sparse_matrix_zero_based(repr="T") expect_true(inherits(mat2, "matrixZeroBasedView")) expect_s4_class(as.one.based(mat2), "sparseMatrix") - expect_equal(dim(mat2), c(10, 10)) - expect_equal(nrow(mat2), 10) - expect_equal(ncol(mat2), 10) + # 10 + 1 because 10 is the max 0-based dimension value + # and the following return 1-based values + expect_equal(dim(mat2), c(10 + 1, 10 + 1)) + expect_equal(nrow(mat2), 10 + 1) + expect_equal(ncol(mat2), 10 + 1) ## not sure why all.equal(mat, mat2) does not pass expect_true(all.equal(as.numeric(mat), as.numeric(mat2[0:8,0:8]))) expect_equal(sum(mat), sum(as.one.based(mat2))) @@ -95,9 +97,11 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", { mat2 <- ndarray$read_next() expect_true(inherits(mat2, "matrixZeroBasedView")) expect_s4_class(as.one.based(mat2), "sparseMatrix") - expect_equal(dim(mat2), c(10, 10)) - expect_equal(nrow(mat2), 10) - expect_equal(ncol(mat2), 10) + # 10 + 1 because 10 is the max 0-based dimension value + # and the following return 1-based values + expect_equal(dim(mat2), c(10 + 1, 10 + 1)) + expect_equal(nrow(mat2), 10 + 1) + expect_equal(ncol(mat2), 10 + 1) expect_true(all.equal(as.numeric(mat), as.numeric(mat2[0:8,0:8]))) expect_equal(sum(mat), sum(as.one.based(mat2))) }) diff --git 
a/apis/r/tests/testthat/test-utils-matrixZeroBasedView.R b/apis/r/tests/testthat/test-utils-matrixZeroBasedView.R index 3b9fe9f845..96a8da2971 100644 --- a/apis/r/tests/testthat/test-utils-matrixZeroBasedView.R +++ b/apis/r/tests/testthat/test-utils-matrixZeroBasedView.R @@ -52,8 +52,9 @@ test_that("matrixZeroBasedView", { expect_error(mat[1, ] <- c(0, 99, 0), rdo) expect_error(mat[, 1] <- c(0, 99, 0), rdo) - # reject arithmetic - expect_error(mat + 1) - expect_error(mat + mat) + # test arithmetic + expect_equal (as.one.based(mat + 1), mat1 + 1) + expect_equal (as.one.based(mat + mat), mat1 + mat1) + expect_equal (as.one.based(mat == 41), mat1 == 41) } }) From 5d1ca4d31d311e3127c6eb2ea31d57bde95c5e37 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Wed, 17 May 2023 22:05:51 -0700 Subject: [PATCH 12/18] Fix bugs --- apis/r/R/SOMASparseNDArray.R | 3 +- apis/r/R/SparseReadIter.R | 7 +++-- apis/r/R/utils-readerTransformers.R | 31 ++++++++++++------- apis/r/man/arrow_table_to_dense.Rd | 21 +++++++++++++ apis/r/man/arrow_table_to_sparse.Rd | 23 ++++++++++++++ apis/r/man/soma_array_to_arrow_table.Rd | 18 +++++++++++ .../testthat/test-SOMAArrayReader-Iterated.R | 7 +++-- .../r/tests/testthat/test-SOMASparseNDArray.R | 20 +++++------- 8 files changed, 99 insertions(+), 31 deletions(-) create mode 100644 apis/r/man/arrow_table_to_dense.Rd create mode 100644 apis/r/man/arrow_table_to_sparse.Rd create mode 100644 apis/r/man/soma_array_to_arrow_table.Rd diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index 0c8b98ce7b..d4d63cb5be 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -164,6 +164,7 @@ SOMASparseNDArray <- R6::R6Class( #' length equal to the number of values to read. If `NULL`, all values are #' read. List elements can be named when specifying a subset of dimensions. 
#' @template param-result-order + #' @param repr Optional one-character code for sparse matrix representation type #' @param iterated Option boolean indicated whether data is read in call (when #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. @@ -204,7 +205,7 @@ SOMASparseNDArray <- R6::R6Class( if (isFALSE(iterated)) { tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) - arrow_table_to_sparse(tbl, repr = repr, dims_one_based = shape + 1) + arrow_table_to_sparse(tbl, repr = repr, shape = shape) } else { stopifnot(repr == "T") cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index 1c521fdc5a..139fdf44a2 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -23,6 +23,7 @@ SparseReadIter <- R6::R6Class( #' @param loglevel Character value with the desired logging level, defaults to \sQuote{auto} #' @param repr Optional one-character code for sparse matrix representation type #' which lets prior setting prevail, any other value is set as new logging level. + #' @param shape Numerical vector with two elements. initialize = function(uri, config, colnames = NULL, @@ -37,7 +38,7 @@ SparseReadIter <- R6::R6Class( dim_points = dim_points, loglevel = loglevel) private$repr <- repr - private$dims_one_based <- shape + 1 + private$shape <- shape }, #' @description Concatenate remainder of iterator. 
@@ -62,11 +63,11 @@ SparseReadIter <- R6::R6Class( private = list( repr=NULL, - dims_one_based=NULL, + shape=NULL, ## refined from base class soma_reader_transform = function(x) { - arrow_table_to_sparse(soma_array_to_arrow_table(x), repr = private$repr, dims_one_based = private$dims_one_based) + arrow_table_to_sparse(soma_array_to_arrow_table(x), repr = private$repr, shape = private$shape) } ) diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R index ca7b020fb2..308bc30851 100644 --- a/apis/r/R/utils-readerTransformers.R +++ b/apis/r/R/utils-readerTransformers.R @@ -1,18 +1,23 @@ +#' Transformer function: SOMAArray to Arrow table +#' #' @description Converts the results of a \link{soma_array_reader} or \link{sr_next} to #' an arrow::\link[arrow]{Table} +#' @param x A List object with two pointers to Arrow array data and schema #' @return arrow::\link[arrow]{Table} soma_array_to_arrow_table <- function(x) { arrow::as_arrow_table(arrow::RecordBatch$import_from_c(x[[1]], x[[2]])) } +#' Transformer function: Arrow table to Matrix::sparseMatrix +#' #' @description Converts a \link[Arrow]{Table} of sparse format (columns: "soma_dim_0", #' "soma_dim_1", "soma_data") to a \link{matrixZeroBasedView} #' @param tbl \link[Arrow]{Table} with columns "soma_dim_0", "soma_dim_1", and "soma_datah" #' @param repr Optional one-character code for sparse matrix representation type -#' @param dims_one_based Numerical vectors with two elements, one for each dimension. If -#' \code{NULL}, then the following is used \code{c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} +#' @param shape Numerical vector with two elements, one for each dimension. 
If +#' \code{NULL}, then the following is used \code{1 + c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} #' @return \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} -arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), dims_one_based = NULL) { +arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), shape = NULL) { # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). But, because these dimensions are @@ -27,27 +32,29 @@ arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), dims_one_based = soma_data <- as.numeric(tbl$GetColumnByName("soma_data")) - if (is.null(dims_one_based)) { - dims_one_based <- c(max(soma_dim_0_one_based), max(soma_dim_1_one_based)) + if (is.null(shape)) { + shape <- c(max(soma_dim_0_one_based), max(soma_dim_1_one_based)) } - if(any(dims_one_based > .Machine$integer.max)) { - error("The dimensions of the array are larger than supported by Matrix::sparseMatrix") + if(any(shape > .Machine$integer.max)) { + error("The shape of the array is larger than supported by Matrix::sparseMatrix") } mat <- Matrix::sparseMatrix(i = soma_dim_0_one_based, j = soma_dim_1_one_based, x = soma_data, - dims = dims_one_based, repr = repr) + dims = shape, repr = repr) matrixZeroBasedView(mat) } + +#' Transformer function: Arrow table to matrix +#' #' @description Converts a \link[Arrow]{Table} of sparse format (columns: "soma_dim_0", -#' "soma_dim_1", "soma_data") to a \link{matrixZeroBasedView} +#' "soma_dim_1", "soma_data") to a \link{matrixZeroBasedView} of a \link{matrix}. #' @param tbl \link[Arrow]{Table} with columns "soma_dim_0", "soma_dim_1", and "soma_datah" -#' @param repr Optional one-character code for sparse matrix representation type -#' @param dims_one_based Numerical vectors with two elements, one for each dimension. 
If -#' \code{NULL}, then the following is used \code{c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} +#' @param byrow Logical, TRUE if "soma_data" is ordered by row, this argument is directly passed +#' to the argument \code{byrow} of \link{matrix} #' @return \link{matrixZeroBasedView} of \link[base]{matrix} arrow_table_to_dense <- function(tbl, byrow) { diff --git a/apis/r/man/arrow_table_to_dense.Rd b/apis/r/man/arrow_table_to_dense.Rd new file mode 100644 index 0000000000..e1b95a04a0 --- /dev/null +++ b/apis/r/man/arrow_table_to_dense.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-readerTransformers.R +\name{arrow_table_to_dense} +\alias{arrow_table_to_dense} +\title{Transformer function: Arrow table to matrix} +\usage{ +arrow_table_to_dense(tbl, byrow) +} +\arguments{ +\item{tbl}{\link[Arrow]{Table} with columns "soma_dim_0", "soma_dim_1", and "soma_datah"} + +\item{byrow}{Logical, TRUE if "soma_data" is ordered by row, this argument is directly passed +to the argument \code{byrow} of \link{matrix}} +} +\value{ +\link{matrixZeroBasedView} of \link[base]{matrix} +} +\description{ +Converts a \link[Arrow]{Table} of sparse format (columns: "soma_dim_0", +"soma_dim_1", "soma_data") to a \link{matrixZeroBasedView} of a \link{matrix}. 
+} diff --git a/apis/r/man/arrow_table_to_sparse.Rd b/apis/r/man/arrow_table_to_sparse.Rd new file mode 100644 index 0000000000..e14ffa87a4 --- /dev/null +++ b/apis/r/man/arrow_table_to_sparse.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-readerTransformers.R +\name{arrow_table_to_sparse} +\alias{arrow_table_to_sparse} +\title{Transformer function: Arrow table to Matrix::sparseMatrix} +\usage{ +arrow_table_to_sparse(tbl, repr = c("C", "T", "R"), shape = NULL) +} +\arguments{ +\item{tbl}{\link[Arrow]{Table} with columns "soma_dim_0", "soma_dim_1", and "soma_datah"} + +\item{repr}{Optional one-character code for sparse matrix representation type} + +\item{shape}{Numerical vector with two elements, one for each dimension. If +\code{NULL}, then the following is used \code{1 + c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))}} +} +\value{ +\link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} +} +\description{ +Converts a \link[Arrow]{Table} of sparse format (columns: "soma_dim_0", +"soma_dim_1", "soma_data") to a \link{matrixZeroBasedView} +} diff --git a/apis/r/man/soma_array_to_arrow_table.Rd b/apis/r/man/soma_array_to_arrow_table.Rd new file mode 100644 index 0000000000..901e82bf5f --- /dev/null +++ b/apis/r/man/soma_array_to_arrow_table.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-readerTransformers.R +\name{soma_array_to_arrow_table} +\alias{soma_array_to_arrow_table} +\title{Transformer function: SOMAArray to Arrow table} +\usage{ +soma_array_to_arrow_table(x) +} +\arguments{ +\item{x}{A List object with two pointers to Arrow array data and schema} +} +\value{ +arrow::\link[arrow]{Table} +} +\description{ +Converts the results of a \link{soma_array_reader} or \link{sr_next} to +an arrow::\link[arrow]{Table} +} diff --git a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R 
index f5ca726a43..93ce923e8d 100644 --- a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R +++ b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R @@ -84,6 +84,7 @@ test_that("Iterated Interface from SOMA Classes", { sparse = SOMASparseNDArray$new(uri, internal_use_only = "allowed_use"), dense = SOMADenseNDArray$new(uri, internal_use_only = "allowed_use")) expect_true(inherits(sdf, "SOMAArrayBase")) + sdf$open("READ", internal_use_only = "allowed_use") iterator <- switch(tc, data.frame = sdf$read(iterated = TRUE), @@ -142,6 +143,7 @@ test_that("Iterated Interface from SOMA Sparse Matrix", { sdf <- SOMASparseNDArray$new(uri, internal_use_only = "allowed_use") expect_true(inherits(sdf, "SOMAArrayBase")) + sdf$open("READ", internal_use_only = "allowed_use") expect_error(sdf$read_sparse_matrix_zero_based(repr = "x")) expect_error(sdf$read_sparse_matrix_zero_based(iterated = TRUE, repr = "C")) @@ -157,9 +159,8 @@ test_that("Iterated Interface from SOMA Sparse Matrix", { nnz <- Matrix::nnzero(dat) expect_gt(nnz, 0) nnzTotal <- nnzTotal + nnz - # the shard dims always match the shape of the whole sparse matrix + 1 - # + 1 beacause the shape is 0-based and dim() here returns 1-based - expect_equal(dim(dat), as.integer(sdf$shape()) + 1) + # the shard dims always match the shape of the whole sparse matrix + expect_equal(dim(dat), as.integer(sdf$shape())) } expect_true(iterator$read_complete()) diff --git a/apis/r/tests/testthat/test-SOMASparseNDArray.R b/apis/r/tests/testthat/test-SOMASparseNDArray.R index c2882955ab..fc05032e7c 100644 --- a/apis/r/tests/testthat/test-SOMASparseNDArray.R +++ b/apis/r/tests/testthat/test-SOMASparseNDArray.R @@ -88,11 +88,9 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", { mat2 <- ndarray$read_sparse_matrix_zero_based(repr="T") expect_true(inherits(mat2, "matrixZeroBasedView")) expect_s4_class(as.one.based(mat2), "sparseMatrix") - # 10 + 1 because 10 is the max 0-based dimension value - # and the following return 
1-based values - expect_equal(dim(mat2), c(10 + 1, 10 + 1)) - expect_equal(nrow(mat2), 10 + 1) - expect_equal(ncol(mat2), 10 + 1) + expect_equal(dim(mat2), c(10, 10)) + expect_equal(nrow(mat2), 10) + expect_equal(ncol(mat2), 10) ## not sure why all.equal(mat, mat2) does not pass expect_true(all.equal(as.numeric(mat), as.numeric(mat2[0:8,0:8]))) expect_equal(sum(mat), sum(as.one.based(mat2))) @@ -100,15 +98,13 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", { ndarray <- SOMASparseNDArrayOpen(uri) # repeat with iterated reader - ndarray$read_sparse_matrix_zero_based(repr="T", iterated=TRUE) - mat2 <- ndarray$read_next() + iterator <- ndarray$read_sparse_matrix_zero_based(repr="T", iterated=TRUE) + mat2 <- iterator$read_next() expect_true(inherits(mat2, "matrixZeroBasedView")) expect_s4_class(as.one.based(mat2), "sparseMatrix") - # 10 + 1 because 10 is the max 0-based dimension value - # and the following return 1-based values - expect_equal(dim(mat2), c(10 + 1, 10 + 1)) - expect_equal(nrow(mat2), 10 + 1) - expect_equal(ncol(mat2), 10 + 1) + expect_equal(dim(mat2), c(10, 10)) + expect_equal(nrow(mat2), 10) + expect_equal(ncol(mat2), 10) expect_true(all.equal(as.numeric(mat), as.numeric(mat2[0:8,0:8]))) expect_equal(sum(mat), sum(as.one.based(mat2))) ndarray$close() From 7017d2790efdcf41d918259ca2c5f324a6a0dd41 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 18 May 2023 09:56:54 -0700 Subject: [PATCH 13/18] Update docs --- apis/r/man/SOMACollectionBase.Rd | 2 +- apis/r/man/SOMASparseNDArray.Rd | 4 +++- apis/r/man/SparseReadIter.Rd | 2 ++ apis/r/man/TileDBArray.Rd | 8 ++++++++ apis/r/man/TileDBGroup.Rd | 4 ++-- apis/r/man/write_soma.Rd | 2 +- apis/r/man/write_soma.Seurat.Rd | 6 +++--- apis/r/man/write_soma_objects.Rd | 2 +- apis/r/man/write_soma_seurat_sub.Rd | 6 +++--- 9 files changed, 24 insertions(+), 12 deletions(-) diff --git a/apis/r/man/SOMACollectionBase.Rd b/apis/r/man/SOMACollectionBase.Rd index 0d02b58197..4710ffd8fa 100644 --- 
a/apis/r/man/SOMACollectionBase.Rd +++ b/apis/r/man/SOMACollectionBase.Rd @@ -132,7 +132,7 @@ default), the object's URI is assumed to be relative unless it is a \subsection{Method \code{get()}}{ Retrieve a SOMA object by name. (lifecycle: experimental) \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMACollectionBase$get(name, mode = "READ")}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{SOMACollectionBase$get(name)}\if{html}{\out{
    }} } \subsection{Arguments}{ diff --git a/apis/r/man/SOMASparseNDArray.Rd b/apis/r/man/SOMASparseNDArray.Rd index c4ab945739..9d20e03dfa 100644 --- a/apis/r/man/SOMASparseNDArray.Rd +++ b/apis/r/man/SOMASparseNDArray.Rd @@ -145,11 +145,11 @@ with zero-based indexes as well as \code{dim()},\code{nrow()}, and \code{ncol()} arithmetic operations as defined in \link[base]{groupGeneric}. Use \code{as.one.based()} to get a fully-featured sparse matrix object supporting more advanced operations (with one-based indexing). - \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_sparse_matrix_zero_based( coords = NULL, result_order = "auto", + repr = "T", iterated = FALSE, log_level = "warn" )}\if{html}{\out{
    }} @@ -168,6 +168,8 @@ read. List elements can be named when specifying a subset of dimensions.} \item{\code{result_order}}{Optional order of read results. This can be one of either \verb{"ROW_MAJOR, }"COL_MAJOR"\verb{, or }"auto"` (default).} +\item{\code{repr}}{Optional one-character code for sparse matrix representation type} + \item{\code{iterated}}{Option boolean indicated whether data is read in call (when \code{FALSE}, the default value) or in several iterated steps.} diff --git a/apis/r/man/SparseReadIter.Rd b/apis/r/man/SparseReadIter.Rd index de29f70aab..1c62d35fa1 100644 --- a/apis/r/man/SparseReadIter.Rd +++ b/apis/r/man/SparseReadIter.Rd @@ -64,6 +64,8 @@ dimension(s). Each dimension can be one entry in the list.} \item{\code{repr}}{Optional one-character code for sparse matrix representation type which lets prior setting prevail, any other value is set as new logging level.} + +\item{\code{shape}}{Numerical vector with two elements.} } \if{html}{\out{
    }} } diff --git a/apis/r/man/TileDBArray.Rd b/apis/r/man/TileDBArray.Rd index a3a5cefaa2..c8e6f51482 100644 --- a/apis/r/man/TileDBArray.Rd +++ b/apis/r/man/TileDBArray.Rd @@ -10,6 +10,14 @@ Base class for representing an individual TileDB array. \section{Super class}{ \code{\link[tiledbsoma:TileDBObject]{tiledbsoma::TileDBObject}} -> \code{TileDBArray} } +\section{Active bindings}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{object}}{Access the underlying TileDB object directly (either a +\code{\link[tiledb:tiledb_array]{tiledb::tiledb_array}} or \code{\link[tiledb:tiledb_group]{tiledb::tiledb_group}}).} +} +\if{html}{\out{
    }} +} \section{Methods}{ \subsection{Public methods}{ \itemize{ diff --git a/apis/r/man/TileDBGroup.Rd b/apis/r/man/TileDBGroup.Rd index 467a9ebc55..a0c681bd13 100644 --- a/apis/r/man/TileDBGroup.Rd +++ b/apis/r/man/TileDBGroup.Rd @@ -136,9 +136,9 @@ default), the object's URI is assumed to be relative unless it is a \if{latex}{\out{\hypertarget{method-TileDBGroup-get}{}}} \subsection{Method \code{get()}}{ Retrieve a group member by name. If the member isn't already -open, it is opened for read. (lifecycle: experimental) +open, it is opened in the same mode as the parent. (lifecycle: experimental) \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{TileDBGroup$get(name, mode = "READ")}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{TileDBGroup$get(name)}\if{html}{\out{
    }} } \subsection{Arguments}{ diff --git a/apis/r/man/write_soma.Rd b/apis/r/man/write_soma.Rd index e290ed9436..50cff90980 100644 --- a/apis/r/man/write_soma.Rd +++ b/apis/r/man/write_soma.Rd @@ -20,7 +20,7 @@ configuration}} } \value{ The URI to the resulting \code{\link{SOMAExperiment}} generated from -the data contained in \code{x} +the data contained in \code{x}, returned opened for write } \description{ Convert \R objects to their appropriate SOMA counterpart diff --git a/apis/r/man/write_soma.Seurat.Rd b/apis/r/man/write_soma.Seurat.Rd index 24dcf861d5..1709808aeb 100644 --- a/apis/r/man/write_soma.Seurat.Rd +++ b/apis/r/man/write_soma.Seurat.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/write_seurat.R \name{write_soma.Seurat} \alias{write_soma.Seurat} -\title{Write a \code{\link[SeuratObject]{Seurat}} object to a SOMA} +\title{Write a \code{\link[SeuratObject]{Seurat}} object to a SOMA, returned opened for write} \usage{ \method{write_soma}{Seurat}(x, uri, ..., platform_config = NULL, tiledbsoma_ctx = NULL) } @@ -20,10 +20,10 @@ configuration}} } \value{ The URI to the resulting \code{\link{SOMAExperiment}} generated from -the data contained in \code{x} +the data contained in \code{x}, returned opened for write } \description{ -Write a \code{\link[SeuratObject]{Seurat}} object to a SOMA +Write a \code{\link[SeuratObject]{Seurat}} object to a SOMA, returned opened for write } \section{Writing Cell-Level Meta Data}{ diff --git a/apis/r/man/write_soma_objects.Rd b/apis/r/man/write_soma_objects.Rd index e7b50e9b2f..934d1d14d3 100644 --- a/apis/r/man/write_soma_objects.Rd +++ b/apis/r/man/write_soma_objects.Rd @@ -96,7 +96,7 @@ determine arrow type with \code{\link[arrow:infer_type]{arrow::infer_type}()}} } \value{ The resulting SOMA \link[tiledbsoma:SOMASparseNDArray]{array} or -\link[tiledbsoma:SOMADataFrame]{data frame} +\link[tiledbsoma:SOMADataFrame]{data frame}, returned opened for write } \description{ Various helpers to write R objects to SOMA 
diff --git a/apis/r/man/write_soma_seurat_sub.Rd b/apis/r/man/write_soma_seurat_sub.Rd index ad3c993812..a4eac60ee0 100644 --- a/apis/r/man/write_soma_seurat_sub.Rd +++ b/apis/r/man/write_soma_seurat_sub.Rd @@ -5,7 +5,7 @@ \alias{write_soma.Assay} \alias{write_soma.DimReduc} \alias{write_soma.Graph} -\title{Convert a \pkg{Seurat} Sub-Object to a SOMA Object} +\title{Convert a \pkg{Seurat} Sub-Object to a SOMA Object, returned opened for write} \usage{ \method{write_soma}{Assay}( x, @@ -70,10 +70,10 @@ relative or aboslute} } \value{ \code{Assay} method: a \code{\link{SOMAMeasurement}} with the -data from \code{x} +data from \code{x}, returned opened for write \code{DimReduc} and \code{Graph} methods: invisibly returns -\code{soma_parent} with the values of \code{x} added to it +\code{soma_parent}, opened for write, with the values of \code{x} added to it } \description{ Various helpers to write \pkg{Seurat} sub-objects to SOMA objects. From bc5218f8ef77dda927edc9a4bd299b33ec59f1e8 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 25 May 2023 01:40:32 -0700 Subject: [PATCH 14/18] Refractor to follow spec; update tests --- apis/r/R/ReadIter.R | 28 +----- apis/r/R/SOMADataFrame.R | 29 ++---- apis/r/R/SOMAExperimentAxisQuery.R | 27 +++--- apis/r/R/SOMASparseNDArray.R | 96 +++---------------- apis/r/R/SOMASparseNDArrayRead.R | 44 +++++++++ apis/r/R/SparseReadIter.R | 36 ++----- apis/r/tests/testthat/test-Factory.R | 18 ++-- .../testthat/test-SOMAArrayReader-Basics.R | 35 ------- .../testthat/test-SOMAArrayReader-Iterated.R | 22 ++--- apis/r/tests/testthat/test-SOMADataFrame.R | 20 ++-- .../testthat/test-SOMAExperiment-query.R | 81 ++++++++-------- .../r/tests/testthat/test-SOMASparseNDArray.R | 63 ++++++------ .../test_dataframe_write_python_read_r.py | 2 +- .../test_sparsendarray_write_python_read_r.py | 4 +- 14 files changed, 198 insertions(+), 307 deletions(-) create mode 100644 apis/r/R/SOMASparseNDArrayRead.R delete mode 100644 
apis/r/tests/testthat/test-SOMAArrayReader-Basics.R diff --git a/apis/r/R/ReadIter.R b/apis/r/R/ReadIter.R index c412889922..5cd60af654 100644 --- a/apis/r/R/ReadIter.R +++ b/apis/r/R/ReadIter.R @@ -8,31 +8,9 @@ ReadIter <- R6::R6Class( public = list( #' @description Create (lifecycle: experimental) - #' @param uri Character value with URI path to a SOMADataFrame or SOMASparseNDArray - #' @param config character vector containing TileDB config. - #' @param colnames Optional vector of character value with the name of the columns to retrieve - #' @param qc Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. - #' no query condition - #' @param dim_points Optional named list with vector of data points to select on the given - #' dimension(s). Each dimension can be one entry in the list. - #' @param loglevel Character value with the desired logging level, defaults to \sQuote{auto} - #' which lets prior setting prevail, any other value is set as new logging level. - initialize = function(uri, - config, - colnames = NULL, - qc = NULL, - dim_points = NULL, - loglevel = "auto") { - - # Instantiate soma_reader_pointer with a soma_array_reader object - private$soma_reader_pointer <- sr_setup( - uri = uri, - config = config, - colnames = colnames, - qc = qc, - dim_points = dim_points, - loglevel = loglevel - ) + #' @param sr + initialize = function(sr) { + private$soma_reader_pointer <- sr }, #' @description Check if iterated read is complete or not. 
(lifecycle: experimental) diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index af6e8f70b3..65284c6302 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -216,26 +216,15 @@ SOMADataFrame <- R6::R6Class( } cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) - if (isFALSE(iterated)) { - rl <- soma_array_reader(uri = self$uri, - config = cfg, - colnames = column_names, # NULL dealt with by sr_setup() - qc = value_filter, # idem - dim_points = coords, - loglevel = log_level - ) - - soma_array_to_arrow_table(rl) - } else { - read_iter <- TableReadIter$new(uri = self$uri, - config = cfg, - colnames = column_names, # NULL dealt with by sr_setup() - qc = value_filter, # idem - dim_points = coords, # idem - loglevel = log_level # idem - ) - return(read_iter) - } + sr <- sr_setup(uri = self$uri, + config = cfg, + colnames = column_names, + qc = value_filter, + dim_points = coords, + loglevel = log_level) + + TableReadIter$new(sr) + } ), diff --git a/apis/r/R/SOMAExperimentAxisQuery.R b/apis/r/R/SOMAExperimentAxisQuery.R index adc5500710..f7f1c7a39d 100644 --- a/apis/r/R/SOMAExperimentAxisQuery.R +++ b/apis/r/R/SOMAExperimentAxisQuery.R @@ -69,7 +69,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( coords = recursively_make_integer64(obs_query$coords), value_filter = obs_query$value_filter, column_names = column_names - ) + )$concat() }, #' @description Retrieve var [`arrow::Table`] @@ -80,7 +80,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( coords = recursively_make_integer64(var_query$coords), value_filter = var_query$value_filter, column_names = column_names - ) + )$concat() }, #' @description Retrieve `soma_joinids` as an [`arrow::Array`] for `obs`. @@ -93,7 +93,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( arrow::concat_arrays(private$.joinids$var()) }, - #' @description Retrieves an `X` layer as an [`arrow::Table`]. 
+ #' @description Retrieves an `X` layer as a link{SOMASparseNDArrayRead} #' @param layer_name The name of the layer to retrieve. X = function(layer_name) { stopifnot( @@ -109,10 +109,10 @@ SOMAExperimentAxisQuery <- R6::R6Class( ) # TODO: Stop converting to vectors when SOMAArrayReader supports arrow arrays - x_layer$read_arrow_table(coords = list( + x_layer$read(coords = list( self$obs_joinids()$as_vector(), self$var_joinids()$as_vector() - )) + ))$tables()$concat() }, #' @description Reads the entire query result as a list of @@ -158,10 +158,10 @@ SOMAExperimentAxisQuery <- R6::R6Class( var_ft <- self$var(var_column_names) x_matrices <- lapply(x_arrays, function(x_array) { - x_array$read_arrow_table(coords = list( + x_array$read(coords = list( self$obs_joinids()$as_vector(), self$var_joinids()$as_vector() - )) + ))$tables()$concat() } ) @@ -590,7 +590,9 @@ SOMAExperimentAxisQuery <- R6::R6Class( dims = seq_len(as.integer(embed$shape()[2L])) - 1L ) embed_mat <- if (inherits(embed, 'SOMASparseNDArray')) { - as.matrix(embed$read_sparse_matrix_zero_based(repr = "C")[coords$cells, coords$dims]) + this_mat <- embed$read()$sparse_matrix(zero_based=TRUE)$concat() + this_mat <- as(this_mat[coords$cells, coords$dims], "CsparseMatrix") + as.matrix(this_mat) } else if (inherits(embed, 'SOMADenseNDArray')) { warning( paste( @@ -639,7 +641,9 @@ SOMAExperimentAxisQuery <- R6::R6Class( dims = seq_len(as.integer(loads$shape()[2L])) - 1L ) load_mat <- if (inherits(loads, 'SOMASparseNDArray')) { - as.matrix(loads$read_sparse_matrix_zero_based(repr = "C")[coords$features, coords$dims]) + this_mat <- loads$read()$sparse_matrix(zero_based=TRUE)$concat() + this_mat <- as(this_mat[coords$features, coords$dims], "CsparseMatrix") + as.matrix(this_mat) } else if (inherits(loads, 'SOMADenseNDArray')) { warning( paste( @@ -704,7 +708,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( } # Check provided graph name obsp_layer <- match.arg(arg = obsp_layer, choices = ms_graph) - mat <- 
as.one.based(self$ms$obsp$get(obsp_layer)$read_sparse_matrix_zero_based(repr = 'C')) + mat <- as.one.based(self$ms$obsp$get(obsp_layer)$read()$sparse_matrix(zero_based=FALSE)$concat()) + mat <- as(mat, "CsparseMatrix") idx <- self$obs_joinids()$as_vector() + 1L mat <- mat[idx, idx] mat <- as(mat, 'Graph') @@ -967,7 +972,7 @@ JoinIDCache <- R6::R6Class( coords = axis_query$coords, value_filter = axis_query$value_filter, column_names = "soma_joinid", - ) + )$concat() tbl$soma_joinid } ) diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index d4d63cb5be..460b03f7e8 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -106,7 +106,7 @@ SOMASparseNDArray <- R6::R6Class( self }, - #' @description Read as an 'arrow::Table' (lifecycle: experimental) + #' @description Reads a user-defined slice of the \code{SOMASparseNDArray} #' @param coords Optional `list` of integer vectors, one for each dimension, with a #' length equal to the number of values to read. If `NULL`, all values are #' read. List elements can be named when specifying a subset of dimensions. @@ -115,17 +115,19 @@ SOMASparseNDArray <- R6::R6Class( #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. 
#' @return arrow::\link[arrow]{Table} or \link{TableReadIter} - read_arrow_table = function( + read = function( coords = NULL, result_order = "auto", - iterated = FALSE, log_level = "warn" ) { private$check_open_for_read() uri <- self$uri - stopifnot("Array must have non-zero elements less than '.Machine$integer.max'" = self$nnz() < .Machine$integer.max) + if (self$nnz() > .Machine$integer.max) { + warning("Iteration results cannot be concatenated on its entirerity beceause ", + "array has non-zero elements greater than '.Machine$integer.max'.") + } result_order <- map_query_layout(match_query_layout(result_order)) @@ -134,89 +136,15 @@ SOMASparseNDArray <- R6::R6Class( } cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) - if (isFALSE(iterated)) { - rl <- soma_array_reader(uri = self$uri, - config = cfg, - dim_points = coords, - result_order = result_order, - loglevel = log_level - ) + sr <- sr_setup(uri = uri, + config = cfg, + dim_points = coords, + #result_order = result_order, + loglevel = log_level) - soma_array_to_arrow_table(rl) - } else { - read_iter <- TableReadIter$new(uri = self$uri, - config = cfg, - dim_points = coords, - loglevel = log_level - ) - read_iter - } + SOMASparseNDArrayRead$new(sr, shape = self$shape()) }, - #' @description Read as a sparse matrix (lifecycle: experimental). Returns - #' a `matrix`-like object accessed using zero-based indexes or an iterator - #' of those. The matrix-like objects supports only basic access operations - #' with zero-based indexes as well as `dim()`,`nrow()`, and `ncol()` and - #' arithmetic operations as defined in \link[base]{groupGeneric}. - #' Use `as.one.based()` to get a fully-featured sparse matrix object supporting - #' more advanced operations (with one-based indexing). - #' @param coords Optional `list` of integer vectors, one for each dimension, with a - #' length equal to the number of values to read. If `NULL`, all values are - #' read. 
List elements can be named when specifying a subset of dimensions. - #' @template param-result-order - #' @param repr Optional one-character code for sparse matrix representation type - #' @param iterated Option boolean indicated whether data is read in call (when - #' `FALSE`, the default value) or in several iterated steps. - #' @param log_level Optional logging level with default value of `"warn"`. - #' @return \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} or - #' \link{SparseReadIter} - read_sparse_matrix_zero_based = function( - coords = NULL, - result_order = "auto", - repr = "T", - iterated = FALSE, - log_level = "warn" - ) { - - dims <- self$dimensions() - attr <- self$attributes() - shape <- self$shape() - - stopifnot("'repr' must be a sinlge character string" = length(repr) == 1 | mode(repr) == "character", - "'repr' can only be one of 'C', 'R', or 'T', for dgCMatrix, dgRMatrix, or dgTMatrix, respectively" = - repr == "C" | repr == "R" | repr == "T") - - if (repr %in% c("C", "R") & iterated) { - stop("When `repr` is 'C' (dgCMatrix) or 'R' (dgRMatrix), iteration mode is not possible") - } - - - stopifnot("Array must have two dimensions" = length(dims) == 2, - "Array must contain columns 'soma_dim_0' and 'soma_dim_1'" = - all.equal(c("soma_dim_0", "soma_dim_1"), names(dims)), - "Array must have non-zero elements less than '.Machine$integer.max'" = self$nnz() < .Machine$integer.max, - "Array dimensions must not exceed '.Machine$integer.max'" = any(shape < .Machine$integer.max), - "Array must contain column 'soma_data'" = all.equal("soma_data", names(attr)) - ) - - if (!is.null(coords)) { - coords <- private$convert_coords(coords) - } - - if (isFALSE(iterated)) { - tbl <- self$read_arrow_table(coords = coords, result_order = result_order, log_level = log_level) - arrow_table_to_sparse(tbl, repr = repr, shape = shape) - } else { - stopifnot(repr == "T") - cfg <- as.character(tiledb::config(self$tiledbsoma_ctx$context())) - 
SparseReadIter$new(uri = self$uri, - config = cfg, - dim_points = coords, - loglevel = log_level, - repr = repr, - shape = shape) - } - }, #' @description Write matrix-like data to the array. (lifecycle: experimental) #' diff --git a/apis/r/R/SOMASparseNDArrayRead.R b/apis/r/R/SOMASparseNDArrayRead.R new file mode 100644 index 0000000000..74c2830e95 --- /dev/null +++ b/apis/r/R/SOMASparseNDArrayRead.R @@ -0,0 +1,44 @@ +#' SOMASparseNDArrayRead +#' +#' @description +#' Intermediate type to choose result format when reading a sparse array +#' @export + +SOMASparseNDArrayRead <- R6::R6Class( + classname = "SOMASparseNDArrayRead", + + public = list( + + initialize = function(sr, shape) { + private$sr <- sr + private$shape <- shape + }, + + #sparse_matrix = function(){ + # "sdf" + #}, + + #' @description Read as a sparse matrix (lifecycle: experimental). Returns + #' an iterator of Matrix::\link[Matrix]{dgTMatrix} or \link{matrixZeroBasedView} of it. + #' @param zero_based Logical, if \code{TRUE} returns iterator of \link{matrixZeroBasedView} + #' if \code{FALSE} returns iterator of Matrix::\link[Matrix]{dgTMatrix}. + #' @return \link{SparseReadIter} + sparse_matrix = function(zero_based=FALSE) { + #TODO implement zero_based argument, currently doesn't do anything + SparseReadIter$new(sr = private$sr, shape = private$shape, zero_based=FALSE) + }, + + #' @description Read as a arrow::\link[arrow]{Table} (lifecycle: experimental). + #' Returns an iterator of arrow::\link[arrow]{Table}. 
+ #' @return \link{TableReadIter} + tables = function() { + TableReadIter$new(private$sr) + } + ), + + private = list( + sr=NULL, + shape=NULL + ) + +) diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index 139fdf44a2..6e76df89af 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -13,32 +13,16 @@ SparseReadIter <- R6::R6Class( public = list( #' @description Create (lifecycle: experimental) - #' @param uri Character value with URI path to a SOMADataFrame or SOMASparseNDArray - #' @param config character vector containing TileDB config. - #' @param colnames Optional vector of character value with the name of the columns to retrieve - #' @param qc Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. - #' no query condition - #' @param dim_points Optional named list with vector of data points to select on the given - #' dimension(s). Each dimension can be one entry in the list. - #' @param loglevel Character value with the desired logging level, defaults to \sQuote{auto} - #' @param repr Optional one-character code for sparse matrix representation type - #' which lets prior setting prevail, any other value is set as new logging level. - #' @param shape Numerical vector with two elements. 
- initialize = function(uri, - config, - colnames = NULL, - qc = NULL, - dim_points = NULL, - loglevel = "auto", - repr = c("C", "T", "R"), - shape) { - - # Initiate super class - super$initialize (uri = uri, config = config, colnames = colnames, qc = qc, - dim_points = dim_points, loglevel = loglevel) - - private$repr <- repr - private$shape <- shape + initialize = function(sr, shape, zero_based=FALSE) { + #TODO implement zero_based argument, currently doesn't do anything + stopifnot("Array must have two dimensions" = length(shape) == 2, + "Array dimensions must not exceed '.Machine$integer.max'" = any(shape < .Machine$integer.max)) + + + # Initiate super class + super$initialize(sr) + private$repr <- "T" + private$shape <- shape }, #' @description Concatenate remainder of iterator. diff --git a/apis/r/tests/testthat/test-Factory.R b/apis/r/tests/testthat/test-Factory.R index f7348a20c1..982680f944 100644 --- a/apis/r/tests/testthat/test-Factory.R +++ b/apis/r/tests/testthat/test-Factory.R @@ -14,7 +14,7 @@ test_that("DataFrame Factory", { # Check opening to read expect_silent(d3 <- SOMADataFrameOpen(uri)) - expect_silent(chk <- d3$read()) + expect_silent(chk <- d3$read()$concat()) expect_equal(tbl, chk) }) @@ -32,7 +32,7 @@ test_that("DataFrame Factory with specified index_column_names", { # Check opening to read expect_silent(d3 <- SOMADataFrameOpen(uri)) expect_equal(d3$mode(), "READ") - expect_silent(chk <- d3$read()) + expect_silent(chk <- d3$read()$concat()) expect_equal(tbl, chk) d3$close() expect_equal(d3$mode(), "CLOSED") @@ -55,12 +55,14 @@ test_that("SparseNDArray Factory", { # check opening to read expect_silent(s3 <- SOMASparseNDArrayOpen(uri)) expect_equal(s3$mode(), "READ") - expect_silent(chk <- s3$read_arrow_table(result_order = "COL_MAJOR")) - expect_identical( - as.numeric(chk$GetColumnByName("soma_data")), - ## need to convert to Csparsematrix first to get x values sorted appropriately - as.numeric(as(mat, "CsparseMatrix")@x) - ) + + #TODO test 
when sr_setup has an argument "result_order" + #expect_silent(chk <- s3$read(result_order = "COL_MAJOR")$tables()$concat()) + #expect_identical( + # as.numeric(chk$GetColumnByName("soma_data")), + # ## need to convert to Csparsematrix first to get x values sorted appropriately + # as.numeric(as(mat, "CsparseMatrix")@x) + #) s3$close() expect_equal(s3$mode(), "CLOSED") }) diff --git a/apis/r/tests/testthat/test-SOMAArrayReader-Basics.R b/apis/r/tests/testthat/test-SOMAArrayReader-Basics.R deleted file mode 100644 index 06c6125c16..0000000000 --- a/apis/r/tests/testthat/test-SOMAArrayReader-Basics.R +++ /dev/null @@ -1,35 +0,0 @@ -test_that("Basic SOMAArrayReader", { - uri <- extract_dataset("soma-dataframe-pbmc3k-processed-obs") - - df <- arrow_to_dt(soma_array_reader(uri)) - expect_equal(nrow(df), 2638L) - expect_equal(ncol(df), 6L) - - columns <- c("n_counts", "n_genes", "louvain") - z <- soma_array_reader(uri, columns) - expect_true(inherits(z, "list")) -}) - -test_that("Errors on large data, passes with budget", { - skip_if_not_installed("pbmc3k.tiledb") # a Suggests: pre-package 3k PBMC data - # see https://ghrr.github.io/drat/ - - tdir <- tempfile() - tgzfile <- system.file("raw-data", "soco-pbmc3k.tar.gz", package="pbmc3k.tiledb") - untar(tarfile = tgzfile, exdir = tdir) - - uri <- file.path(tdir, "soco", "pbmc3k_processed", "ms", "RNA", "X", "data") - expect_true(dir.exists(uri)) - - ## error under 'normal' read with 16mb default budget - sdf1 <- SOMADataFrameOpen(uri) - expect_error(sdf1$read()) - - ## pass with budget of 45mb - ctx <- tiledbsoma::SOMATileDBContext$new(c(soma.init_buffer_bytes="45000000")) - sdf2 <- SOMADataFrameOpen(uri, tiledbsoma_ctx = ctx) - expect_equal(sdf2$mode(), "READ") - expect_silent(sdf2$read()) - sdf2$close() - expect_equal(sdf2$mode(), "CLOSED") -}) diff --git a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R index 93ce923e8d..6bfd8b69be 100644 --- 
a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R +++ b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R @@ -81,15 +81,14 @@ test_that("Iterated Interface from SOMA Classes", { for (tc in test_cases) { sdf <- switch(tc, data.frame = SOMADataFrame$new(uri, internal_use_only = "allowed_use"), - sparse = SOMASparseNDArray$new(uri, internal_use_only = "allowed_use"), - dense = SOMADenseNDArray$new(uri, internal_use_only = "allowed_use")) + sparse = SOMASparseNDArray$new(uri, internal_use_only = "allowed_use")) expect_true(inherits(sdf, "SOMAArrayBase")) sdf$open("READ", internal_use_only = "allowed_use") iterator <- switch(tc, - data.frame = sdf$read(iterated = TRUE), - sparse = sdf$read_arrow_table(iterated = TRUE), - dense = sdf$read_arrow_table(iterated = TRUE)) + data.frame = sdf$read(), + sparse = sdf$read()$tables()) + expect_true(inherits(iterator, "ReadIter")) expect_true(inherits(iterator, "TableReadIter")) @@ -105,9 +104,8 @@ test_that("Iterated Interface from SOMA Classes", { # Test $read_next() iterator <- switch(tc, - data.frame = sdf$read(iterated = TRUE), - sparse = sdf$read_arrow_table(iterated = TRUE), - dense = sdf$read_arrow_table(iterated = TRUE)) + data.frame = sdf$read(), + sparse = sdf$read()$tables()) expect_false(iterator$read_complete()) for (i in 1:2) { @@ -144,12 +142,8 @@ test_that("Iterated Interface from SOMA Sparse Matrix", { sdf <- SOMASparseNDArray$new(uri, internal_use_only = "allowed_use") expect_true(inherits(sdf, "SOMAArrayBase")) sdf$open("READ", internal_use_only = "allowed_use") - - expect_error(sdf$read_sparse_matrix_zero_based(repr = "x")) - expect_error(sdf$read_sparse_matrix_zero_based(iterated = TRUE, repr = "C")) - expect_error(sdf$read_sparse_matrix_zero_based(iterated = TRUE, repr = "R")) - iterator <- sdf$read_sparse_matrix_zero_based(iterated = TRUE) + iterator <- sdf$read()$sparse_matrix(zero_based = T) nnzTotal <- 0 rowsTotal <- 0 @@ -167,7 +161,7 @@ test_that("Iterated Interface from SOMA Sparse Matrix", 
{ expect_null(iterator$read_next()) expect_warning(iterator$read_next()) - expect_equal(nnzTotal, Matrix::nnzero(as.one.based(sdf$read_sparse_matrix_zero_based(iterated=T)$concat()))) + expect_equal(nnzTotal, Matrix::nnzero(as.one.based(sdf$read()$sparse_matrix(T)$concat()))) expect_equal(nnzTotal, 2238732) rm(sdf) diff --git a/apis/r/tests/testthat/test-SOMADataFrame.R b/apis/r/tests/testthat/test-SOMADataFrame.R index 9d85f2aa13..8f361eae49 100644 --- a/apis/r/tests/testthat/test-SOMADataFrame.R +++ b/apis/r/tests/testthat/test-SOMADataFrame.R @@ -45,7 +45,7 @@ test_that("Basic mechanics", { ) # Read result should recreate the original Table - tbl1 <- sdf$read() + tbl1 <- sdf$read()$concat() expect_true(tbl1$Equals(tbl0)) sdf$close() @@ -72,15 +72,15 @@ test_that("Basic mechanics", { ) # Read result should recreate the original RecordBatch (when seen as a tibble) - rb1 <- arrow::as_record_batch(sdf$read()) + rb1 <- arrow::as_record_batch(sdf$read()$concat()) expect_equivalent(dplyr::collect(rb0), dplyr::collect(rb1)) # Slicing by foo - tbl1 <- sdf$read(coords = list(foo = 1L:2L)) + tbl1 <- sdf$read(coords = list(foo = 1L:2L))$concat() expect_true(tbl1$Equals(tbl0$Slice(offset = 0, length = 2))) # Slicing unnamed also work - tbl1 <- sdf$read(coords = 1L:2L) + tbl1 <- sdf$read(coords = 1L:2L)$concat() expect_true(tbl1$Equals(tbl0$Slice(offset = 0, length = 2))) # Subselecting columns @@ -89,11 +89,11 @@ test_that("Basic mechanics", { "'column_names' must only contain valid dimension or attribute columns" ) - tbl1 <- sdf$read(column_names = "bar") + tbl1 <- sdf$read(column_names = "bar")$concat() expect_true(tbl1$Equals(tbl0$SelectColumns(2L))) # Attribute filters - tbl1 <- sdf$read(value_filter = "bar < 5") + tbl1 <- sdf$read(value_filter = "bar < 5")$concat() expect_true(tbl1$Equals(tbl0$Filter(tbl0$bar < 5))) # Validate TileDB array schema @@ -199,7 +199,7 @@ test_that("int64 values are stored correctly", { sdf$close() sdf <- SOMADataFrameOpen(uri) - tbl1 <- 
sdf$read() + tbl1 <- sdf$read()$concat() expect_true(tbl1$Equals(tbl0)) # verify int64_downcast option was restored @@ -211,14 +211,14 @@ test_that("SOMADataFrame read", { uri <- extract_dataset("soma-dataframe-pbmc3k-processed-obs") sdf <- SOMADataFrameOpen(uri) - z <- sdf$read() + z <- sdf$read()$concat() expect_equal(z$num_rows, 2638L) expect_equal(z$num_columns, 6L) sdf$close() columns <- c("n_counts", "n_genes", "louvain") sdf <- SOMADataFrameOpen(uri) - z <- sdf$read(column_names=columns) + z <- sdf$read(column_names=columns)$concat() expect_equal(z$num_columns, 3L) expect_equal(z$ColumnNames(), columns) sdf$close() @@ -230,7 +230,7 @@ test_that("SOMADataFrame read", { coords <- bit64::as.integer64(seq(100, 109)) sdf <- SOMADataFrameOpen(uri) - z <- sdf$read(coords = list(soma_joinid=coords)) + z <- sdf$read(coords = list(soma_joinid=coords))$concat() expect_equal(z$num_rows, 10L) sdf$close() }) diff --git a/apis/r/tests/testthat/test-SOMAExperiment-query.R b/apis/r/tests/testthat/test-SOMAExperiment-query.R index f0940a1e1e..d612efbb82 100644 --- a/apis/r/tests/testthat/test-SOMAExperiment-query.R +++ b/apis/r/tests/testthat/test-SOMAExperiment-query.R @@ -18,17 +18,17 @@ test_that("returns all coordinates by default", { ) # obs/var tables - expect_true(query$obs()$Equals(experiment$obs$read())) - expect_true(query$var()$Equals(experiment$ms$get("RNA")$var$read())) + expect_true(query$obs()$Equals(experiment$obs$read()$concat())) + expect_true(query$var()$Equals(experiment$ms$get("RNA")$var$read()$concat())) # obs/var joinids expect_equal( query$obs_joinids(), - arrow::concat_arrays(experiment$obs$read()$soma_joinid) + arrow::concat_arrays(experiment$obs$read()$concat()$soma_joinid) ) expect_equal( query$var_joinids(), - arrow::concat_arrays(experiment$ms$get("RNA")$var$read()$soma_joinid) + arrow::concat_arrays(experiment$ms$get("RNA")$var$read()$concat()$soma_joinid) ) expect_equal(query$n_obs, n_obs) @@ -41,7 +41,7 @@ test_that("returns all coordinates by 
default", { expect_true( query$X("counts")$Equals( - experiment$ms$get("RNA")$X$get("counts")$read_arrow_table() + experiment$ms$get("RNA")$X$get("counts")$read()$tables()$concat() ) ) @@ -87,9 +87,9 @@ test_that("querying by dimension coordinates", { as.integer(var_slice) ) - raw_X <- experiment$ms$get("RNA")$X$get("counts")$read_arrow_table( + raw_X <- experiment$ms$get("RNA")$X$get("counts")$read( coords = list(obs_slice, var_slice) - ) + )$tables()$concat() expect_true(query$X("counts")$Equals(raw_X)) experiment$close() @@ -214,11 +214,11 @@ test_that("querying by both coordinates and value filters", { ) # Determine expected results - obs_df <- experiment$obs$read()$to_data_frame() + obs_df <- experiment$obs$read()$concat()$to_data_frame() obs_hits <- obs_df$soma_joinid %in% as.integer(obs_slice) & obs_df$baz %in% obs_label_values - var_df <- experiment$ms$get("RNA")$var$read()$to_data_frame() + var_df <- experiment$ms$get("RNA")$var$read()$concat()$to_data_frame() var_hits <- var_df$soma_joinid %in% as.integer(var_slice) & var_df$quux %in% var_label_values @@ -228,37 +228,38 @@ test_that("querying by both coordinates and value filters", { experiment$close() }) -test_that("queries with empty results", { - uri <- withr::local_tempdir("soma-experiment-query-empty-results") - n_obs <- 1001L - n_var <- 99L - - experiment <- create_and_populate_experiment( - uri = uri, - n_obs = n_obs, - n_var = n_var, - X_layer_names = c("counts", "logcounts"), - mode = "READ" - ) - on.exit(experiment$close()) - - # obs/var slice and value filter - query <- SOMAExperimentAxisQuery$new( - experiment = experiment, - measurement_name = "RNA", - obs_query = SOMAAxisQuery$new( - value_filter = "baz == 'does-not-exist'" - ), - var_query = SOMAAxisQuery$new( - value_filter = "quux == 'does-not-exist'" - ) - ) - - expect_equal(query$obs()$num_rows, 0) - expect_equal(query$var()$num_rows, 0) - - experiment$close() -}) +# TODO include when sr_setup and sr_next support empty results 
+#test_that("queries with empty results", { +# uri <- withr::local_tempdir("soma-experiment-query-empty-results") +# n_obs <- 1001L +# n_var <- 99L +# +# experiment <- create_and_populate_experiment( +# uri = uri, +# n_obs = n_obs, +# n_var = n_var, +# X_layer_names = c("counts", "logcounts"), +# mode = "READ" +# ) +# on.exit(experiment$close()) +# +# # obs/var slice and value filter +# query <- SOMAExperimentAxisQuery$new( +# experiment = experiment, +# measurement_name = "RNA", +# obs_query = SOMAAxisQuery$new( +# value_filter = "baz == 'does-not-exist'" +# ), +# var_query = SOMAAxisQuery$new( +# value_filter = "quux == 'does-not-exist'" +# ) +# ) +# +# expect_equal(query$obs()$num_rows, 0) +# expect_equal(query$var()$num_rows, 0) +# +# experiment$close() +#}) test_that("retrieving query results in supported formats", { uri <- withr::local_tempdir("soma-experiment-query-results-formats1") diff --git a/apis/r/tests/testthat/test-SOMASparseNDArray.R b/apis/r/tests/testthat/test-SOMASparseNDArray.R index fc05032e7c..b381232343 100644 --- a/apis/r/tests/testthat/test-SOMASparseNDArray.R +++ b/apis/r/tests/testthat/test-SOMASparseNDArray.R @@ -14,35 +14,36 @@ test_that("SOMASparseNDArray creation", { ndarray <- SOMASparseNDArrayOpen(uri) - tbl <- ndarray$read_arrow_table(result_order = "COL_MAJOR") - expect_true(is_arrow_table(tbl)) - expect_equal(tbl$ColumnNames(), c("soma_dim_0", "soma_dim_1", "soma_data")) - - expect_identical( - as.numeric(tbl$GetColumnByName("soma_data")), - ## need to convert to Csparsematrix first to get x values sorted appropriately - as.numeric(as(mat, "CsparseMatrix")@x) - ) - - # Subset both dims - tbl <- ndarray$read_arrow_table( - coords = list(soma_dim_0=0, soma_dim_1=0:2), - result_order = "COL_MAJOR" - ) - expect_identical( - as.numeric(tbl$GetColumnByName("soma_data")), - as.numeric(mat[1, 1:3]) - ) - - # Subset both dims, unnamed - tbl <- ndarray$read_arrow_table( - coords = list(0, 0:2), - result_order = "COL_MAJOR" - ) - 
expect_identical( - as.numeric(tbl$GetColumnByName("soma_data")), - as.numeric(mat[1, 1:3]) - ) + # TODO include when sr_setup allows for result_order + #tbl <- ndarray$read(result_order = "COL_MAJOR")$tables()$concat() + #expect_true(is_arrow_table(tbl)) + #expect_equal(tbl$ColumnNames(), c("soma_dim_0", "soma_dim_1", "soma_data")) + + #expect_identical( + # as.numeric(tbl$GetColumnByName("soma_data")), + # ## need to convert to Csparsematrix first to get x values sorted appropriately + # as.numeric(as(mat, "CsparseMatrix")@x) + #) + + ## Subset both dims + #tbl <- ndarray$read( + # coords = list(soma_dim_0=0, soma_dim_1=0:2), + # result_order = "COL_MAJOR" + #)$tables()$concat() + #expect_identical( + # as.numeric(tbl$GetColumnByName("soma_data")), + # as.numeric(mat[1, 1:3]) + #) + + ## Subset both dims, unnamed + #tbl <- ndarray$read( + # coords = list(0, 0:2), + # result_order = "COL_MAJOR" + #)$tables()$concat() + #expect_identical( + # as.numeric(tbl$GetColumnByName("soma_data")), + # as.numeric(mat[1, 1:3]) + #) # Validate TileDB array schema arr <- tiledb::tiledb_array(uri) @@ -85,7 +86,7 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", { # read_sparse_matrix ndarray <- SOMASparseNDArrayOpen(uri) - mat2 <- ndarray$read_sparse_matrix_zero_based(repr="T") + mat2 <- ndarray$read()$sparse_matrix(zero_based = T)$concat() expect_true(inherits(mat2, "matrixZeroBasedView")) expect_s4_class(as.one.based(mat2), "sparseMatrix") expect_equal(dim(mat2), c(10, 10)) @@ -98,7 +99,7 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", { ndarray <- SOMASparseNDArrayOpen(uri) # repeat with iterated reader - iterator <- ndarray$read_sparse_matrix_zero_based(repr="T", iterated=TRUE) + iterator <- ndarray$read()$sparse_matrix(zero_based = T) mat2 <- iterator$read_next() expect_true(inherits(mat2, "matrixZeroBasedView")) expect_s4_class(as.one.based(mat2), "sparseMatrix") diff --git a/apis/system/tests/test_dataframe_write_python_read_r.py 
b/apis/system/tests/test_dataframe_write_python_read_r.py index 0cc301f363..d2ab36deff 100644 --- a/apis/system/tests/test_dataframe_write_python_read_r.py +++ b/apis/system/tests/test_dataframe_write_python_read_r.py @@ -39,7 +39,7 @@ def base_R_script(self): return f""" library("tiledbsoma") soma_df <- SOMADataFrameOpen("{self.uri}") - table = soma_df$read() + table = soma_df$read()$concat() df = as.data.frame(table) """ diff --git a/apis/system/tests/test_sparsendarray_write_python_read_r.py b/apis/system/tests/test_sparsendarray_write_python_read_r.py index 672c82dee6..8696827fb5 100644 --- a/apis/system/tests/test_sparsendarray_write_python_read_r.py +++ b/apis/system/tests/test_sparsendarray_write_python_read_r.py @@ -24,8 +24,8 @@ def base_R_script(self): return f""" library("tiledbsoma") soma_ndarray <- SOMASparseNDArrayOpen("{self.uri}") - table <- soma_ndarray$read_arrow_table() - M <- as.one.based(soma_ndarray$read_sparse_matrix_zero_based()) + table <- soma_ndarray$read()$tables()$concat() + M <- as.one.based(soma_ndarray$read()$sparse_matrix(zero_based=T)$concat()) df <- as.data.frame(table) """ From 85cca423c1aa2491b9738a4b0377605e5c66d92f Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 25 May 2023 21:15:30 -0700 Subject: [PATCH 15/18] Fix bugs after main merge --- apis/r/R/SOMAExperimentAxisQuery.R | 3 +-- apis/r/R/SOMASparseNDArray.R | 2 +- apis/r/R/SOMASparseNDArrayRead.R | 6 +---- apis/r/R/SparseReadIter.R | 18 ++++++++++++--- apis/r/R/utils-readerTransformers.R | 13 ++++++++--- .../testthat/test-SOMAArrayReader-Iterated.R | 2 +- .../r/tests/testthat/test-SOMASparseNDArray.R | 23 ++++++------------- 7 files changed, 36 insertions(+), 31 deletions(-) diff --git a/apis/r/R/SOMAExperimentAxisQuery.R b/apis/r/R/SOMAExperimentAxisQuery.R index e3f01b06e4..c4f6a0c487 100644 --- a/apis/r/R/SOMAExperimentAxisQuery.R +++ b/apis/r/R/SOMAExperimentAxisQuery.R @@ -644,7 +644,6 @@ SOMAExperimentAxisQuery <- R6::R6Class( ) load_mat <- if 
(inherits(loads, 'SOMASparseNDArray')) { this_mat <- loads$read()$sparse_matrix(zero_based=TRUE)$concat() - this_mat <- embed$read_sparse_matrix_zero_based() this_mat <- this_mat$take(coords$features, coords$dims) this_mat <- this_mat$get_one_based_matrix() this_mat <- as(this_mat, "CsparseMatrix") @@ -713,7 +712,7 @@ SOMAExperimentAxisQuery <- R6::R6Class( } # Check provided graph name obsp_layer <- match.arg(arg = obsp_layer, choices = ms_graph) - mat <- self$ms$obsp$get(obsp_layer)$read()$sparse_matrix(zero_based=FALSE)$concat()$get_one_based_matrix() + mat <- self$ms$obsp$get(obsp_layer)$read()$sparse_matrix(zero_based=TRUE)$concat()$get_one_based_matrix() mat <- as(mat, "CsparseMatrix") idx <- self$obs_joinids()$as_vector() + 1L mat <- mat[idx, idx] diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index ebc8d45b0c..0721069e1a 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -114,7 +114,7 @@ SOMASparseNDArray <- R6::R6Class( #' @param iterated Option boolean indicated whether data is read in call (when #' `FALSE`, the default value) or in several iterated steps. #' @param log_level Optional logging level with default value of `"warn"`. - #' @return arrow::\link[arrow]{Table} or \link{TableReadIter} + #' @return \link{SOMASparseNDArrayRead} read = function( coords = NULL, result_order = "auto", diff --git a/apis/r/R/SOMASparseNDArrayRead.R b/apis/r/R/SOMASparseNDArrayRead.R index 74c2830e95..ac261dde81 100644 --- a/apis/r/R/SOMASparseNDArrayRead.R +++ b/apis/r/R/SOMASparseNDArrayRead.R @@ -14,10 +14,6 @@ SOMASparseNDArrayRead <- R6::R6Class( private$shape <- shape }, - #sparse_matrix = function(){ - # "sdf" - #}, - #' @description Read as a sparse matrix (lifecycle: experimental). Returns #' an iterator of Matrix::\link[Matrix]{dgTMatrix} or \link{matrixZeroBasedView} of it. 
#' @param zero_based Logical, if \code{TRUE} returns iterator of \link{matrixZeroBasedView} @@ -25,7 +21,7 @@ SOMASparseNDArrayRead <- R6::R6Class( #' @return \link{SparseReadIter} sparse_matrix = function(zero_based=FALSE) { #TODO implement zero_based argument, currently doesn't do anything - SparseReadIter$new(sr = private$sr, shape = private$shape, zero_based=FALSE) + SparseReadIter$new(sr = private$sr, shape = private$shape, zero_based=zero_based) }, #' @description Read as a arrow::\link[arrow]{Table} (lifecycle: experimental). diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index 6e76df89af..763764242f 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -13,17 +13,21 @@ SparseReadIter <- R6::R6Class( public = list( #' @description Create (lifecycle: experimental) + #' @param shape Shape of the full matrix + #' @param zero_based Logical, if TRUE will make iterator for Matrix::\link[Matrix]{dgTMatrix} + #' otherwise \link{matrixZeroBasedView}. initialize = function(sr, shape, zero_based=FALSE) { #TODO implement zero_based argument, currently doesn't do anything stopifnot("Array must have two dimensions" = length(shape) == 2, "Array dimensions must not exceed '.Machine$integer.max'" = any(shape < .Machine$integer.max)) - # Initiate super class super$initialize(sr) private$repr <- "T" private$shape <- shape + private$zero_based <- zero_based }, + #' @description Concatenate remainder of iterator. 
#' @return \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} @@ -37,7 +41,11 @@ SparseReadIter <- R6::R6Class( mat <- self$read_next() while (!self$read_complete()) { - mat <- mat + self$read_next() + if(private$zero_based) { + mat <- mat$sum(self$read_next()) + } else { + mat <- mat + self$read_next() + } } mat @@ -48,10 +56,14 @@ SparseReadIter <- R6::R6Class( repr=NULL, shape=NULL, + zero_based=NULL, ## refined from base class soma_reader_transform = function(x) { - arrow_table_to_sparse(soma_array_to_arrow_table(x), repr = private$repr, shape = private$shape) + arrow_table_to_sparse(soma_array_to_arrow_table(x), + repr = private$repr, + shape = private$shape, + zero_based = private$zero_based) } ) diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R index 308bc30851..d1289a1249 100644 --- a/apis/r/R/utils-readerTransformers.R +++ b/apis/r/R/utils-readerTransformers.R @@ -16,8 +16,11 @@ soma_array_to_arrow_table <- function(x) { #' @param repr Optional one-character code for sparse matrix representation type #' @param shape Numerical vector with two elements, one for each dimension. 
If #' \code{NULL}, then the following is used \code{1 + c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))} -#' @return \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} -arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), shape = NULL) { +#' @param repr Optional one-character code for sparse matrix representation type +#' @param zero_based Logical, if TRUE returns a \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} +#' otherwise a Matrix::\link{sparse_matrix} +#' @return Matrix::\link{sparse_matrix} or \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} +arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), shape = NULL, zero_based = FALSE) { # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). But, because these dimensions are @@ -44,7 +47,11 @@ arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), shape = NULL) { j = soma_dim_1_one_based, x = soma_data, dims = shape, repr = repr) - matrixZeroBasedView(mat) + if(zero_based) { + matrixZeroBasedView$new(mat) + } else { + mat + } } diff --git a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R index de794da70b..346ae6b646 100644 --- a/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R +++ b/apis/r/tests/testthat/test-SOMAArrayReader-Iterated.R @@ -161,7 +161,7 @@ test_that("Iterated Interface from SOMA Sparse Matrix", { expect_null(iterator$read_next()) expect_warning(iterator$read_next()) - expect_equal(nnzTotal, Matrix::nnzero(as.one.based(sdf$read()$sparse_matrix(T)$concat()))) + expect_equal(nnzTotal, Matrix::nnzero(sdf$read()$sparse_matrix(T)$concat()$get_one_based_matrix())) expect_equal(nnzTotal, 2238732) rm(sdf) diff --git a/apis/r/tests/testthat/test-SOMASparseNDArray.R b/apis/r/tests/testthat/test-SOMASparseNDArray.R index 63535ac025..6d20d8139e 100644 ---
a/apis/r/tests/testthat/test-SOMASparseNDArray.R +++ b/apis/r/tests/testthat/test-SOMASparseNDArray.R @@ -88,25 +88,16 @@ test_that("SOMASparseNDArray read_sparse_matrix", { ndarray <- SOMASparseNDArrayOpen(uri) mat2 <- ndarray$read()$sparse_matrix(zero_based = T)$concat() expect_true(inherits(mat2, "matrixZeroBasedView")) - expect_s4_class(as.one.based(mat2), "sparseMatrix") - expect_equal(dim(mat2), c(10, 10)) - expect_equal(nrow(mat2), 10) - expect_equal(ncol(mat2), 10) + expect_s4_class(mat2$get_one_based_matrix(), "sparseMatrix") + expect_equal(mat2$dim(), c(10, 10)) + expect_equal(mat2$nrow(), 10) + expect_equal(mat2$ncol(), 10) ## not sure why all.equal(mat, mat2) does not pass - expect_true(all.equal(as.numeric(mat), as.numeric(mat2[1:9, 1:9]))) - expect_equal(sum(mat), sum(mat2)) + expect_true(all.equal(as.numeric(mat[1:9, 1:9]), as.numeric(mat2$take(0:8, 0:8)$get_one_based_matrix()))) + expect_equal(sum(mat), sum(mat2$get_one_based_matrix())) ndarray <- SOMASparseNDArrayOpen(uri) - # repeat with iterated reader - ndarray$read_sparse_matrix(repr = "T", iterated = TRUE) - mat3 <- ndarray$read_next() - expect_s4_class(mat3, "TsparseMatrix") - expect_equal(dim(mat3), c(10, 10)) - expect_equal(nrow(mat3), 10) - expect_equal(ncol(mat3), 10) - expect_true(all.equal(as.numeric(mat), as.numeric(mat3[1:9, 1:9]))) - expect_equal(sum(mat), sum(mat3)) ndarray$close() }) @@ -123,7 +114,7 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", { # read_sparse_matrix ndarray <- SOMASparseNDArrayOpen(uri) - mat2 <- ndarray$read_sparse_matrix_zero_based(repr="T") + mat2 <- ndarray$read()$sparse_matrix(zero_based=T)$concat() expect_true(inherits(mat2, "matrixZeroBasedView")) expect_s4_class(mat2$get_one_based_matrix(), "sparseMatrix") expect_equal(mat2$dim(), c(10, 10)) From 1917755baeba116571043ca88c343c4b2f483132 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 25 May 2023 21:23:26 -0700 Subject: [PATCH 16/18] Update docs --- apis/r/NAMESPACE | 3 +- 
apis/r/R/ReadIter.R | 2 +- apis/r/R/SOMASparseNDArrayRead.R | 3 + apis/r/R/SparseReadIter.R | 1 + apis/r/man/Ops.matrixZeroBasedView.Rd | 23 ----- apis/r/man/ReadIter.Rd | 24 +----- apis/r/man/SOMADataFrame.Rd | 2 +- apis/r/man/SOMAExperimentAxisQuery.Rd | 2 +- apis/r/man/SOMASparseNDArray.Rd | 110 ++---------------------- apis/r/man/SOMASparseNDArrayRead.Rd | 90 +++++++++++++++++++ apis/r/man/SparseReadIter.Rd | 31 ++----- apis/r/man/arrow_table_to_sparse.Rd | 12 ++- apis/r/man/print.matrixZeroBasedView.Rd | 17 ---- 13 files changed, 124 insertions(+), 196 deletions(-) delete mode 100644 apis/r/man/Ops.matrixZeroBasedView.Rd create mode 100644 apis/r/man/SOMASparseNDArrayRead.Rd delete mode 100644 apis/r/man/print.matrixZeroBasedView.Rd diff --git a/apis/r/NAMESPACE b/apis/r/NAMESPACE index 915110e719..7ea807804e 100644 --- a/apis/r/NAMESPACE +++ b/apis/r/NAMESPACE @@ -2,13 +2,11 @@ S3method("[[",MappingBase) S3method("[[<-",MappingBase) -S3method(Ops,matrixZeroBasedView) S3method(as.list,MappingBase) S3method(length,MappingBase) S3method(names,MappingBase) S3method(pad_matrix,default) S3method(pad_matrix,matrix) -S3method(print,matrixZeroBasedView) S3method(write_soma,Assay) S3method(write_soma,DimReduc) S3method(write_soma,Graph) @@ -46,6 +44,7 @@ export(SOMAOpen) export(SOMASparseNDArray) export(SOMASparseNDArrayCreate) export(SOMASparseNDArrayOpen) +export(SOMASparseNDArrayRead) export(SOMATileDBContext) export(ScalarMap) export(SparseReadIter) diff --git a/apis/r/R/ReadIter.R b/apis/r/R/ReadIter.R index 5cd60af654..079ff94a6c 100644 --- a/apis/r/R/ReadIter.R +++ b/apis/r/R/ReadIter.R @@ -8,7 +8,7 @@ ReadIter <- R6::R6Class( public = list( #' @description Create (lifecycle: experimental) - #' @param sr + #' @param sr soma read pointer initialize = function(sr) { private$soma_reader_pointer <- sr }, diff --git a/apis/r/R/SOMASparseNDArrayRead.R b/apis/r/R/SOMASparseNDArrayRead.R index ac261dde81..8963752932 100644 --- a/apis/r/R/SOMASparseNDArrayRead.R +++ 
b/apis/r/R/SOMASparseNDArrayRead.R @@ -9,6 +9,9 @@ SOMASparseNDArrayRead <- R6::R6Class( public = list( + #' @description Create (lifecycle: experimental) + #' @param sr soma read pointer + #' @param shape Shape of the full matrix initialize = function(sr, shape) { private$sr <- sr private$shape <- shape diff --git a/apis/r/R/SparseReadIter.R b/apis/r/R/SparseReadIter.R index 763764242f..6834ec5b66 100644 --- a/apis/r/R/SparseReadIter.R +++ b/apis/r/R/SparseReadIter.R @@ -13,6 +13,7 @@ SparseReadIter <- R6::R6Class( public = list( #' @description Create (lifecycle: experimental) + #' @param sr Soma reader pointer #' @param shape Shape of the full matrix #' @param zero_based Logical, if TRUE will make iterator for Matrix::\link[Matrix]{dgTMatrix} #' otherwise \link{matrixZeroBasedView}. diff --git a/apis/r/man/Ops.matrixZeroBasedView.Rd b/apis/r/man/Ops.matrixZeroBasedView.Rd deleted file mode 100644 index 554414fd38..0000000000 --- a/apis/r/man/Ops.matrixZeroBasedView.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-matrixZeroBasedView.R -\name{Ops.matrixZeroBasedView} -\alias{Ops.matrixZeroBasedView} -\title{\itemize{ -\item -}} -\usage{ -\method{Ops}{matrixZeroBasedView}(e1, e2 = NULL) -} -\arguments{ -\item{e1}{left side} - -\item{e2}{right sidet} -} -\value{ -results of sum -} -\description{ -\itemize{ -\item -} -} diff --git a/apis/r/man/ReadIter.Rd b/apis/r/man/ReadIter.Rd index 9070af48a8..6b76b9ea8c 100644 --- a/apis/r/man/ReadIter.Rd +++ b/apis/r/man/ReadIter.Rd @@ -27,33 +27,13 @@ Class that allows for read iteration of SOMA reads. \subsection{Method \code{new()}}{ Create (lifecycle: experimental) \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ReadIter$new( - uri, - config, - colnames = NULL, - qc = NULL, - dim_points = NULL, - loglevel = "auto" -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ReadIter$new(sr)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{uri}}{Character value with URI path to a SOMADataFrame or SOMASparseNDArray} - -\item{\code{config}}{character vector containing TileDB config.} - -\item{\code{colnames}}{Optional vector of character value with the name of the columns to retrieve} - -\item{\code{qc}}{Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. -no query condition} - -\item{\code{dim_points}}{Optional named list with vector of data points to select on the given -dimension(s). Each dimension can be one entry in the list.} - -\item{\code{loglevel}}{Character value with the desired logging level, defaults to \sQuote{auto} -which lets prior setting prevail, any other value is set as new logging level.} +\item{\code{sr}}{soma read pointer} } \if{html}{\out{
    }} } diff --git a/apis/r/man/SOMADataFrame.Rd b/apis/r/man/SOMADataFrame.Rd index 2a88789c5e..655aab50db 100644 --- a/apis/r/man/SOMADataFrame.Rd +++ b/apis/r/man/SOMADataFrame.Rd @@ -138,7 +138,7 @@ more information.} \item{\code{iterated}}{Option boolean indicated whether data is read in call (when \code{FALSE}, the default value) or in several iterated steps.} -\item{\code{log_level}}{Optional logging level with default value of \code{"auto"}.} +\item{\code{log_level}}{Optional logging level with default value of \code{"warn"}.} } \if{html}{\out{
    }} } diff --git a/apis/r/man/SOMAExperimentAxisQuery.Rd b/apis/r/man/SOMAExperimentAxisQuery.Rd index 8bf634331c..3c59d88839 100644 --- a/apis/r/man/SOMAExperimentAxisQuery.Rd +++ b/apis/r/man/SOMAExperimentAxisQuery.Rd @@ -152,7 +152,7 @@ Retrieve \code{soma_joinids} as an \code{\link[arrow:array]{arrow::Array}} for \ \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-SOMAExperimentAxisQuery-X}{}}} \subsection{Method \code{X()}}{ -Retrieves an \code{X} layer as an \code{\link[arrow:Table]{arrow::Table}}. +Retrieves an \code{X} layer as a \link{SOMASparseNDArrayRead} \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{SOMAExperimentAxisQuery$X(layer_name)}\if{html}{\out{
    }} } diff --git a/apis/r/man/SOMASparseNDArray.Rd b/apis/r/man/SOMASparseNDArray.Rd index 62dd065209..4919b80dd9 100644 --- a/apis/r/man/SOMASparseNDArray.Rd +++ b/apis/r/man/SOMASparseNDArray.Rd @@ -29,10 +29,7 @@ the object are overwritten and new index values are added. (lifecycle: experimen \subsection{Public methods}{ \itemize{ \item \href{#method-SOMASparseNDArray-create}{\code{SOMASparseNDArray$create()}} -\item \href{#method-SOMASparseNDArray-read_arrow_table}{\code{SOMASparseNDArray$read_arrow_table()}} -\item \href{#method-SOMASparseNDArray-read_sparse_matrix}{\code{SOMASparseNDArray$read_sparse_matrix()}} -\item \href{#method-SOMASparseNDArray-read_sparse_matrix_zero_based}{\code{SOMASparseNDArray$read_sparse_matrix_zero_based()}} -\item \href{#method-SOMASparseNDArray-read_next}{\code{SOMASparseNDArray$read_next()}} +\item \href{#method-SOMASparseNDArray-read}{\code{SOMASparseNDArray$read()}} \item \href{#method-SOMASparseNDArray-write}{\code{SOMASparseNDArray$write()}} \item \href{#method-SOMASparseNDArray-nnz}{\code{SOMASparseNDArray$nnz()}} \item \href{#method-SOMASparseNDArray-clone}{\code{SOMASparseNDArray$clone()}} @@ -99,16 +96,15 @@ as \code{create()} is considered internal and should not be called directly.} } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read_arrow_table}{}}} -\subsection{Method \code{read_arrow_table()}}{ -Read as an 'arrow::Table' (lifecycle: experimental) +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read}{}}} +\subsection{Method \code{read()}}{ +Reads a user-defined slice of the \code{SOMASparseNDArray} \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_arrow_table( +\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read( coords = NULL, result_order = "auto", - iterated = FALSE, - log_level = "auto" + log_level = "warn" )}\if{html}{\out{
    }} } @@ -122,104 +118,16 @@ read. List elements can be named when specifying a subset of dimensions.} \item{\code{result_order}}{Optional order of read results. This can be one of either \verb{"ROW_MAJOR, }"COL_MAJOR"\verb{, or }"auto"` (default).} -\item{\code{iterated}}{Option boolean indicated whether data is read in call (when -\code{FALSE}, the default value) or in several iterated steps.} - -\item{\code{log_level}}{Optional logging level with default value of \code{"auto"}.} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -An \code{\link[arrow:Table]{arrow::Table}}. -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read_sparse_matrix}{}}} -\subsection{Method \code{read_sparse_matrix()}}{ -Read as a sparse matrix (lifecycle: experimental) -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_sparse_matrix( - coords = NULL, - result_order = "auto", - repr = c("C", "T", "R"), - iterated = FALSE, - log_level = "auto" -)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{coords}}{Optional \code{list} of integer vectors, one for each dimension, with a -length equal to the number of values to read. If \code{NULL}, all values are -read. List elements can be named when specifying a subset of dimensions.} - -\item{\code{result_order}}{Optional order of read results. This can be one of either -\verb{"ROW_MAJOR, }"COL_MAJOR"\verb{, or }"auto"` (default).} - -\item{\code{repr}}{Optional one-character code for sparse matrix representation type} +\item{\code{log_level}}{Optional logging level with default value of \code{"warn"}.} \item{\code{iterated}}{Option boolean indicated whether data is read in call (when \code{FALSE}, the default value) or in several iterated steps.} - -\item{\code{log_level}}{Optional logging level with default value of \code{"auto"}.} } \if{html}{\out{
    }} } \subsection{Returns}{ -arrow::\link[arrow]{Table} or \link{TableReadIter} +\link{SOMASparseNDArrayRead} } -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read_sparse_matrix_zero_based}{}}} -\subsection{Method \code{read_sparse_matrix_zero_based()}}{ -d operations (with one-based indexing). -Read as a zero-indexed sparse matrix (lifecycle: experimental) -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_sparse_matrix_zero_based( - coords = NULL, - result_order = "auto", - repr = "T", - iterated = FALSE, - log_level = "auto" -)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{coords}}{Optional \code{list} of integer vectors, one for each dimension, with a -length equal to the number of values to read. If \code{NULL}, all values are -read. List elements can be named when specifying a subset of dimensions.} - -\item{\code{result_order}}{Optional order of read results. This can be one of either -\verb{"ROW_MAJOR, }"COL_MAJOR"\verb{, or }"auto"` (default).} - -\item{\code{repr}}{Optional one-character code for sparse matrix representation type} - -\item{\code{iterated}}{Option boolean indicated whether data is read in call (when -\code{FALSE}, the default value) or in several iterated steps.} - -\item{\code{log_level}}{Optional logging level with default value of \code{"auto"}.} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -\link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} or -\link{SparseReadIter} - -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-SOMASparseNDArray-read_next}{}}} -\subsection{Method \code{read_next()}}{ -Read the next chunk of an iterated read. (lifecycle: experimental) -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SOMASparseNDArray$read_next()}\if{html}{\out{
    }} -} - } \if{html}{\out{
    }} \if{html}{\out{}} diff --git a/apis/r/man/SOMASparseNDArrayRead.Rd b/apis/r/man/SOMASparseNDArrayRead.Rd new file mode 100644 index 0000000000..044815c758 --- /dev/null +++ b/apis/r/man/SOMASparseNDArrayRead.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SOMASparseNDArrayRead.R +\name{SOMASparseNDArrayRead} +\alias{SOMASparseNDArrayRead} +\title{SOMASparseNDArrayRead} +\description{ +Intermediate type to choose result format when reading a sparse array +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-SOMASparseNDArrayRead-new}{\code{SOMASparseNDArrayRead$new()}} +\item \href{#method-SOMASparseNDArrayRead-sparse_matrix}{\code{SOMASparseNDArrayRead$sparse_matrix()}} +\item \href{#method-SOMASparseNDArrayRead-tables}{\code{SOMASparseNDArrayRead$tables()}} +\item \href{#method-SOMASparseNDArrayRead-clone}{\code{SOMASparseNDArrayRead$clone()}} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMASparseNDArrayRead-new}{}}} +\subsection{Method \code{new()}}{ +Create (lifecycle: experimental) +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMASparseNDArrayRead$new(sr, shape)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{sr}}{soma read pointer} + +\item{\code{shape}}{Shape of the full matrix} +} +\if{html}{\out{
    }} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMASparseNDArrayRead-sparse_matrix}{}}} +\subsection{Method \code{sparse_matrix()}}{ +Read as a sparse matrix (lifecycle: experimental). Returns +an iterator of Matrix::\link[Matrix]{dgTMatrix} or \link{matrixZeroBasedView} of it. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMASparseNDArrayRead$sparse_matrix(zero_based = FALSE)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{zero_based}}{Logical, if \code{TRUE} returns iterator of \link{matrixZeroBasedView}; +if \code{FALSE} returns iterator of Matrix::\link[Matrix]{dgTMatrix}.} +} +\if{html}{\out{
    }} +} +\subsection{Returns}{ +\link{SparseReadIter} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMASparseNDArrayRead-tables}{}}} +\subsection{Method \code{tables()}}{ +Read as an arrow::\link[arrow]{Table} (lifecycle: experimental). +Returns an iterator of arrow::\link[arrow]{Table}. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMASparseNDArrayRead$tables()}\if{html}{\out{
    }} +} + +\subsection{Returns}{ +\link{TableReadIter} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SOMASparseNDArrayRead-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{SOMASparseNDArrayRead$clone(deep = FALSE)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
    }} +} +} +} diff --git a/apis/r/man/SparseReadIter.Rd b/apis/r/man/SparseReadIter.Rd index 1c62d35fa1..6ad079d7be 100644 --- a/apis/r/man/SparseReadIter.Rd +++ b/apis/r/man/SparseReadIter.Rd @@ -33,39 +33,18 @@ Iteration chunks are retrieved as 0-based Views \link{matrixZeroBasedView} of Ma \subsection{Method \code{new()}}{ Create (lifecycle: experimental) \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{SparseReadIter$new( - uri, - config, - colnames = NULL, - qc = NULL, - dim_points = NULL, - loglevel = "auto", - repr = c("C", "T", "R"), - shape -)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{SparseReadIter$new(sr, shape, zero_based = FALSE)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{uri}}{Character value with URI path to a SOMADataFrame or SOMASparseNDArray} +\item{\code{sr}}{Soma reader pointer} -\item{\code{config}}{character vector containing TileDB config.} +\item{\code{shape}}{Shape of the full matrix} -\item{\code{colnames}}{Optional vector of character value with the name of the columns to retrieve} - -\item{\code{qc}}{Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. -no query condition} - -\item{\code{dim_points}}{Optional named list with vector of data points to select on the given -dimension(s). Each dimension can be one entry in the list.} - -\item{\code{loglevel}}{Character value with the desired logging level, defaults to \sQuote{auto}} - -\item{\code{repr}}{Optional one-character code for sparse matrix representation type -which lets prior setting prevail, any other value is set as new logging level.} - -\item{\code{shape}}{Numerical vector with two elements.} +\item{\code{zero_based}}{Logical, if TRUE will make iterator for \link{matrixZeroBasedView} +otherwise Matrix::\link[Matrix]{dgTMatrix}.} } \if{html}{\out{
    }} } diff --git a/apis/r/man/arrow_table_to_sparse.Rd b/apis/r/man/arrow_table_to_sparse.Rd index e14ffa87a4..3698fc5252 100644 --- a/apis/r/man/arrow_table_to_sparse.Rd +++ b/apis/r/man/arrow_table_to_sparse.Rd @@ -4,7 +4,12 @@ \alias{arrow_table_to_sparse} \title{Transformer function: Arrow table to Matrix::sparseMatrix} \usage{ -arrow_table_to_sparse(tbl, repr = c("C", "T", "R"), shape = NULL) +arrow_table_to_sparse( + tbl, + repr = c("C", "T", "R"), + shape = NULL, + zero_based = FALSE +) } \arguments{ \item{tbl}{\link[Arrow]{Table} with columns "soma_dim_0", "soma_dim_1", and "soma_datah"} @@ -13,9 +18,12 @@ arrow_table_to_sparse(tbl, repr = c("C", "T", "R"), shape = NULL) \item{shape}{Numerical vector with two elements, one for each dimension. If \code{NULL}, then the following is used \code{1 + c(max(tbl["soma_dim_0"]), max(tbl["soma_dim_1"]))}} + +\item{zero_based}{Logical, if TRUE returns a Matrix::\link{sparse_matrix} +otherwise \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix}} } \value{ -\link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} +Matrix::\link{sparse_matrix} or \link{matrixZeroBasedView} of Matrix::\link[Matrix]{SparseMatrix} } \description{ Converts a \link[Arrow]{Table} of sparse format (columns: "soma_dim_0", diff --git a/apis/r/man/print.matrixZeroBasedView.Rd b/apis/r/man/print.matrixZeroBasedView.Rd deleted file mode 100644 index bece76ed93..0000000000 --- a/apis/r/man/print.matrixZeroBasedView.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-matrixZeroBasedView.R -\name{print.matrixZeroBasedView} -\alias{print.matrixZeroBasedView} -\title{print} -\usage{ -\method{print}{matrixZeroBasedView}(x) -} -\arguments{ -\item{x}{The zero-based matrix view.} -} -\value{ -Matrix column count. 
-} -\description{ -print -} From b4aecf3f87ff5f69d0309db3d15b2ecb428eb3d8 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 25 May 2023 21:27:03 -0700 Subject: [PATCH 17/18] remove comment --- apis/r/R/utils-readerTransformers.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/apis/r/R/utils-readerTransformers.R b/apis/r/R/utils-readerTransformers.R index d1289a1249..c4fda5d22f 100644 --- a/apis/r/R/utils-readerTransformers.R +++ b/apis/r/R/utils-readerTransformers.R @@ -65,14 +65,6 @@ arrow_table_to_sparse <- function(tbl, repr = c("C", "T", "R"), shape = NULL, ze #' @return \link{matrixZeroBasedView} of \link[base]{matrix} arrow_table_to_dense <- function(tbl, byrow) { - # To instantiate the one-based Matrix::sparseMatrix, we need to add 1 to the - # zero-based soma_dim_0 and soma_dim_1 (done by arrow_table_to_sparse). But, because these dimensions are - # usually populated with soma_joinid, users will need to access the matrix - # using the original, possibly-zero IDs. Therefore, we'll wrap the one-based - # sparseMatrix with a shim providing basic access with zero-based indexes. - # If needed, user can then explicitly ask the shim for the underlying - # sparseMatrix using `as.one.based()`. 
- nrows <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_0")))) ncols <- length(unique(as.numeric(tbl$GetColumnByName("soma_dim_1")))) soma_data <- as.numeric(tbl$GetColumnByName("soma_data")) From 07c2e94663650258a552420711f7854fc9a2fa79 Mon Sep 17 00:00:00 2001 From: Pablo E Garcia-Nieto Date: Thu, 25 May 2023 22:24:47 -0700 Subject: [PATCH 18/18] update vignettes --- apis/r/vignettes/soma-objects.Rmd | 19 +++++++++---------- apis/r/vignettes/soma-reading.Rmd | 28 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/apis/r/vignettes/soma-objects.Rmd b/apis/r/vignettes/soma-objects.Rmd index 832df21843..5747634562 100644 --- a/apis/r/vignettes/soma-objects.Rmd +++ b/apis/r/vignettes/soma-objects.Rmd @@ -71,10 +71,10 @@ experiment$obs$schema() Note that `soma_joinid` is a field that exists in every `SOMADataFrame` and acts as a join key for other objects in the dataset. -Again, when a SOMA object is accessed, only a pointer is returned and no data is read into memory. To load the data in memory, we call `read()`, which returns an [Arrow Table](https://arrow.apache.org/docs/r/reference/Table.html) and is easily converted to a data frame by appending `$to_data_frame()`. +Again, when a SOMA object is accessed, only a pointer is returned and no data is read into memory. To load the data in memory, we call `read()$concat()`, which returns an [Arrow Table](https://arrow.apache.org/docs/r/reference/Table.html) and is easily converted to a data frame by appending `$to_data_frame()`. ```{r} -experiment$obs$read() +experiment$obs$read()$concat() ``` The amount of data that can be read at once is determined by the `soma.init_buffer_bytes` configuration parameter, which, by default, is set to 16MB for each column. If the requested data is larger than this value an error will be thrown. 
@@ -82,7 +82,8 @@ The amount of data that can be read at once is determined by the `soma.init_buff If your system has more memory, you can increase this parameter to a larger value to read in more data at once. Alternatively, you can use the iterated reader, which retrieves data in chunks that are smaller than the `soma.init_buffer_bytes` parameter. The result of which is a list of Arrow Tables. ```{r} -experiment$obs$read(iterated = TRUE) +iterator <- experiment$obs$read() +iterator$read_next() ``` We can also select a subset of rows from the `SOMADataFrame` using the `coords` argument. This will retrieve only the required subset from disk to memory. In this example, we will select only the first 10 rows: @@ -90,19 +91,19 @@ We can also select a subset of rows from the `SOMADataFrame` using the `coords` *NOTE: The `coords` argument is 0-based.* ```{r} -experiment$obs$read(coords = 0:9) +experiment$obs$read(coords = 0:9)$concat() ``` As TileDB is a columnar format, we can also select a subset of the columns: ```{r} -experiment$obs$read(0:9, column_names = c("obs_id", "nCount_RNA")) +experiment$obs$read(0:9, column_names = c("obs_id", "nCount_RNA"))$concat() ``` Finally, we can use `value_filter` to retrieve a subset of rows that match a certain condition. ```{r} -experiment$obs$read(value_filter = "nCount_RNA > 100") +experiment$obs$read(value_filter = "nCount_RNA > 100")$concat() ``` And of course, you can combine all of these arguments together to get at only the data you need. 
@@ -187,13 +188,11 @@ zero-based underlying representation access but then accesses a one-based view as the sparse matrix functionality from package `Matrix` imposes this.* ```{r} -X_data$read_sparse_matrix_zero_based()$get_one_based_matrix()[1:5, 1:10] +X_data$read()$sparse_matrix()$concat()[1:5, 1:10] ``` Similarly to `SOMADataFrame`s, `read()` method we can define coordinates to slice obtain a subset of the matrix from disk: ```{r} -X_data$read_sparse_matrix_zero_based( - coords = list(soma_dim_0 = 0:4, soma_dim_1 = 0:9) -) +X_data$read(coords = list(soma_dim_0 = 0:4, soma_dim_1 = 0:9))$sparse_matrix()$concat() ``` diff --git a/apis/r/vignettes/soma-reading.Rmd b/apis/r/vignettes/soma-reading.Rmd index b0e8444580..0f22c5fa82 100644 --- a/apis/r/vignettes/soma-reading.Rmd +++ b/apis/r/vignettes/soma-reading.Rmd @@ -27,18 +27,18 @@ experiment <- load_dataset("soma-exp-pbmc-small") ## SOMA DataFrame -We'll start with the `obs` dataframe. Simply calling the `read()` method will load all of the data in memory as an [Arrow Table](https://arrow.apache.org/docs/r/reference/Table.html). +We'll start with the `obs` dataframe. Simply calling the `read()$concat()` method will load all of the data in memory as an [Arrow Table](https://arrow.apache.org/docs/r/reference/Table.html). ```{r} obs <- experiment$obs -obs$read() +obs$read()$concat() ``` This is easily converted into a `data.frame` using Arrow's methods: ```{r} -obs$read()$to_data_frame() +obs$read()$concat()$to_data_frame() ``` ### Slicing @@ -62,19 +62,19 @@ Let's look at a few ways to slice the dataframe. 
Select a single row: ```{r} -obs$read(coords = 0) +obs$read(coords = 0)$concat() ``` Select multiple, non-contiguous rows: ```{r} -obs$read(coords = c(0, 2)) +obs$read(coords = c(0, 2))$concat() ``` Select multiple, contiguous rows: ```{r} -obs$read(coords = 0:4) +obs$read(coords = 0:4)$concat() ``` ### Selecting columns @@ -82,7 +82,7 @@ obs$read(coords = 0:4) As TileDB is a columnar format, it is possible to select a subset of columns to read by using the `column_names` argument: ```{r} -obs$read(coords = 0:4, column_names = c("obs_id", "groups")) +obs$read(coords = 0:4, column_names = c("obs_id", "groups"))$concat() ``` ### Filtering @@ -92,25 +92,25 @@ In addition to slicing by coordinates you can also apply filters to the data usi Identify all cells in the `"g1"` group: ```{r} -obs$read(value_filter = "groups == 'g1'")$to_data_frame() +obs$read(value_filter = "groups == 'g1'")$concat()$to_data_frame() ``` Identify all cells in the `"g1"` or `"g2"` group: ```{r} -obs$read(value_filter = "groups == 'g1' | groups == 'g2'")$to_data_frame() +obs$read(value_filter = "groups == 'g1' | groups == 'g2'")$concat()$to_data_frame() ``` Altenratively, you can use the `%in%` operator: ```{r} -obs$read(value_filter = "groups %in% c('g1', 'g2')")$to_data_frame() +obs$read(value_filter = "groups %in% c('g1', 'g2')")$concat()$to_data_frame() ``` Identify all cells in the `"g1"` group with more than more than 60 features: ```{r} -obs$read(value_filter = "groups == 'g1' & nFeature_RNA > 60")$to_data_frame() +obs$read(value_filter = "groups == 'g1' & nFeature_RNA > 60")$concat()$to_data_frame() ``` ## SOMA SparseNDArray @@ -125,13 +125,13 @@ counts Similar to `SOMADataFrame`, we can load the data into memory as an Arrow Table: ```{r} -counts$read_arrow_table() +counts$read()$tables()$concat() ``` Or as a [`Matrix::sparseMatrix()`]: ```{r} -counts$read_sparse_matrix_zero_based(repr = "C") +counts$read()$sparse_matrix()$concat() ``` ### Slicing @@ -149,5 +149,5 @@ counts$schema() For 
example, here's how to fetch the first 5 rows of the matrix: ```{r} -counts$read_arrow_table(coords = list(soma_dim_0 = 0:4)) +counts$read(coords = list(soma_dim_0 = 0:4))$tables()$concat() ```