diff --git a/cpp/arcticdb/processing/clause.cpp b/cpp/arcticdb/processing/clause.cpp
index b828cf27cc4..ed870d949f2 100644
--- a/cpp/arcticdb/processing/clause.cpp
+++ b/cpp/arcticdb/processing/clause.cpp
@@ -670,8 +670,10 @@ void RowRangeClause::set_processing_config(const ProcessingConfig& processing_co
     switch(row_range_type_) {
         case RowRangeType::HEAD:
             if (n_ >= 0) {
+                start_ = 0;
                 end_ = std::min(n_, total_rows);
             } else {
+                start_ = 0;
                 end_ = std::max(static_cast(0), total_rows + n_);
             }
             break;
@@ -684,6 +686,14 @@ void RowRangeClause::set_processing_config(const ProcessingConfig& processing_co
                 end_ = total_rows;
             }
             break;
+        case RowRangeType::RANGE:
+            // start_ and end_ were set by the (start, end) constructor; only sanity-check them and derive n_
+            if (start_ > end_) {
+                internal::raise("RowRangeClause start index {} is greater than end index {}", start_, end_);
+            }
+            n_ = end_ - start_;
+            break;
+
         default:
             internal::raise("Unrecognised RowRangeType {}", static_cast(row_range_type_));
     }
diff --git a/cpp/arcticdb/processing/clause.hpp b/cpp/arcticdb/processing/clause.hpp
index 821fdc5251d..6dc61824d5e 100644
--- a/cpp/arcticdb/processing/clause.hpp
+++ b/cpp/arcticdb/processing/clause.hpp
@@ -511,13 +511,14 @@ struct ColumnStatsGenerationClause {
 struct RowRangeClause {
     enum class RowRangeType: uint8_t {
         HEAD,
-        TAIL
+        TAIL,
+        RANGE
     };
 
     ClauseInfo clause_info_;
     RowRangeType row_range_type_;
     // As passed into head or tail
-    int64_t n_;
+    int64_t n_{0};
 
     // Row range to keep. Zero-indexed, inclusive of start, exclusive of end
     // Calculated from n, whether the RowRangeType is head or tail, and the total rows as passed in by set_processing_config
@@ -529,6 +530,12 @@ struct RowRangeClause {
             n_(n) {
     }
 
+    explicit RowRangeClause(int64_t start, int64_t end):
+        row_range_type_(RowRangeType::RANGE),
+        start_(start),
+        end_(end) {
+    }
+
     RowRangeClause() = delete;
 
     ARCTICDB_MOVE_COPY_DEFAULT(RowRangeClause)
diff --git a/cpp/arcticdb/version/python_bindings.cpp b/cpp/arcticdb/version/python_bindings.cpp
index 6c614831525..108d0ed0f7d 100644
--- a/cpp/arcticdb/version/python_bindings.cpp
+++ b/cpp/arcticdb/version/python_bindings.cpp
@@ -218,9 +218,11 @@ void register_bindings(py::module &version, py::exception>(version, "SignedRowRange")
-            .def(py::init([](int64_t start, int64_t end){
-                return SignedRowRange{start, end};
-            }));
+        .def(py::init([](int64_t start, int64_t end){
+            return SignedRowRange{start, end};
+        }))
+        .def_readwrite("start_", &pipelines::SignedRowRange::start_)
+        .def_readwrite("end_", &pipelines::SignedRowRange::end_);
 
     py::class_>(version, "ColRange")
         .def_property_readonly("start", &pipelines::ColRange::start)
@@ -288,10 +290,12 @@ void register_bindings(py::module &version, py::exception(version, "RowRangeType")
             .value("HEAD", RowRangeClause::RowRangeType::HEAD)
-            .value("TAIL", RowRangeClause::RowRangeType::TAIL);
+            .value("TAIL", RowRangeClause::RowRangeType::TAIL)
+            .value("RANGE", RowRangeClause::RowRangeType::RANGE);
 
     py::class_>(version, "RowRangeClause")
             .def(py::init())
+            .def(py::init())
             .def("__str__", &RowRangeClause::to_string);
 
     py::class_>(version, "DateRangeClause")
diff --git a/python/arcticdb/version_store/_store.py b/python/arcticdb/version_store/_store.py
index b9ad0564f08..9cb2bbde2b3 100644
--- a/python/arcticdb/version_store/_store.py
+++ b/python/arcticdb/version_store/_store.py
@@ -1592,10 +1592,12 @@ def read(
             of the data that falls within the given range.
             The same effect can be achieved by using the date_range clause of the QueryBuilder class, which will be
             slower, but return data with a smaller memory footprint. See the QueryBuilder.date_range docstring for more details.
+            Only one of date_range or row_range can be provided.
         row_range: `Optional[Tuple[int, int]]`, default=None
             Row range to read data for. Inclusive of the lower bound, exclusive of the upper bound
             lib.read(symbol, row_range=(start, end)).data should behave the same as df.iloc[start:end], including in
-            the handling of negative start/end values. Only one of date_range or row_range can be provided.
+            the handling of negative start/end values.
+            Only one of date_range or row_range can be provided.
         columns: `Optional[List[str]]`, default=None
             Applicable only for Pandas data. Determines which columns to return data for.
         query_builder: 'Optional[QueryBuilder]', default=None
@@ -1610,9 +1612,15 @@ def read(
 
         if row_range is not None:
             row_range = _SignedRowRange(row_range[0], row_range[1])
+
         if date_range is not None and query_builder is not None:
             q = QueryBuilder()
             query_builder = q.date_range(date_range).then(query_builder)
+
+        if row_range is not None and query_builder is not None:
+            q = QueryBuilder()
+            query_builder = q._row_range(row_range).then(query_builder)
+
         version_query, read_options, read_query = self._get_queries(
             as_of, date_range, row_range, columns, query_builder, **kwargs
         )
diff --git a/python/arcticdb/version_store/library.py b/python/arcticdb/version_store/library.py
index 1c7336fc81a..af72d3ccd80 100644
--- a/python/arcticdb/version_store/library.py
+++ b/python/arcticdb/version_store/library.py
@@ -922,6 +922,7 @@ def read(
         symbol: str,
         as_of: Optional[AsOf] = None,
         date_range: Optional[Tuple[Optional[Timestamp], Optional[Timestamp]]] = None,
+        row_range: Optional[Tuple[int, int]] = None,
         columns: Optional[List[str]] = None,
         query_builder: Optional[QueryBuilder] = None,
     ) -> VersionedItem:
@@ -952,6 +953,15 @@ def read(
             slower, but return data with a smaller memory footprint. See the QueryBuilder.date_range docstring for more
             details.
 
+            Only one of date_range or row_range can be provided.
+
+        row_range: `Optional[Tuple[int, int]]`, default=None
+            Row range to read data for. Inclusive of the lower bound, exclusive of the upper bound
+            lib.read(symbol, row_range=(start, end)).data should behave the same as df.iloc[start:end], including in
+            the handling of negative start/end values.
+
+            Only one of date_range or row_range can be provided.
+
         columns: List[str], default=None
             Applicable only for Pandas data. Determines which columns to return data for.
 
@@ -983,7 +993,12 @@ def read(
         2       7
         """
         return self._nvs.read(
-            symbol=symbol, as_of=as_of, date_range=date_range, columns=columns, query_builder=query_builder
+            symbol=symbol,
+            as_of=as_of,
+            date_range=date_range,
+            row_range=row_range,
+            columns=columns,
+            query_builder=query_builder,
         )
 
     def read_batch(
diff --git a/python/arcticdb/version_store/processing.py b/python/arcticdb/version_store/processing.py
index c2ebc1c93ec..20cc27d7350 100644
--- a/python/arcticdb/version_store/processing.py
+++ b/python/arcticdb/version_store/processing.py
@@ -266,7 +266,9 @@ def value_list_from_args(*args):
 PythonProjectionClause = namedtuple("PythonProjectionClause", ["name", "expr"])
 PythonGroupByClause = namedtuple("PythonGroupByClause", ["name"])
 PythonAggregationClause = namedtuple("PythonAggregationClause", ["aggregations"])
-PythonRowRangeClause = namedtuple("PythonRowRangeClause", ["row_range_type", "n"])
+PythonRowRangeClause = namedtuple(
+    "PythonRowRangeClause", ["row_range_type", "n", "start", "end"], defaults=(None, None, None, None)
+)
 PythonDateRangeClause = namedtuple("PythonDateRangeClause", ["start", "end"])
 
 
@@ -519,9 +521,19 @@ def _head(self, n: int):
     def _tail(self, n: int):
         check(not len(self.clauses), "Tail only supported as first clause in the pipeline")
         self.clauses.append(_RowRangeClause(_RowRangeType.TAIL, n))
-        self._python_clauses.append(PythonRowRangeClause(_RowRangeType.TAIL, n))
+        self._python_clauses.append(PythonRowRangeClause(row_range_type=_RowRangeType.TAIL, n=n))
         return self
 
+    def _row_range(self, signed_row_range):
+        """
+        Keep only rows [start_, end_) of ``signed_row_range``. Only supported as the first clause in the pipeline.
+        """
+        check(not len(self.clauses), "Row range only supported as first clause in the pipeline")
+        start, end = signed_row_range.start_, signed_row_range.end_
+        self.clauses.append(_RowRangeClause(start, end))
+        self._python_clauses.append(PythonRowRangeClause(row_range_type=_RowRangeType.RANGE, start=start, end=end))
+        return self
+
     def date_range(self, date_range: DateRangeInput):
         """
         DateRange to read data for. Applicable only for Pandas data with a DateTime index. Returns only the part