diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index 2ee5626e5d40..a790af3015f7 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -892,24 +892,33 @@ std::shared_ptr MergeColumns(const std::vector> SliceToRecordBatches(const std::shared_ptr& t) { - std::set splitPositions; - const ui32 numRows = t->num_rows(); - for (auto&& i : t->columns()) { - ui32 pos = 0; - for (auto&& arr : i->chunks()) { - splitPositions.emplace(pos); - pos += arr->length(); + if (!t->num_rows()) { + return {}; + } + std::vector positions; + { + for (auto&& i : t->columns()) { + ui32 pos = 0; + for (auto&& arr : i->chunks()) { + positions.emplace_back(pos); + pos += arr->length(); + } + AFL_VERIFY(pos == t->num_rows()); } - AFL_VERIFY(pos == t->num_rows()); + positions.emplace_back(t->num_rows()); } + std::sort(positions.begin(), positions.end()); + positions.erase(std::unique(positions.begin(), positions.end()), positions.end()); + std::vector>> slicedData; - slicedData.resize(splitPositions.size()); - std::vector positions(splitPositions.begin(), splitPositions.end()); - for (auto&& i : t->columns()) { - for (ui32 idx = 0; idx < positions.size(); ++idx) { - auto slice = i->Slice(positions[idx], ((idx + 1 == positions.size()) ? numRows : positions[idx + 1]) - positions[idx]); - AFL_VERIFY(slice->num_chunks() == 1); - slicedData[idx].emplace_back(slice->chunks().front()); + slicedData.resize(positions.size() - 1); + { + for (auto&& i : t->columns()) { + for (ui32 idx = 0; idx + 1 < positions.size(); ++idx) { + auto slice = i->Slice(positions[idx], positions[idx + 1] - positions[idx]); + AFL_VERIFY(slice->num_chunks() == 1); + slicedData[idx].emplace_back(slice->chunks().front()); + } } } std::vector> result;