11#include " filter.h"
2+
3+ #include < ydb/core/formats/arrow/serializer/native.h>
4+
25#include < ydb/library/actors/core/log.h>
36
47namespace NKikimr ::NOlap {
@@ -14,43 +17,50 @@ NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(const arrow::Datum&
1417 return result;
1518}
1619
17- bool TPKRangesFilter::Add (std::shared_ptr<NOlap::TPredicate> f, std::shared_ptr<NOlap::TPredicate> t, const TIndexInfo* indexInfo) {
20+ TConclusionStatus TPKRangesFilter::Add (
21+ std::shared_ptr<NOlap::TPredicate> f, std::shared_ptr<NOlap::TPredicate> t, const std::shared_ptr<arrow::Schema>& pkSchema) {
1822 if ((!f || f->Empty ()) && (!t || t->Empty ())) {
19- return true ;
23+ return TConclusionStatus::Success ();
24+ }
25+ auto fromContainerConclusion = TPredicateContainer::BuildPredicateFrom (f, pkSchema);
26+ if (fromContainerConclusion.IsFail ()) {
27+ AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " add_range_filter" )(" problem" , " incorrect from container" )(
28+ " from" , fromContainerConclusion.GetErrorMessage ());
29+ return fromContainerConclusion;
2030 }
21- auto fromContainer = TPredicateContainer::BuildPredicateFrom (f, indexInfo );
22- auto toContainer = TPredicateContainer::BuildPredicateTo (t, indexInfo);
23- if (!fromContainer || !toContainer) {
24- AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)( " event " , " add_range_filter " )( " problem " , " incorrect from/to containers " )( " from" , !!fromContainer)( " to " , !!toContainer );
25- return false ;
31+ auto toContainerConclusion = TPredicateContainer::BuildPredicateTo (t, pkSchema );
32+ if (toContainerConclusion. IsFail ()) {
33+ AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)( " event " , " add_range_filter " )( " problem " , " incorrect to container " )(
34+ " from" , toContainerConclusion. GetErrorMessage () );
35+ return toContainerConclusion ;
2636 }
2737 if (SortedRanges.size () && !FakeRanges) {
2838 if (ReverseFlag) {
29- if (fromContainer ->CrossRanges (SortedRanges.front ().GetPredicateTo ())) {
39+ if (fromContainerConclusion ->CrossRanges (SortedRanges.front ().GetPredicateTo ())) {
3040 AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " add_range_filter" )(" problem" , " not sorted sequence" );
31- return false ;
41+ return TConclusionStatus::Fail ( " not sorted sequence " ) ;
3242 }
3343 } else {
34- if (fromContainer ->CrossRanges (SortedRanges.back ().GetPredicateTo ())) {
44+ if (fromContainerConclusion ->CrossRanges (SortedRanges.back ().GetPredicateTo ())) {
3545 AFL_ERROR (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " add_range_filter" )(" problem" , " not sorted sequence" );
36- return false ;
46+ return TConclusionStatus::Fail ( " not sorted sequence " ) ;
3747 }
3848 }
3949 }
40- auto pkRangeFilter = TPKRangeFilter::Build (std::move (*fromContainer ), std::move (*toContainer ));
41- if (!pkRangeFilter ) {
42- return false ;
50+ auto pkRangeFilterConclusion = TPKRangeFilter::Build (fromContainerConclusion. DetachResult ( ), toContainerConclusion. DetachResult ( ));
51+ if (pkRangeFilterConclusion. IsFail () ) {
52+ return pkRangeFilterConclusion ;
4353 }
4454 if (FakeRanges) {
4555 FakeRanges = false ;
4656 SortedRanges.clear ();
4757 }
4858 if (ReverseFlag) {
49- SortedRanges.emplace_front (std::move (*pkRangeFilter ));
59+ SortedRanges.emplace_front (pkRangeFilterConclusion. DetachResult ( ));
5060 } else {
51- SortedRanges.emplace_back (std::move (*pkRangeFilter ));
61+ SortedRanges.emplace_back (pkRangeFilterConclusion. DetachResult ( ));
5262 }
53- return true ;
63+ return TConclusionStatus::Success () ;
5464}
5565
5666TString TPKRangesFilter::DebugString () const {
@@ -84,6 +94,15 @@ bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info) const {
8494 return SortedRanges.empty ();
8595}
8696
97+ bool TPKRangesFilter::CheckPoint (const NArrow::TReplaceKey& point) const {
98+ for (auto && i : SortedRanges) {
99+ if (i.CheckPoint (point)) {
100+ return true ;
101+ }
102+ }
103+ return SortedRanges.empty ();
104+ }
105+
87106TPKRangeFilter::EUsageClass TPKRangesFilter::IsPortionInPartialUsage (const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const {
88107 for (auto && i : SortedRanges) {
89108 switch (i.IsPortionInPartialUsage (start, end)) {
@@ -99,11 +118,101 @@ TPKRangeFilter::EUsageClass TPKRangesFilter::IsPortionInPartialUsage(const NArro
99118}
100119
101120TPKRangesFilter::TPKRangesFilter (const bool reverse)
102- : ReverseFlag(reverse)
103- {
121+ : ReverseFlag(reverse) {
104122 auto range = TPKRangeFilter::Build (TPredicateContainer::BuildNullPredicateFrom (), TPredicateContainer::BuildNullPredicateTo ());
105123 Y_ABORT_UNLESS (range);
106124 SortedRanges.emplace_back (*range);
107125}
108126
127+ std::shared_ptr<arrow::RecordBatch> TPKRangesFilter::SerializeToRecordBatch (const std::shared_ptr<arrow::Schema>& pkSchema) const {
128+ auto fullSchema = NArrow::TStatusValidator::GetValid (
129+ pkSchema->AddField (pkSchema->num_fields (), std::make_shared<arrow::Field>(" .ydb_operation_type" , arrow::uint32 ())));
130+ auto builders = NArrow::MakeBuilders (fullSchema, SortedRanges.size () * 2 );
131+ for (auto && i : SortedRanges) {
132+ for (ui32 idx = 0 ; idx < (ui32)pkSchema->num_fields (); ++idx) {
133+ if (idx < i.GetPredicateFrom ().GetReplaceKey ()->Size ()) {
134+ AFL_VERIFY (NArrow::Append (
135+ *builders[idx], i.GetPredicateFrom ().GetReplaceKey ()->Column (idx), i.GetPredicateFrom ().GetReplaceKey ()->GetPosition ()));
136+ } else {
137+ NArrow::TStatusValidator::Validate (builders[idx]->AppendNull ());
138+ }
139+ }
140+ NArrow::Append<arrow::UInt32Type>(*builders[pkSchema->num_fields ()], (ui32)i.GetPredicateFrom ().GetCompareType ());
141+
142+ for (ui32 idx = 0 ; idx < (ui32)pkSchema->num_fields (); ++idx) {
143+ if (idx < i.GetPredicateTo ().GetReplaceKey ()->Size ()) {
144+ AFL_VERIFY (NArrow::Append (
145+ *builders[idx], i.GetPredicateTo ().GetReplaceKey ()->Column (idx), i.GetPredicateTo ().GetReplaceKey ()->GetPosition ()));
146+ } else {
147+ NArrow::TStatusValidator::Validate (builders[idx]->AppendNull ());
148+ }
149+ }
150+ NArrow::Append<arrow::UInt32Type>(*builders[pkSchema->num_fields ()], (ui32)i.GetPredicateTo ().GetCompareType ());
151+ }
152+ return arrow::RecordBatch::Make (fullSchema, SortedRanges.size () * 2 , NArrow::Finish (std::move (builders)));
109153}
154+
155+ std::shared_ptr<NKikimr::NOlap::TPKRangesFilter> TPKRangesFilter::BuildFromRecordBatchLines (
156+ const std::shared_ptr<arrow::RecordBatch>& batch, const bool reverse) {
157+ std::shared_ptr<TPKRangesFilter> result = std::make_shared<TPKRangesFilter>(reverse);
158+ for (ui32 i = 0 ; i < batch->num_rows (); ++i) {
159+ auto batchRow = batch->Slice (i, 1 );
160+ auto pFrom = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::GreaterEqual, batchRow);
161+ auto pTo = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::LessEqual, batchRow);
162+ result->Add (pFrom, pTo, batch->schema ()).Validate ();
163+ }
164+ return result;
165+ }
166+
167+ std::shared_ptr<NKikimr::NOlap::TPKRangesFilter> TPKRangesFilter::BuildFromRecordBatchFull (
168+ const std::shared_ptr<arrow::RecordBatch>& batch, const std::shared_ptr<arrow::Schema>& pkSchema, const bool reverse) {
169+ std::shared_ptr<TPKRangesFilter> result = std::make_shared<TPKRangesFilter>(reverse);
170+ auto pkBatch = NArrow::TColumnOperator ().Adapt (batch, pkSchema).DetachResult ();
171+ auto c = batch->GetColumnByName (" .ydb_operation_type" );
172+ AFL_VERIFY (c);
173+ AFL_VERIFY (c->type_id () == arrow::Type::UINT32);
174+ auto cUi32 = static_pointer_cast<arrow::UInt32Array>(c);
175+ for (ui32 i = 0 ; i < batch->num_rows ();) {
176+ std::shared_ptr<NOlap::TPredicate> pFrom;
177+ std::shared_ptr<NOlap::TPredicate> pTo;
178+ {
179+ auto batchRow = TPredicate::CutNulls (batch->Slice (i, 1 ));
180+ NKernels::EOperation op = (NKernels::EOperation)cUi32->Value (i);
181+ if (op == NKernels::EOperation::GreaterEqual || op == NKernels::EOperation::Greater) {
182+ pFrom = std::make_shared<NOlap::TPredicate>(op, batchRow);
183+ } else if (op == NKernels::EOperation::Equal) {
184+ pFrom = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::GreaterEqual, batchRow);
185+ } else {
186+ AFL_VERIFY (false );
187+ }
188+ if (op != NKernels::EOperation::Equal) {
189+ ++i;
190+ }
191+ }
192+ {
193+ auto batchRow = TPredicate::CutNulls (batch->Slice (i, 1 ));
194+ NKernels::EOperation op = (NKernels::EOperation)cUi32->Value (i);
195+ if (op == NKernels::EOperation::LessEqual || op == NKernels::EOperation::Less) {
196+ pTo = std::make_shared<NOlap::TPredicate>(op, batchRow);
197+ } else if (op == NKernels::EOperation::Equal) {
198+ pTo = std::make_shared<NOlap::TPredicate>(NKernels::EOperation::LessEqual, batchRow);
199+ } else {
200+ AFL_VERIFY (false );
201+ }
202+ }
203+ result->Add (pFrom, pTo, pkSchema).Validate ();
204+ }
205+ return result;
206+ }
207+
208+ std::shared_ptr<NKikimr::NOlap::TPKRangesFilter> TPKRangesFilter::BuildFromString (
209+ const TString& data, const std::shared_ptr<arrow::Schema>& pkSchema, const bool reverse) {
210+ auto batch = NArrow::TStatusValidator::GetValid (NArrow::NSerialization::TNativeSerializer ().Deserialize (data));
211+ return BuildFromRecordBatchFull (batch, pkSchema, reverse);
212+ }
213+
214+ TString TPKRangesFilter::SerializeToString (const std::shared_ptr<arrow::Schema>& pkSchema) const {
215+ return NArrow::NSerialization::TNativeSerializer ().SerializeFull (SerializeToRecordBatch (pkSchema));
216+ }
217+
218+ } // namespace NKikimr::NOlap
0 commit comments