Skip to content

Commit d91dc89

Browse files
authored
Merge 56a0926 into dfb50ad
2 parents dfb50ad + 56a0926 commit d91dc89

File tree

8 files changed

+49
-33
lines changed

8 files changed

+49
-33
lines changed

ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ void TGraceJoinPacker::Pack() {
272272
case NUdf::EDataSlot::Interval:
273273
WriteUnaligned<i64>(buffPtr, value.Get<i64>()); break;
274274
case NUdf::EDataSlot::Date32:
275-
WriteUnaligned<i64>(buffPtr, value.Get<i32>()); break;
275+
WriteUnaligned<i32>(buffPtr, value.Get<i32>()); break;
276276
case NUdf::EDataSlot::Datetime64:
277277
WriteUnaligned<i64>(buffPtr, value.Get<i64>()); break;
278278
case NUdf::EDataSlot::Timestamp64:

ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings
7676
}
7777

7878

79-
XXH64_hash_t hash = XXH64(TempTuple.data(), TempTuple.size() * sizeof(ui64), 0);
79+
XXH64_hash_t hash = XXH64(TempTuple.data() + NullsBitmapSize_, (TempTuple.size() - NullsBitmapSize_) * sizeof(ui64), 0);
8080

8181
if (!hash) hash = 1;
8282

@@ -298,6 +298,8 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
298298
std::swap(JoinTable1, JoinTable2);
299299
}
300300

301+
ui64 tuplesFound = 0;
302+
301303
std::vector<ui64, TMKQLAllocator<ui64, EMemorySubPool::Temporary>> joinSlots, spillSlots, slotToIdx;
302304
std::vector<ui32, TMKQLAllocator<ui32, EMemorySubPool::Temporary>> stringsOffsets1, stringsOffsets2;
303305
ui64 reservedSize = 6 * (DefaultTupleBytes * DefaultTuplesNum) / sizeof(ui64);
@@ -320,12 +322,19 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
320322
ui64 nullsSize2 = JoinTable2->NullsBitmapSize_;
321323
ui64 keyIntOffset1 = HashSize + nullsSize1;
322324
ui64 keyIntOffset2 = HashSize + nullsSize2;
325+
bool table1HasKeyStringColumns = (JoinTable1->NumberOfKeyStringColumns != 0);
326+
bool table2HasKeyStringColumns = (JoinTable2->NumberOfKeyStringColumns != 0);
327+
bool table1HasKeyIColumns = (JoinTable1->NumberOfKeyIColumns != 0);
328+
bool table2HasKeyIColumns = (JoinTable2->NumberOfKeyIColumns != 0);
329+
323330

324331
if ( bucket2->TuplesNum > bucket1->TuplesNum ) {
325332
std::swap(bucket1, bucket2);
326333
std::swap(headerSize1, headerSize2);
327334
std::swap(nullsSize1, nullsSize2);
328335
std::swap(keyIntOffset1, keyIntOffset2);
336+
std::swap(table1HasKeyStringColumns, table2HasKeyStringColumns);
337+
std::swap(table1HasKeyIColumns, table2HasKeyIColumns);
329338
}
330339

331340
joinResults.reserve(3 * bucket1->TuplesNum );
@@ -334,7 +343,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
334343

335344
ui64 avgStringsSize = ( 3 * (bucket2->KeyIntVals.size() - bucket2->TuplesNum * headerSize2) ) / ( 2 * bucket2->TuplesNum + 1) + 1;
336345

337-
if (JoinTable1->NumberOfKeyStringColumns != 0 || JoinTable1->NumberOfKeyIColumns != 0) {
346+
if (table2HasKeyStringColumns || table2HasKeyIColumns ) {
338347
slotSize = slotSize + avgStringsSize;
339348
}
340349

@@ -351,7 +360,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
351360
while (it2 != bucket2->KeyIntVals.end() ) {
352361

353362
ui64 keysValSize;
354-
if ( JoinTable2->NumberOfKeyStringColumns > 0 || JoinTable2->NumberOfKeyIColumns > 0) {
363+
if ( table2HasKeyStringColumns || table2HasKeyIColumns) {
355364
keysValSize = headerSize2 + *(it2 + headerSize2 - 1) ;
356365
} else {
357366
keysValSize = headerSize2;
@@ -396,7 +405,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
396405
while ( it1 < bucket1->KeyIntVals.end() ) {
397406

398407
ui64 keysValSize;
399-
if ( JoinTable1->NumberOfKeyStringColumns > 0 || JoinTable1->NumberOfKeyIColumns > 0) {
408+
if ( table1HasKeyStringColumns || table1HasKeyIColumns ) {
400409
keysValSize = headerSize1 + *(it1 + headerSize1 - 1) ;
401410
} else {
402411
keysValSize = headerSize1;
@@ -418,24 +427,26 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
418427
{
419428

420429
bool matchFound = false;
421-
if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !JoinTable1->NumberOfKeyIColumns ) {
430+
if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !table1HasKeyIColumns ) {
422431
if (std::equal(it1 + keyIntOffset1, it1 + keysValSize, slotIt + keyIntOffset2)) {
432+
tuplesFound++;
423433
matchFound = true;
424434
}
425435
}
426436

427-
if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !JoinTable1->NumberOfKeyIColumns ) {
437+
if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !table1HasKeyIColumns) {
428438
if (std::equal(it1 + keyIntOffset1, it1 + headerSize1, slotIt + keyIntOffset2)) {
429439
ui64 stringsPos = *(slotIt + headerSize2);
430440
ui64 stringsSize = *(it1 + headerSize1 - 1);
431441
if (std::equal(it1 + headerSize1, it1 + headerSize1 + stringsSize, spillSlots.begin() + stringsPos)) {
442+
tuplesFound++;
432443
matchFound = true;
433444
}
434445
}
435446
}
436447

437448

438-
if (JoinTable1->NumberOfKeyIColumns)
449+
if (table1HasKeyIColumns)
439450
{
440451
bool headerMatch = false;
441452
bool stringsMatch = false;
@@ -452,7 +463,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
452463
slotStringsStart = spillSlots.begin() + stringsPos;
453464
}
454465

455-
if ( JoinTable1->NumberOfKeyStringColumns == 0) {
466+
if ( !table1HasKeyStringColumns) {
456467
stringsMatch = true;
457468
} else {
458469
ui64 stringsSize = *(it1 + headerSize1 - 1);
@@ -479,6 +490,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
479490
}
480491

481492
if (headerMatch && stringsMatch && iValuesMatch) {
493+
tuplesFound++;
482494
matchFound = true;
483495
}
484496

@@ -556,6 +568,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
556568
HasMoreLeftTuples_ = hasMoreLeftTuples;
557569
HasMoreRightTuples_ = hasMoreRightTuples;
558570

571+
TuplesFound_ += tuplesFound;
559572

560573
}
561574

ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ class TTable {
169169

170170
bool Table2Initialized_ = false; // True when iterator counters for second table already initialized
171171

172+
ui64 TuplesFound_ = 0; // Total number of matching keys found during join
173+
172174
public:
173175

174176
// Adds new tuple to the table. intColumns, stringColumns - data of columns,

ydb/library/yql/minikql/comp_nodes/ut/mkql_grace_join_ut.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,12 +1522,12 @@ Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinTest) {
15221522
const auto iterator = graph->GetValue().GetListIterator();
15231523
NUdf::TUnboxedValue tuple;
15241524

1525-
UNIT_ASSERT(iterator.Next(tuple));
1526-
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X");
1527-
UNIT_ASSERT(!tuple.GetElement(1));
15281525
UNIT_ASSERT(iterator.Next(tuple));
15291526
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
15301527
UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 1);
1528+
UNIT_ASSERT(iterator.Next(tuple));
1529+
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X");
1530+
UNIT_ASSERT(!tuple.GetElement(1));
15311531
UNIT_ASSERT(!iterator.Next(tuple));
15321532
UNIT_ASSERT(!iterator.Next(tuple));
15331533
}

ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -956,30 +956,30 @@
956956
"test.test[join-count_bans--Results]": [],
957957
"test.test[join-grace_join2--Analyze]": [
958958
{
959-
"checksum": "45db7c8306c9626a640bcb81c9c76780",
960-
"size": 4462,
961-
"uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt"
959+
"checksum": "759025fd6317614a253eae816ff5941d",
960+
"size": 5059,
961+
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt"
962962
}
963963
],
964964
"test.test[join-grace_join2--Debug]": [
965965
{
966-
"checksum": "0684948a27f55b655c998444a9060053",
967-
"size": 1890,
968-
"uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched"
966+
"checksum": "34fdff009f1cfcdc53164eeb5db58dd7",
967+
"size": 2171,
968+
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched"
969969
}
970970
],
971971
"test.test[join-grace_join2--Plan]": [
972972
{
973-
"checksum": "45db7c8306c9626a640bcb81c9c76780",
974-
"size": 4462,
975-
"uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt"
973+
"checksum": "759025fd6317614a253eae816ff5941d",
974+
"size": 5059,
975+
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt"
976976
}
977977
],
978978
"test.test[join-grace_join2--Results]": [
979979
{
980-
"checksum": "65a9b307bc9899b17f61962a5d4a49fb",
980+
"checksum": "2ad0b4f3207032d285d5f99430e9abaf",
981981
"size": 5737,
982-
"uri": "https://{canondata_backend}/1899731/149477001e0a8762e03fe5262dd2d939b716f0bf/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt"
982+
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt"
983983
}
984984
],
985985
"test.test[join-inmem_by_uncomparable_structs--Analyze]": [

ydb/library/yql/tests/sql/sql2yql/canondata/result.json

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7512,9 +7512,9 @@
75127512
],
75137513
"test_sql2yql.test[join-grace_join2]": [
75147514
{
7515-
"checksum": "4909542187f7c74060abc053d5707f26",
7516-
"size": 1627,
7517-
"uri": "https://{canondata_backend}/1942278/d84f6d9ab025b27e11f463124468076d499ed9b3/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql"
7515+
"checksum": "dec15765d9200297261bb22775ec5338",
7516+
"size": 1782,
7517+
"uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql"
75187518
}
75197519
],
75207520
"test_sql2yql.test[join-group_compact_by]": [
@@ -25222,9 +25222,9 @@
2522225222
],
2522325223
"test_sql_format.test[join-grace_join2]": [
2522425224
{
25225-
"checksum": "4946227ff929407fc62f749ef756ef4d",
25226-
"size": 185,
25227-
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql"
25225+
"checksum": "7656454a9434ff51ab800908ae346c42",
25226+
"size": 233,
25227+
"uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql"
2522825228
}
2522925229
],
2523025230
"test_sql_format.test[join-group_compact_by]": [
@@ -25992,9 +25992,9 @@
2599225992
],
2599325993
"test_sql_format.test[join-nopushdown_filter_with_depends_on]": [
2599425994
{
25995-
"checksum": "7c0b7c120f321f9b415663ece29a09cd",
25996-
"size": 247,
25997-
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql"
25995+
"checksum": "956eea7d7ef4126950ed02a322c6c492",
25996+
"size": 272,
25997+
"uri": "https://{canondata_backend}/212715/1c52a4632d14126361f7585c218d202718c6fa0f/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql"
2599825998
}
2599925999
],
2600026000
"test_sql_format.test[join-opt_on_opt_side]": [

ydb/library/yql/tests/sql/suites/join/grace_join2.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ from
77
plato.customers1 as c1
88
join
99
plato.customers1 as c2
10-
on c1.country_id = c2.country_id;
10+
on c1.country_id = c2.country_id order by c1.customer_id, c2.customer_id;

ydb/library/yql/tests/sql/suites/join/nopushdown_filter_with_depends_on.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/* postgres can not */
2+
/* hybridfile can not */
23
/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 4 */
34
use plato;
45

0 commit comments

Comments
 (0)