Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ydb/library/yql/minikql/comp_nodes/mkql_grace_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ void TGraceJoinPacker::Pack() {
case NUdf::EDataSlot::Interval:
WriteUnaligned<i64>(buffPtr, value.Get<i64>()); break;
case NUdf::EDataSlot::Date32:
WriteUnaligned<i64>(buffPtr, value.Get<i32>()); break;
WriteUnaligned<i32>(buffPtr, value.Get<i32>()); break;
case NUdf::EDataSlot::Datetime64:
WriteUnaligned<i64>(buffPtr, value.Get<i64>()); break;
case NUdf::EDataSlot::Timestamp64:
Expand Down
29 changes: 21 additions & 8 deletions ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings
}


XXH64_hash_t hash = XXH64(TempTuple.data(), TempTuple.size() * sizeof(ui64), 0);
XXH64_hash_t hash = XXH64(TempTuple.data() + NullsBitmapSize_, (TempTuple.size() - NullsBitmapSize_) * sizeof(ui64), 0);

if (!hash) hash = 1;

Expand Down Expand Up @@ -298,6 +298,8 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
std::swap(JoinTable1, JoinTable2);
}

ui64 tuplesFound = 0;

std::vector<ui64, TMKQLAllocator<ui64, EMemorySubPool::Temporary>> joinSlots, spillSlots, slotToIdx;
std::vector<ui32, TMKQLAllocator<ui32, EMemorySubPool::Temporary>> stringsOffsets1, stringsOffsets2;
ui64 reservedSize = 6 * (DefaultTupleBytes * DefaultTuplesNum) / sizeof(ui64);
Expand All @@ -320,12 +322,19 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
ui64 nullsSize2 = JoinTable2->NullsBitmapSize_;
ui64 keyIntOffset1 = HashSize + nullsSize1;
ui64 keyIntOffset2 = HashSize + nullsSize2;
bool table1HasKeyStringColumns = (JoinTable1->NumberOfKeyStringColumns != 0);
bool table2HasKeyStringColumns = (JoinTable2->NumberOfKeyStringColumns != 0);
bool table1HasKeyIColumns = (JoinTable1->NumberOfKeyIColumns != 0);
bool table2HasKeyIColumns = (JoinTable2->NumberOfKeyIColumns != 0);


if ( bucket2->TuplesNum > bucket1->TuplesNum ) {
std::swap(bucket1, bucket2);
std::swap(headerSize1, headerSize2);
std::swap(nullsSize1, nullsSize2);
std::swap(keyIntOffset1, keyIntOffset2);
std::swap(table1HasKeyStringColumns, table2HasKeyStringColumns);
std::swap(table1HasKeyIColumns, table2HasKeyIColumns);
}

joinResults.reserve(3 * bucket1->TuplesNum );
Expand All @@ -334,7 +343,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef

ui64 avgStringsSize = ( 3 * (bucket2->KeyIntVals.size() - bucket2->TuplesNum * headerSize2) ) / ( 2 * bucket2->TuplesNum + 1) + 1;

if (JoinTable1->NumberOfKeyStringColumns != 0 || JoinTable1->NumberOfKeyIColumns != 0) {
if (table2HasKeyStringColumns || table2HasKeyIColumns ) {
slotSize = slotSize + avgStringsSize;
}

Expand All @@ -351,7 +360,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
while (it2 != bucket2->KeyIntVals.end() ) {

ui64 keysValSize;
if ( JoinTable2->NumberOfKeyStringColumns > 0 || JoinTable2->NumberOfKeyIColumns > 0) {
if ( table2HasKeyStringColumns || table2HasKeyIColumns) {
keysValSize = headerSize2 + *(it2 + headerSize2 - 1) ;
} else {
keysValSize = headerSize2;
Expand Down Expand Up @@ -396,7 +405,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
while ( it1 < bucket1->KeyIntVals.end() ) {

ui64 keysValSize;
if ( JoinTable1->NumberOfKeyStringColumns > 0 || JoinTable1->NumberOfKeyIColumns > 0) {
if ( table1HasKeyStringColumns || table1HasKeyIColumns ) {
keysValSize = headerSize1 + *(it1 + headerSize1 - 1) ;
} else {
keysValSize = headerSize1;
Expand All @@ -418,24 +427,26 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
{

bool matchFound = false;
if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !JoinTable1->NumberOfKeyIColumns ) {
if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !table1HasKeyIColumns ) {
if (std::equal(it1 + keyIntOffset1, it1 + keysValSize, slotIt + keyIntOffset2)) {
tuplesFound++;
matchFound = true;
}
}

if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !JoinTable1->NumberOfKeyIColumns ) {
if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !table1HasKeyIColumns) {
if (std::equal(it1 + keyIntOffset1, it1 + headerSize1, slotIt + keyIntOffset2)) {
ui64 stringsPos = *(slotIt + headerSize2);
ui64 stringsSize = *(it1 + headerSize1 - 1);
if (std::equal(it1 + headerSize1, it1 + headerSize1 + stringsSize, spillSlots.begin() + stringsPos)) {
tuplesFound++;
matchFound = true;
}
}
}


if (JoinTable1->NumberOfKeyIColumns)
if (table1HasKeyIColumns)
{
bool headerMatch = false;
bool stringsMatch = false;
Expand All @@ -452,7 +463,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
slotStringsStart = spillSlots.begin() + stringsPos;
}

if ( JoinTable1->NumberOfKeyStringColumns == 0) {
if ( !table1HasKeyStringColumns) {
stringsMatch = true;
} else {
ui64 stringsSize = *(it1 + headerSize1 - 1);
Expand All @@ -479,6 +490,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
}

if (headerMatch && stringsMatch && iValuesMatch) {
tuplesFound++;
matchFound = true;
}

Expand Down Expand Up @@ -556,6 +568,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
HasMoreLeftTuples_ = hasMoreLeftTuples;
HasMoreRightTuples_ = hasMoreRightTuples;

TuplesFound_ += tuplesFound;

}

Expand Down
2 changes: 2 additions & 0 deletions ydb/library/yql/minikql/comp_nodes/mkql_grace_join_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ class TTable {

bool Table2Initialized_ = false; // True when iterator counters for second table already initialized

ui64 TuplesFound_ = 0; // Total number of matching keys found during join

public:

// Adds new tuple to the table. intColumns, stringColumns - data of columns,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1522,12 +1522,12 @@ Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinTest) {
const auto iterator = graph->GetValue().GetListIterator();
NUdf::TUnboxedValue tuple;

UNIT_ASSERT(iterator.Next(tuple));
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X");
UNIT_ASSERT(!tuple.GetElement(1));
UNIT_ASSERT(iterator.Next(tuple));
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 1);
UNIT_ASSERT(iterator.Next(tuple));
UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X");
UNIT_ASSERT(!tuple.GetElement(1));
UNIT_ASSERT(!iterator.Next(tuple));
UNIT_ASSERT(!iterator.Next(tuple));
}
Expand Down
22 changes: 11 additions & 11 deletions ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -956,30 +956,30 @@
"test.test[join-count_bans--Results]": [],
"test.test[join-grace_join2--Analyze]": [
{
"checksum": "45db7c8306c9626a640bcb81c9c76780",
"size": 4462,
"uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt"
"checksum": "759025fd6317614a253eae816ff5941d",
"size": 5059,
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Analyze_/plan.txt"
}
],
"test.test[join-grace_join2--Debug]": [
{
"checksum": "0684948a27f55b655c998444a9060053",
"size": 1890,
"uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched"
"checksum": "34fdff009f1cfcdc53164eeb5db58dd7",
"size": 2171,
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Debug_/opt.yql_patched"
}
],
"test.test[join-grace_join2--Plan]": [
{
"checksum": "45db7c8306c9626a640bcb81c9c76780",
"size": 4462,
"uri": "https://{canondata_backend}/1599023/ee6490b3365cf6b396283cb8bd07f94ceff767b4/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt"
"checksum": "759025fd6317614a253eae816ff5941d",
"size": 5059,
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Plan_/plan.txt"
}
],
"test.test[join-grace_join2--Results]": [
{
"checksum": "65a9b307bc9899b17f61962a5d4a49fb",
"checksum": "2ad0b4f3207032d285d5f99430e9abaf",
"size": 5737,
"uri": "https://{canondata_backend}/1899731/149477001e0a8762e03fe5262dd2d939b716f0bf/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt"
"uri": "https://{canondata_backend}/1923547/c3f064ea25dafaabdc78d527cb888e8c29c155df/resource.tar.gz#test.test_join-grace_join2--Results_/results.txt"
}
],
"test.test[join-inmem_by_uncomparable_structs--Analyze]": [
Expand Down
18 changes: 9 additions & 9 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -7512,9 +7512,9 @@
],
"test_sql2yql.test[join-grace_join2]": [
{
"checksum": "4909542187f7c74060abc053d5707f26",
"size": 1627,
"uri": "https://{canondata_backend}/1942278/d84f6d9ab025b27e11f463124468076d499ed9b3/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql"
"checksum": "dec15765d9200297261bb22775ec5338",
"size": 1782,
"uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql2yql.test_join-grace_join2_/sql.yql"
}
],
"test_sql2yql.test[join-group_compact_by]": [
Expand Down Expand Up @@ -25222,9 +25222,9 @@
],
"test_sql_format.test[join-grace_join2]": [
{
"checksum": "4946227ff929407fc62f749ef756ef4d",
"size": 185,
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql"
"checksum": "7656454a9434ff51ab800908ae346c42",
"size": 233,
"uri": "https://{canondata_backend}/1871182/e726c72e47d3c077e5ba351b53dba460544020da/resource.tar.gz#test_sql_format.test_join-grace_join2_/formatted.sql"
}
],
"test_sql_format.test[join-group_compact_by]": [
Expand Down Expand Up @@ -25992,9 +25992,9 @@
],
"test_sql_format.test[join-nopushdown_filter_with_depends_on]": [
{
"checksum": "7c0b7c120f321f9b415663ece29a09cd",
"size": 247,
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql"
"checksum": "956eea7d7ef4126950ed02a322c6c492",
"size": 272,
"uri": "https://{canondata_backend}/212715/1c52a4632d14126361f7585c218d202718c6fa0f/resource.tar.gz#test_sql_format.test_join-nopushdown_filter_with_depends_on_/formatted.sql"
}
],
"test_sql_format.test[join-opt_on_opt_side]": [
Expand Down
2 changes: 1 addition & 1 deletion ydb/library/yql/tests/sql/suites/join/grace_join2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ from
plato.customers1 as c1
join
plato.customers1 as c2
on c1.country_id = c2.country_id;
on c1.country_id = c2.country_id order by c1.customer_id, c2.customer_id;
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/* postgres can not */
/* hybridfile can not */
/* custom check: len(yt_res_yson[0]['Write'][0]['Data']) < 4 */
use plato;

Expand Down