@@ -76,7 +76,7 @@ void TTable::AddTuple( ui64 * intColumns, char ** stringColumns, ui32 * strings
7676 }
7777
7878
79- XXH64_hash_t hash = XXH64 (TempTuple.data (), TempTuple.size () * sizeof (ui64), 0 );
79+ XXH64_hash_t hash = XXH64 (TempTuple.data () + NullsBitmapSize_, ( TempTuple.size () - NullsBitmapSize_ ) * sizeof (ui64), 0 );
8080
8181 if (!hash) hash = 1 ;
8282
@@ -298,6 +298,8 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
298298 std::swap (JoinTable1, JoinTable2);
299299 }
300300
301+ ui64 tuplesFound = 0 ;
302+
301303 std::vector<ui64, TMKQLAllocator<ui64, EMemorySubPool::Temporary>> joinSlots, spillSlots, slotToIdx;
302304 std::vector<ui32, TMKQLAllocator<ui32, EMemorySubPool::Temporary>> stringsOffsets1, stringsOffsets2;
303305 ui64 reservedSize = 6 * (DefaultTupleBytes * DefaultTuplesNum) / sizeof (ui64);
@@ -320,12 +322,19 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
320322 ui64 nullsSize2 = JoinTable2->NullsBitmapSize_ ;
321323 ui64 keyIntOffset1 = HashSize + nullsSize1;
322324 ui64 keyIntOffset2 = HashSize + nullsSize2;
325+ bool table1HasKeyStringColumns = (JoinTable1->NumberOfKeyStringColumns != 0 );
326+ bool table2HasKeyStringColumns = (JoinTable2->NumberOfKeyStringColumns != 0 );
327+ bool table1HasKeyIColumns = (JoinTable1->NumberOfKeyIColumns != 0 );
328+ bool table2HasKeyIColumns = (JoinTable2->NumberOfKeyIColumns != 0 );
329+
323330
324331 if ( bucket2->TuplesNum > bucket1->TuplesNum ) {
325332 std::swap (bucket1, bucket2);
326333 std::swap (headerSize1, headerSize2);
327334 std::swap (nullsSize1, nullsSize2);
328335 std::swap (keyIntOffset1, keyIntOffset2);
336+ std::swap (table1HasKeyStringColumns, table2HasKeyStringColumns);
337+ std::swap (table1HasKeyIColumns, table2HasKeyIColumns);
329338 }
330339
331340 joinResults.reserve (3 * bucket1->TuplesNum );
@@ -334,7 +343,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
334343
335344 ui64 avgStringsSize = ( 3 * (bucket2->KeyIntVals .size () - bucket2->TuplesNum * headerSize2) ) / ( 2 * bucket2->TuplesNum + 1 ) + 1 ;
336345
337- if (JoinTable1-> NumberOfKeyStringColumns != 0 || JoinTable1-> NumberOfKeyIColumns != 0 ) {
346+ if (table2HasKeyStringColumns || table2HasKeyIColumns ) {
338347 slotSize = slotSize + avgStringsSize;
339348 }
340349
@@ -351,7 +360,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
351360 while (it2 != bucket2->KeyIntVals .end () ) {
352361
353362 ui64 keysValSize;
354- if ( JoinTable2-> NumberOfKeyStringColumns > 0 || JoinTable2-> NumberOfKeyIColumns > 0 ) {
363+ if ( table2HasKeyStringColumns || table2HasKeyIColumns ) {
355364 keysValSize = headerSize2 + *(it2 + headerSize2 - 1 ) ;
356365 } else {
357366 keysValSize = headerSize2;
@@ -396,7 +405,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
396405 while ( it1 < bucket1->KeyIntVals .end () ) {
397406
398407 ui64 keysValSize;
399- if ( JoinTable1-> NumberOfKeyStringColumns > 0 || JoinTable1-> NumberOfKeyIColumns > 0 ) {
408+ if ( table1HasKeyStringColumns || table1HasKeyIColumns ) {
400409 keysValSize = headerSize1 + *(it1 + headerSize1 - 1 ) ;
401410 } else {
402411 keysValSize = headerSize1;
@@ -418,24 +427,26 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
418427 {
419428
420429 bool matchFound = false ;
421- if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !JoinTable1-> NumberOfKeyIColumns ) {
430+ if (((keysValSize - nullsSize1) <= (slotSize - nullsSize2)) && !table1HasKeyIColumns ) {
422431 if (std::equal (it1 + keyIntOffset1, it1 + keysValSize, slotIt + keyIntOffset2)) {
432+ tuplesFound++;
423433 matchFound = true ;
424434 }
425435 }
426436
427- if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !JoinTable1-> NumberOfKeyIColumns ) {
437+ if (((keysValSize - nullsSize1) > (slotSize - nullsSize2)) && !table1HasKeyIColumns ) {
428438 if (std::equal (it1 + keyIntOffset1, it1 + headerSize1, slotIt + keyIntOffset2)) {
429439 ui64 stringsPos = *(slotIt + headerSize2);
430440 ui64 stringsSize = *(it1 + headerSize1 - 1 );
431441 if (std::equal (it1 + headerSize1, it1 + headerSize1 + stringsSize, spillSlots.begin () + stringsPos)) {
442+ tuplesFound++;
432443 matchFound = true ;
433444 }
434445 }
435446 }
436447
437448
438- if (JoinTable1-> NumberOfKeyIColumns )
449+ if (table1HasKeyIColumns )
439450 {
440451 bool headerMatch = false ;
441452 bool stringsMatch = false ;
@@ -452,7 +463,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
452463 slotStringsStart = spillSlots.begin () + stringsPos;
453464 }
454465
455- if ( JoinTable1-> NumberOfKeyStringColumns == 0 ) {
466+ if ( !table1HasKeyStringColumns ) {
456467 stringsMatch = true ;
457468 } else {
458469 ui64 stringsSize = *(it1 + headerSize1 - 1 );
@@ -479,6 +490,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
479490 }
480491
481492 if (headerMatch && stringsMatch && iValuesMatch) {
493+ tuplesFound++;
482494 matchFound = true ;
483495 }
484496
@@ -556,6 +568,7 @@ void TTable::Join( TTable & t1, TTable & t2, EJoinKind joinKind, bool hasMoreLef
556568 HasMoreLeftTuples_ = hasMoreLeftTuples;
557569 HasMoreRightTuples_ = hasMoreRightTuples;
558570
571+ TuplesFound_ += tuplesFound;
559572
560573}
561574
0 commit comments