Skip to content

Commit

Permalink
Optimize RspBitmap.andNotEqualsImpl (#3592)
Browse files Browse the repository at this point in the history
* Optimize RspBitmap.andNotEqualsImpl

* Fix an issue with the result being empty.

* Tweaks.

* Fix issue with RspArray new ctor; add comments.

* Add test; fix bug.

* Reset RspBitmapTest parameters.

* spotlessfy.

* MOAR tests for coverage.

* Spotless, cov.
  • Loading branch information
jcferretti authored Mar 28, 2023
1 parent 65fbeeb commit fc9de8f
Show file tree
Hide file tree
Showing 4 changed files with 445 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,4 +218,211 @@ public static void forAllInvertedLongRanges(final RowSet sourceRowSet, final Row
lrc.accept(pendingStart.longValue(), pendingEnd.longValue());
}
}

/**
 * Range iterator that walks two {@link RowSet.RangeIterator}s in lockstep and emits the pieces obtained by
 * splitting their ranges wherever membership changes: each emitted range belongs to the first iterator only,
 * the second iterator only, or both (see {@link RangeMembership}).
 * <p>
 * Lookahead for each input is kept in a start/end pair of fields using sentinel values: a start of -1 means
 * "no pending range", and an end of -2 means "the input reported no more ranges". Presumably this relies on
 * real range boundaries never being negative -- TODO confirm against RowSet's key domain.
 * <p>
 * Inputs are only pulled from inside {@link #hasNext()}, so callers must call {@code hasNext()} before every
 * {@link #next()}; {@code next()} itself never fetches from the underlying iterators.
 */
public static class CombinedRangeIterator implements RowSet.RangeIterator {
/**
 * Which of the two inputs the current combined range came from. The bit value encodes membership: bit 0 is
 * set when the first input contains the range, bit 1 when the second does.
 */
public enum RangeMembership {
FIRST_ONLY(1), SECOND_ONLY(2), BOTH(3);

private final int membershipBits;

RangeMembership(final int membershipBits) {
this.membershipBits = membershipBits;
}

/** @return the raw bit encoding (1 = first only, 2 = second only, 3 = both). */
public int membershipBits() {
return membershipBits;
}

/** @return true if the range is present in the first input (FIRST_ONLY or BOTH). */
public boolean hasFirst() {
return (membershipBits & 1) != 0;
}

/** @return true if the range is present in the second input (SECOND_ONLY or BOTH). */
public boolean hasSecond() {
return (membershipBits & 2) != 0;
}
}

private final RowSet.RangeIterator it1;
private final RowSet.RangeIterator it2;

// -1 means we have used up all the current range for it1; this means we need to try to fetch
// another range from it1 if we haven't tried that already.
private long it1CurrStart;

// -2 means we have checked it1.hasNext() already and
// realized there are no more ranges available.
private long it1CurrEnd;

// -1 means we have used up all the current range for it2; this means we need to try to fetch
// another range from it2 if we haven't tried that already.
private long it2CurrStart;

// -2 means we have checked it2.hasNext() already and
// realized there are no more ranges available.
private long it2CurrEnd;

// The most recently emitted combined range and its tag; -1 / null until the first call to next().
private long currStart = -1, currEnd = -1;
private RangeMembership currMembership = null;

/**
 *
 * Provide the means to iterate over the combined ranges of two provided iterators. The resulting ranges in this
 * object are tagged with first, second, or both, to indicate if the current range was from the first iterator,
 * second iterator, or both respectively.
 *
 * @param it1 First iterator
 * @param it2 Second iterator
 */
public CombinedRangeIterator(final RowSet.RangeIterator it1, final RowSet.RangeIterator it2) {
this.it1 = it1;
this.it2 = it2;
// Both inputs start with "no pending range, exhaustion not yet checked".
it1CurrStart = -1;
it1CurrEnd = -1;
it2CurrStart = -1;
it2CurrEnd = -1;
}

/**
 * Refills the lookahead of any input whose pending range has been fully consumed (unless that input is
 * already known to be exhausted), then reports whether at least one input is not exhausted.
 *
 * @return true if a subsequent {@link #next()} can produce a range
 */
@Override
public boolean hasNext() {
if (it1CurrStart == -1 && it1CurrEnd != -2) {
primeIter1();
}
if (it2CurrStart == -1 && it2CurrEnd != -2) {
primeIter2();
}
return it1CurrEnd != -2 || it2CurrEnd != -2;
}

// Pulls the next range from it1 into the lookahead fields, or marks it1 as exhausted (end == -2).
private void primeIter1() {
if (it1.hasNext()) {
it1.next();
it1CurrStart = it1.currentRangeStart();
it1CurrEnd = it1.currentRangeEnd();
} else {
it1CurrEnd = -2;
}
}

// Pulls the next range from it2 into the lookahead fields, or marks it2 as exhausted (end == -2).
private void primeIter2() {
if (it2.hasNext()) {
it2.next();
it2CurrStart = it2.currentRangeStart();
it2CurrEnd = it2.currentRangeEnd();
} else {
it2CurrEnd = -2;
}
}

/**
 * Emits the next combined range, updating the values returned by {@link #currentRangeStart()},
 * {@link #currentRangeEnd()} and {@link #currentRangeMembership()}.
 * <p>
 * When the pending ranges of both inputs overlap, only the leading piece with uniform membership is emitted;
 * the remainder of the partially consumed range stays pending by moving its start to {@code currEnd + 1}.
 *
 * @return the start of the emitted range
 * @throws IllegalStateException if neither input has a pending range, e.g. when called without a preceding
 *         {@link #hasNext()} that returned true
 */
@Override
public long next() {
if (it1CurrStart != -1) {
if (it2CurrStart != -1) {
if (it1CurrEnd < it2CurrStart) { // it1's range is completely to the left of it2's range.
currMembership = RangeMembership.FIRST_ONLY;
currStart = it1CurrStart;
currEnd = it1CurrEnd;
it1CurrStart = -1; // we consumed the it1 range completely.
} else if (it2CurrEnd < it1CurrStart) { // it1's range is completely to the right of it2's range.
currMembership = RangeMembership.SECOND_ONLY;
currStart = it2CurrStart;
currEnd = it2CurrEnd;
it2CurrStart = -1; // we consumed the it2 range completely.
} else { // it1's range has a non-empty overlap with it2's range.
// xWasStart: the emitted range begins at x's pending start, so x must be advanced.
// xWasEnd: the emitted range also reaches x's pending end, so x is fully consumed.
final boolean it1WasStart, it2WasStart, it1WasEnd, it2WasEnd;
if (it1CurrStart < it2CurrStart) {
// Emit the part of it1's range that precedes the overlap.
currMembership = RangeMembership.FIRST_ONLY;
currStart = it1CurrStart;
currEnd = it2CurrStart - 1;
it1WasStart = true;
it2WasStart = false;
it1WasEnd = it2WasEnd = false;
} else if (it2CurrStart < it1CurrStart) {
// Emit the part of it2's range that precedes the overlap.
currMembership = RangeMembership.SECOND_ONLY;
currStart = it2CurrStart;
currEnd = it1CurrStart - 1;
it1WasStart = false;
it2WasStart = true;
it1WasEnd = it2WasEnd = false;
} else { // it1CurrStart == it2CurrStart
// Emit the shared part, which runs up to the smaller of the two ends.
currMembership = RangeMembership.BOTH;
currStart = it1CurrStart;
it1WasStart = it2WasStart = true;
if (it1CurrEnd < it2CurrEnd) {
currEnd = it1CurrEnd;
it1WasEnd = true;
it2WasEnd = false;
} else if (it2CurrEnd < it1CurrEnd) {
currEnd = it2CurrEnd;
it1WasEnd = false;
it2WasEnd = true;
} else { // it1CurrEnd == it2CurrEnd
currEnd = it2CurrEnd;
it1WasEnd = true;
it2WasEnd = true;
}
}
// Consume from each iterator range appropriately.
if (it1WasStart) {
if (it1WasEnd) {
it1CurrStart = -1;
} else {
it1CurrStart = currEnd + 1;
}
}
if (it2WasStart) {
if (it2WasEnd) {
it2CurrStart = -1;
} else {
it2CurrStart = currEnd + 1;
}
}
}
} else { // it2currStart == -1, which at this point means no more it2 ranges.
currMembership = RangeMembership.FIRST_ONLY;
currStart = it1CurrStart;
currEnd = it1CurrEnd;
it1CurrStart = -1; // we consumed the it1 range completely.
}
} else { // it1CurrStart == -1, which at this point means no more it1 ranges.
if (it2CurrStart == -1) {
throw new IllegalStateException("Internal invariant violated");
}
currMembership = RangeMembership.SECOND_ONLY;
currStart = it2CurrStart;
currEnd = it2CurrEnd;
it2CurrStart = -1; // we consumed the it2 range completely.
}
return currStart;
}

/** Closes both underlying iterators. */
@Override
public void close() {
it1.close();
it2.close();
}

/** @return the start of the range emitted by the last {@link #next()}, or -1 before the first call. */
@Override
public long currentRangeStart() {
return currStart;
}

/** @return the end of the range emitted by the last {@link #next()}, or -1 before the first call. */
@Override
public long currentRangeEnd() {
return currEnd;
}

/** @return the membership tag of the range emitted by the last {@link #next()}, or null before the first call. */
public RangeMembership currentRangeMembership() {
return currMembership;
}

/** Not supported by this iterator; always throws {@link UnsupportedOperationException}. */
@Override
public boolean advance(long notUsed) {
throw new UnsupportedOperationException();
}

/** Not supported by this iterator; always throws {@link UnsupportedOperationException}. */
@Override
public void postpone(long notUsed) {
throw new UnsupportedOperationException();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,52 @@ private void maybeSetAcc(final int i, final long accumCard) {
acc[i] = accumCard;
}

/**
 * Builds a new array from the spans of {@code src} at indices {@code startIdx} through {@code endIdx}, both
 * inclusive. Span payloads are shared with {@code src} rather than copied: {@code short[]} payloads are flagged
 * as shared in this array's span info, and {@code Container} payloads are switched to copy-on-write.
 * <p>
 * Cardinality bookkeeping: when the new size is at most {@code accNullThreshold} no {@code acc} array is kept
 * and {@code cardData} is recomputed from the spans. Otherwise {@code acc} is either rebased from
 * {@code src.acc} (by subtracting the value accumulated before {@code startIdx}) or, when {@code src} has no
 * {@code acc}, rebuilt through {@code ensureCardinalityCache}.
 * <p>
 * NOTE(review): the rebasing path reads {@code src.acc[startIdx - 1 .. endIdx]} and assumes those entries are
 * up to date -- confirm against how {@code src.cardData} bounds the valid part of {@code src.acc}.
 *
 * @param src the array to take spans from
 * @param startIdx index in {@code src} of the first span to include
 * @param endIdx index in {@code src} of the last span to include (inclusive)
 */
public RspArray(
        final RspArray src,
        final int startIdx,
        final int endIdx) {
    size = endIdx - startIdx + 1;
    spanInfos = new long[size];
    spans = new Object[size];
    // -1 doubles as "do not rebase acc from src"; any other value is the amount to subtract.
    long srcAccBeforeStart = -1;
    if (size > accNullThreshold) {
        acc = new long[size];
        if (src.acc == null) {
            cardData = -1;
        } else {
            srcAccBeforeStart = (startIdx == 0) ? 0 : src.acc[startIdx - 1];
            cardData = size - 1;
        }
    } else {
        acc = null;
    }

    for (int i = 0; i < size; ++i) {
        final int isrc = startIdx + i;
        if (srcAccBeforeStart != -1) {
            acc[i] = src.acc[isrc] - srcAccBeforeStart;
        }
        spanInfos[i] = src.spanInfos[isrc];
        final Object span = src.spans[isrc];
        spans[i] = span;
        // Nothing to mark for payload-free spans. A full block span may also be stored as a Long
        // (RspBitmap.andNotImpl compares such spans by value); a Long is immutable, so it can be shared
        // as is, and it must not reach the Container cast below.
        if (span == null || span == FULL_BLOCK_SPAN_MARKER || span instanceof Long) {
            continue;
        }
        if (span instanceof short[]) {
            spanInfos[i] |= SPANINFO_ARRAYCONTAINER_SHARED_BITMASK;
            continue;
        }
        // span instanceof Container
        ((Container) span).setCopyOnWrite();
    }
    if (acc == null) {
        ensureCardData(false);
    } else if (src.acc == null) {
        ensureCardinalityCache(false);
    }
    ifDebugValidate();
}

public RspArray(
final RspArray src,
final int startIdx, final long startOffset,
Expand Down Expand Up @@ -1527,6 +1573,19 @@ T forceAcc() {
return ref;
}

/**
 * Discards the {@code acc} array and recomputes {@code cardData} as the sum of the cardinalities of all spans.
 * If the running sum ever exceeds {@link Integer#MAX_VALUE}, {@code cardData} is set to -1 and the scan stops.
 *
 * @param optimizeContainers forwarded to {@code getSpanCardinalityAtIndex} for every span
 */
void ensureCardData(final boolean optimizeContainers) {
    acc = null;
    long total = 0;
    int idx = 0;
    while (idx < size) {
        total += getSpanCardinalityAtIndex(idx, optimizeContainers);
        if (total > Integer.MAX_VALUE) {
            // Does not fit in an int: record "unknown" and give up.
            cardData = -1;
            return;
        }
        ++idx;
    }
    cardData = (int) total;
}

void ensureCardinalityCache(final boolean optimizeContainers) {
if (size == 0) {
acc = null;
Expand All @@ -1535,16 +1594,7 @@ void ensureCardinalityCache(final boolean optimizeContainers) {
return;
}
if (size <= accNullThreshold) {
acc = null;
long c = 0;
for (int i = 0; i < size; ++i) {
c += getSpanCardinalityAtIndex(i, optimizeContainers);
if (c > Integer.MAX_VALUE) {
cardData = -1;
return;
}
}
cardData = (int) c;
ensureCardData(optimizeContainers);
return;
}
if (acc == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ public RspBitmap(
super(src, startIdx, startOffset, endIdx, endOffset);
}

/**
 * Creates a bitmap from the spans of {@code src} at indices {@code startIdx} through {@code endIdx}; all of the
 * work is delegated to the superclass constructor taking the same three arguments.
 *
 * @param src the array to take spans from
 * @param startIdx index in {@code src} of the first span to include
 * @param endIdx index in {@code src} of the last span to include
 */
public RspBitmap(final RspArray src, final int startIdx, final int endIdx) {
super(src, startIdx, endIdx);
}

/**
 * Factory for a fresh bitmap built with the no-argument constructor; by its name, intended to represent the
 * empty set.
 *
 * @return a new {@code RspBitmap} instance
 */
public static RspBitmap makeEmpty() {
return new RspBitmap();
}
Expand Down Expand Up @@ -1023,7 +1027,53 @@ public RspBitmap andEqualsUnsafe(final RspBitmap other) {
* @return r1 and not r2 as a new RspArray.
*/
public static RspBitmap andNotImpl(final RspBitmap r1, final RspBitmap r2) {
    // Spans in an "obvious" common prefix are present in both arguments, so they contribute nothing to
    // r1 and-not r2 and do not need to be copied into the result at all.
    final int startIndex = countObviousCommonPrefixSpans(r1, r2);
    final RspBitmap r;
    if (startIndex == 0) {
        r = r1.deepCopy();
    } else {
        if (startIndex == r1.size) {
            // Every span of r1 is matched by an identical span in r2.
            return makeEmpty();
        }
        r = new RspBitmap(r1, startIndex, r1.size - 1);
    }
    r.andNotEqualsUnsafeNoWriteCheck(r2);
    return r;
}

/**
 * Counts how many leading spans of {@code r1} and {@code r2} are recognizably identical without looking inside
 * containers.
 * <p>
 * Two spans at the same index are considered identical when their span infos are equal and either their span
 * objects are the same reference (including both being null), or both are {@code Long}s with the same value.
 * The scan stops at the first index that fails this test.
 *
 * @param r1 first bitmap
 * @param r2 second bitmap
 * @return the number of leading spans found identical, between 0 and {@code min(r1.size, r2.size)}
 */
private static int countObviousCommonPrefixSpans(final RspBitmap r1, final RspBitmap r2) {
    final int minLen = Math.min(r1.size, r2.size);
    for (int i = 0; i < minLen; ++i) {
        if (r1.spanInfos[i] != r2.spanInfos[i]) {
            // We do not detect the case where a full block span is encoded differently
            // (with a marker object in the spans array and the lower 16 bits of spanInfo in one case,
            // versus a Long object in the other).
            // We also wouldn't detect a singleton container that is encoded as null span object in one
            // case, with the lower 16 bits indicating the singleton value, and with an actual container
            // with a single element in the other.
            // Bottom line we need the exact same optimization applied to both RspBitmap arguments.
            return i;
        }
        final Object r1Span = r1.spans[i];
        final Object r2Span = r2.spans[i];
        if (r1Span == r2Span) {
            // r1Span and r2Span are either:
            // (a) Both null, representing singleton spans, so our check for spanInfo equality was enough
            // to guarantee sameness
            // (b) The same object, representing a shared container or full block span (either marker or
            // Long; if marker our check for spanInfo equality was enough to guarantee sameness).
            continue;
        }
        // Distinct objects: only two Longs holding the same value still count as identical.
        // In the case of containers, we only detect same object being shared;
        // we do not try to compare contents of containers otherwise.
        final boolean sameLongValue = r1Span instanceof Long && r2Span instanceof Long
                && ((Long) r1Span).longValue() == ((Long) r2Span).longValue();
        if (!sameLongValue) {
            return i;
        }
    }
    return minLen;
}
Expand Down
Loading

0 comments on commit fc9de8f

Please sign in to comment.