Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-28621 PrefixFilter should use SEEK_NEXT_USING_HINT #6361

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.ArrayList;
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
Expand All @@ -35,10 +36,11 @@
* Pass results that have same row prefix.
*/
@InterfaceAudience.Public
public class PrefixFilter extends FilterBase {
public class PrefixFilter extends FilterBase implements HintingFilter {
protected byte[] prefix = null;
protected boolean passedPrefix = false;
protected boolean filterRow = true;
protected boolean provideHint = false;

public PrefixFilter(final byte[] prefix) {
this.prefix = prefix;
Expand All @@ -50,10 +52,16 @@ public byte[] getPrefix() {

@Override
public boolean filterRowKey(Cell firstRowCell) {
if (firstRowCell == null || this.prefix == null) return true;
if (filterAllRemaining()) return true;
if (firstRowCell == null || this.prefix == null) {
return true;
}
if (filterAllRemaining()) {
return true;
}
int length = firstRowCell.getRowLength();
if (length < prefix.length) return true;
Comment on lines -53 to -56
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are actually not strictly required to resolve the ticket. Just the checkstyle report contained these warnings:

(blocks) NeedBraces: 'if' construct must use '{}'s.
(blocks) NeedBraces: 'if' construct must use '{}'s.
(blocks) NeedBraces: 'if' construct must use '{}'s.
(blocks) NeedBraces: 'if' construct must use '{}'s.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually this if looks buggy.
If the firstRowCell is lexicographically smaller than the prefix, then we still want to provide the hint, regardless of the key length.

We can probably just drop this, I believe that the compare later will also handle this case.
This was supposed to be an optization, but it is not relevant when we provide hints, and in fact hurts performance.

Please fix and also add test cases for this.

if (length < prefix.length) {
return true;
}
// if they are equal, return false => pass row
// else return true, filter row
// if we are passed the prefix, set flag
Expand All @@ -69,13 +77,19 @@ public boolean filterRowKey(Cell firstRowCell) {
if ((!isReversed() && cmp > 0) || (isReversed() && cmp < 0)) {
passedPrefix = true;
}
filterRow = (cmp != 0);
filterRow = (!isReversed() && cmp > 0) || (isReversed() && cmp != 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change looks fishy

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Completely agree. But this fixes the previously failing TestFilter.testPrefixFilterWithReverseScan unit test. 😅

provideHint = (!isReversed() && cmp < 0) || (isReversed() && cmp > 0);
return filterRow;
}

@Override
public ReturnCode filterCell(final Cell c) {
if (filterRow) return ReturnCode.NEXT_ROW;
if (filterRow) {
return ReturnCode.NEXT_ROW;
}
if (provideHint) {
return ReturnCode.SEEK_NEXT_USING_HINT;
}
return ReturnCode.INCLUDE;
}

Expand Down Expand Up @@ -105,7 +119,9 @@ public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments
@Override
public byte[] toByteArray() {
FilterProtos.PrefixFilter.Builder builder = FilterProtos.PrefixFilter.newBuilder();
if (this.prefix != null) builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix));
if (this.prefix != null) {
builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix));
}
return builder.build().toByteArray();
}

Expand Down Expand Up @@ -142,6 +158,11 @@ boolean areSerializedFieldsEqual(Filter o) {
return Bytes.equals(this.getPrefix(), other.getPrefix());
}

@Override
public Cell getNextCellHint(Cell cell) {
return PrivateCellUtil.createFirstOnRow(prefix, 0, (short) prefix.length);
}

@Override
public String toString() {
return this.getClass().getSimpleName() + " " + Bytes.toStringBinary(this.prefix);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@

import static org.junit.Assert.*;

import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.testclassification.FilterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
Expand All @@ -40,7 +43,6 @@ public class TestPrefixFilter {
static final char FIRST_CHAR = 'a';
static final char LAST_CHAR = 'e';
static final String HOST_PREFIX = "org.apache.site-";
static final byte[] GOOD_BYTES = Bytes.toBytes("abc");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also a bit unrelated - an unused constant field.


@Before
public void setUp() throws Exception {
Expand Down Expand Up @@ -91,4 +93,75 @@ private byte[] createRow(final char c) {
return Bytes.toBytes(HOST_PREFIX + Character.toString(c));
}

@Test
public void shouldReturnSeekNextUsingHintWhenKeyBefore() throws IOException {
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("com.yahoo.www.a."));

// Should include this row so that filterCell() will be invoked.
assertFalse(mainFilter.filterRowKey(cell));
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, mainFilter.filterCell(cell));
Cell actualCellHint = mainFilter.getNextCellHint(cell);
assertNotNull(actualCellHint);
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(Bytes.toBytes(HOST_PREFIX));
assertEquals(expectedCellHint, actualCellHint);
assertFalse(mainFilter.filterAllRemaining());
}

@Test
public void shouldReturnIncludeWhenKeyMatches() throws IOException {
KeyValue matchingCell = KeyValueUtil.createFirstOnRow(createRow('a'));

assertFalse(mainFilter.filterRowKey(matchingCell));
assertEquals(Filter.ReturnCode.INCLUDE, mainFilter.filterCell(matchingCell));
assertFalse(mainFilter.filterAllRemaining());
}

@Test
public void shouldReturnNextRowWhenKeyAfter() throws IOException {
KeyValue afterCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pt.example.www.1"));

assertTrue(mainFilter.filterRowKey(afterCell));
assertEquals(Filter.ReturnCode.NEXT_ROW, mainFilter.filterCell(afterCell));
assertTrue(mainFilter.filterAllRemaining());
}

@Test
public void shouldReturnSeekNextUsingHintWhenKeyBeforeReversed() throws IOException {
mainFilter.setReversed(true);

KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pt.example.www.1"));

// TODO: Should include this row so that filterCell() will be invoked.
// assertFalse(mainFilter.filterRowKey(cell));
assertTrue(mainFilter.filterRowKey(cell));
// assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, mainFilter.filterCell(cell)); TODO
assertEquals(Filter.ReturnCode.NEXT_ROW, mainFilter.filterCell(cell));
Cell actualCellHint = mainFilter.getNextCellHint(cell);
assertNotNull(actualCellHint);
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(Bytes.toBytes(HOST_PREFIX));
assertEquals(expectedCellHint, actualCellHint);
assertFalse(mainFilter.filterAllRemaining());
}

@Test
public void shouldReturnIncludeWhenKeyMatchesReversed() throws IOException {
mainFilter.setReversed(true);

KeyValue matchingCell = KeyValueUtil.createFirstOnRow(createRow('a'));

assertFalse(mainFilter.filterRowKey(matchingCell));
assertEquals(Filter.ReturnCode.INCLUDE, mainFilter.filterCell(matchingCell));
assertFalse(mainFilter.filterAllRemaining());
}

@Test
public void shouldReturnNextRowWhenKeyAfterReversed() throws IOException {
mainFilter.setReversed(true);

KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("com.yahoo.www.a."));

assertTrue(mainFilter.filterRowKey(cell));
assertEquals(Filter.ReturnCode.NEXT_ROW, mainFilter.filterCell(cell));
assertTrue(mainFilter.filterAllRemaining());
}
}