-
Notifications
You must be signed in to change notification settings - Fork 3.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HBASE-28621 PrefixFilter should use SEEK_NEXT_USING_HINT #6361
base: master
Are you sure you want to change the base?
Changes from all commits
2993901
ad83f40
02ed23a
9641e9b
5b1d3ee
5085fd8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
import java.util.ArrayList; | ||
import org.apache.hadoop.hbase.ByteBufferExtendedCell; | ||
import org.apache.hadoop.hbase.Cell; | ||
import org.apache.hadoop.hbase.PrivateCellUtil; | ||
import org.apache.hadoop.hbase.exceptions.DeserializationException; | ||
import org.apache.hadoop.hbase.util.ByteBufferUtils; | ||
import org.apache.hadoop.hbase.util.Bytes; | ||
|
@@ -35,10 +36,11 @@ | |
* Pass results that have same row prefix. | ||
*/ | ||
@InterfaceAudience.Public | ||
public class PrefixFilter extends FilterBase { | ||
public class PrefixFilter extends FilterBase implements HintingFilter { | ||
protected byte[] prefix = null; | ||
protected boolean passedPrefix = false; | ||
protected boolean filterRow = true; | ||
protected boolean provideHint = false; | ||
|
||
public PrefixFilter(final byte[] prefix) { | ||
this.prefix = prefix; | ||
|
@@ -50,10 +52,16 @@ public byte[] getPrefix() { | |
|
||
@Override | ||
public boolean filterRowKey(Cell firstRowCell) { | ||
if (firstRowCell == null || this.prefix == null) return true; | ||
if (filterAllRemaining()) return true; | ||
if (firstRowCell == null || this.prefix == null) { | ||
return true; | ||
} | ||
if (filterAllRemaining()) { | ||
return true; | ||
} | ||
int length = firstRowCell.getRowLength(); | ||
if (length < prefix.length) return true; | ||
if (length < prefix.length) { | ||
return true; | ||
} | ||
// if they are equal, return false => pass row | ||
// else return true, filter row | ||
// if we are passed the prefix, set flag | ||
|
@@ -69,13 +77,19 @@ public boolean filterRowKey(Cell firstRowCell) { | |
if ((!isReversed() && cmp > 0) || (isReversed() && cmp < 0)) { | ||
passedPrefix = true; | ||
} | ||
filterRow = (cmp != 0); | ||
filterRow = (!isReversed() && cmp > 0) || (isReversed() && cmp != 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change looks fishy There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Completely agree. But this fixes the previously failing TestFilter.testPrefixFilterWithReverseScan unit test. 😅 |
||
provideHint = (!isReversed() && cmp < 0) || (isReversed() && cmp > 0); | ||
return filterRow; | ||
} | ||
|
||
@Override | ||
public ReturnCode filterCell(final Cell c) { | ||
if (filterRow) return ReturnCode.NEXT_ROW; | ||
if (filterRow) { | ||
return ReturnCode.NEXT_ROW; | ||
} | ||
if (provideHint) { | ||
return ReturnCode.SEEK_NEXT_USING_HINT; | ||
} | ||
return ReturnCode.INCLUDE; | ||
} | ||
|
||
|
@@ -105,7 +119,9 @@ public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments | |
@Override | ||
public byte[] toByteArray() { | ||
FilterProtos.PrefixFilter.Builder builder = FilterProtos.PrefixFilter.newBuilder(); | ||
if (this.prefix != null) builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix)); | ||
if (this.prefix != null) { | ||
builder.setPrefix(UnsafeByteOperations.unsafeWrap(this.prefix)); | ||
} | ||
return builder.build().toByteArray(); | ||
} | ||
|
||
|
@@ -142,6 +158,11 @@ boolean areSerializedFieldsEqual(Filter o) { | |
return Bytes.equals(this.getPrefix(), other.getPrefix()); | ||
} | ||
|
||
@Override | ||
public Cell getNextCellHint(Cell cell) { | ||
return PrivateCellUtil.createFirstOnRow(prefix, 0, (short) prefix.length); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return this.getClass().getSimpleName() + " " + Bytes.toStringBinary(this.prefix); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,10 @@ | |
|
||
import static org.junit.Assert.*; | ||
|
||
import java.io.IOException; | ||
import org.apache.hadoop.hbase.Cell; | ||
import org.apache.hadoop.hbase.HBaseClassTestRule; | ||
import org.apache.hadoop.hbase.KeyValue; | ||
import org.apache.hadoop.hbase.KeyValueUtil; | ||
import org.apache.hadoop.hbase.testclassification.FilterTests; | ||
import org.apache.hadoop.hbase.testclassification.SmallTests; | ||
|
@@ -40,7 +43,6 @@ public class TestPrefixFilter { | |
static final char FIRST_CHAR = 'a'; | ||
static final char LAST_CHAR = 'e'; | ||
static final String HOST_PREFIX = "org.apache.site-"; | ||
static final byte[] GOOD_BYTES = Bytes.toBytes("abc"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is also a bit unrelated - an unused constant field. |
||
|
||
@Before | ||
public void setUp() throws Exception { | ||
|
@@ -91,4 +93,75 @@ private byte[] createRow(final char c) { | |
return Bytes.toBytes(HOST_PREFIX + Character.toString(c)); | ||
} | ||
|
||
@Test | ||
public void shouldReturnSeekNextUsingHintWhenKeyBefore() throws IOException { | ||
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("com.yahoo.www.a.")); | ||
|
||
// Should include this row so that filterCell() will be invoked. | ||
assertFalse(mainFilter.filterRowKey(cell)); | ||
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, mainFilter.filterCell(cell)); | ||
Cell actualCellHint = mainFilter.getNextCellHint(cell); | ||
assertNotNull(actualCellHint); | ||
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(Bytes.toBytes(HOST_PREFIX)); | ||
assertEquals(expectedCellHint, actualCellHint); | ||
assertFalse(mainFilter.filterAllRemaining()); | ||
} | ||
|
||
@Test | ||
public void shouldReturnIncludeWhenKeyMatches() throws IOException { | ||
KeyValue matchingCell = KeyValueUtil.createFirstOnRow(createRow('a')); | ||
|
||
assertFalse(mainFilter.filterRowKey(matchingCell)); | ||
assertEquals(Filter.ReturnCode.INCLUDE, mainFilter.filterCell(matchingCell)); | ||
assertFalse(mainFilter.filterAllRemaining()); | ||
} | ||
|
||
@Test | ||
public void shouldReturnNextRowWhenKeyAfter() throws IOException { | ||
KeyValue afterCell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pt.example.www.1")); | ||
|
||
assertTrue(mainFilter.filterRowKey(afterCell)); | ||
assertEquals(Filter.ReturnCode.NEXT_ROW, mainFilter.filterCell(afterCell)); | ||
assertTrue(mainFilter.filterAllRemaining()); | ||
} | ||
|
||
@Test | ||
public void shouldReturnSeekNextUsingHintWhenKeyBeforeReversed() throws IOException { | ||
mainFilter.setReversed(true); | ||
|
||
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("pt.example.www.1")); | ||
|
||
// TODO: Should include this row so that filterCell() will be invoked. | ||
// assertFalse(mainFilter.filterRowKey(cell)); | ||
assertTrue(mainFilter.filterRowKey(cell)); | ||
// assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, mainFilter.filterCell(cell)); TODO | ||
assertEquals(Filter.ReturnCode.NEXT_ROW, mainFilter.filterCell(cell)); | ||
Cell actualCellHint = mainFilter.getNextCellHint(cell); | ||
assertNotNull(actualCellHint); | ||
Cell expectedCellHint = KeyValueUtil.createFirstOnRow(Bytes.toBytes(HOST_PREFIX)); | ||
assertEquals(expectedCellHint, actualCellHint); | ||
assertFalse(mainFilter.filterAllRemaining()); | ||
} | ||
|
||
@Test | ||
public void shouldReturnIncludeWhenKeyMatchesReversed() throws IOException { | ||
mainFilter.setReversed(true); | ||
|
||
KeyValue matchingCell = KeyValueUtil.createFirstOnRow(createRow('a')); | ||
|
||
assertFalse(mainFilter.filterRowKey(matchingCell)); | ||
assertEquals(Filter.ReturnCode.INCLUDE, mainFilter.filterCell(matchingCell)); | ||
assertFalse(mainFilter.filterAllRemaining()); | ||
} | ||
|
||
@Test | ||
public void shouldReturnNextRowWhenKeyAfterReversed() throws IOException { | ||
mainFilter.setReversed(true); | ||
|
||
KeyValue cell = KeyValueUtil.createFirstOnRow(Bytes.toBytes("com.yahoo.www.a.")); | ||
|
||
assertTrue(mainFilter.filterRowKey(cell)); | ||
assertEquals(Filter.ReturnCode.NEXT_ROW, mainFilter.filterCell(cell)); | ||
assertTrue(mainFilter.filterAllRemaining()); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are actually not strictly required to resolve the ticket. Just the checkstyle report contained these warnings:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually this if looks buggy.
If the firstRowCell is lexicographically smaller than the prefix, then we still want to provide the hint, regardless of the key length.
We can probably just drop this, I believe that the compare later will also handle this case.
This was supposed to be an optization, but it is not relevant when we provide hints, and in fact hurts performance.
Please fix and also add test cases for this.