From 654ed82df8525d7f161c911a8dab51f88cb4aa56 Mon Sep 17 00:00:00 2001 From: Vineet Kumar Maheshwari Date: Sun, 28 Apr 2024 19:56:13 +0530 Subject: [PATCH] HBASE-28482 Reverse scan with tags throws ArrayIndexOutOfBoundsException with DBE in setCurrentBlock flow (#5792) Signed-off-by: Pankaj Kumar Signed-off-by: Bryan Beaudreault --- .../io/encoding/BufferedDataBlockEncoder.java | 10 ++++-- .../hadoop/hbase/regionserver/TestTags.java | 36 +++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java index 04b95e5ef966..0f15151fe88b 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java @@ -834,6 +834,13 @@ public int compareKey(CellComparator comparator, Cell key) { public void setCurrentBuffer(ByteBuff buffer) { if (this.tagCompressionContext != null) { this.tagCompressionContext.clear(); + + // Prior seekToKeyInBlock may have reset this to false if we fell back to previous + // seeker state. This is an optimization so we don't have to uncompress tags again when + // reading last state. + // In the seekBefore flow, if the block changes then rewind is not called and + // setCurrentBuffer is called instead, so we need to uncompress any tags we see. + current.uncompressTags = true; } currentBuffer = buffer; current.currentBuffer = currentBuffer; @@ -876,9 +883,6 @@ public void rewind() { // reading last state. // In case of rewind, we are starting from the beginning of the buffer, so we need // to uncompress any tags we see. - // It may make sense to reset this in setCurrentBuffer as well, but we seem to only call - // setCurrentBuffer after StoreFileScanner.seekAtOrAfter which calls next to consume the - // seeker state. 
Rewind is called by seekBefore, which doesn't and leaves us in this state. current.uncompressTags = true; } decodeFirst(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java index 388df2744784..f71bcce6b443 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java @@ -125,32 +125,46 @@ public void testReverseScanWithDBE() throws IOException { try (Connection connection = ConnectionFactory.createConnection(conf)) { for (DataBlockEncoding encoding : DataBlockEncoding.values()) { - testReverseScanWithDBE(connection, encoding, family); + testReverseScanWithDBE(connection, encoding, family, HConstants.DEFAULT_BLOCKSIZE, 10); } } } - private void testReverseScanWithDBE(Connection conn, DataBlockEncoding encoding, byte[] family) - throws IOException { + /** + * Test that we can do reverse scans when writing tags and using DataBlockEncoding. 
Fails with an + * exception for PREFIX, DIFF, and FAST_DIFF + */ + @Test + public void testReverseScanWithDBEWhenCurrentBlockUpdates() throws IOException { + byte[] family = Bytes.toBytes("0"); + + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); + + try (Connection connection = ConnectionFactory.createConnection(conf)) { + for (DataBlockEncoding encoding : DataBlockEncoding.values()) { + testReverseScanWithDBE(connection, encoding, family, 1024, 30000); + } + } + } + + private void testReverseScanWithDBE(Connection conn, DataBlockEncoding encoding, byte[] family, + int blockSize, int maxRows) throws IOException { LOG.info("Running test with DBE={}", encoding); TableName tableName = TableName.valueOf(TEST_NAME.getMethodName() + "-" + encoding); - TEST_UTIL.createTable(TableDescriptorBuilder.newBuilder(tableName) - .setColumnFamily( - ColumnFamilyDescriptorBuilder.newBuilder(family).setDataBlockEncoding(encoding).build()) - .build(), null); + TEST_UTIL.createTable( + TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(ColumnFamilyDescriptorBuilder + .newBuilder(family).setDataBlockEncoding(encoding).setBlocksize(blockSize).build()).build(), + null); Table table = conn.getTable(tableName); - int maxRows = 10; byte[] val1 = new byte[10]; byte[] val2 = new byte[10]; Bytes.random(val1); Bytes.random(val2); for (int i = 0; i < maxRows; i++) { - if (i == maxRows / 2) { - TEST_UTIL.flush(tableName); - } table.put(new Put(Bytes.toBytes(i)).addColumn(family, Bytes.toBytes(1), val1) .addColumn(family, Bytes.toBytes(2), val2).setTTL(600_000)); }