From ed5d8461b5230e42ce71dbf67e5fcfcd056e8124 Mon Sep 17 00:00:00 2001 From: Bryan Beaudreault Date: Mon, 26 Jul 2021 15:21:24 -0400 Subject: [PATCH] HBASE-26122: Implement an optional maximum size for Gets, after which a partial result is returned --- .../org/apache/hadoop/hbase/client/Get.java | 23 +++++ .../hbase/shaded/protobuf/ProtobufUtil.java | 9 +- .../src/main/protobuf/client/Client.proto | 2 + .../hadoop/hbase/regionserver/HRegion.java | 40 ++++++--- .../hbase/regionserver/RSRpcServices.java | 12 ++- .../TestPartialResultsFromClientSide.java | 45 +++++++++- .../hbase/regionserver/TestHRegion.java | 85 +++++++++++++++++++ 7 files changed, 201 insertions(+), 15 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java index 0f04407ac3e3..6de17c91df79 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java @@ -75,6 +75,7 @@ public class Get extends Query implements Row { private TimeRange tr = TimeRange.allTime(); private boolean checkExistenceOnly = false; private Map> familyMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); + private long maxResultSize = -1; /** * Create a Get operation for the specified row. @@ -281,6 +282,21 @@ public Get setFilter(Filter filter) { return this; } + /** + * Set the maximum result size. The default is -1; this means that no specific + * maximum result size will be set for this Get. + * + * If set to a value greater than zero, the server may respond with a Result where + * {@link Result#mayHaveMoreCellsInRow()} is true. The user is required to handle + * this case. + * + * @param maxResultSize The maximum result size in bytes + */ + public Get setMaxResultSize(long maxResultSize) { + this.maxResultSize = maxResultSize; + return this; + } + /* Accessors */ /** @@ -400,6 +416,13 @@ public Map getFingerprint() { return map; } + /** + * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)} + */ + public long getMaxResultSize() { + return maxResultSize; + } + /** * Compile the details beyond the scope of getFingerprint (row, columns, * timestamps, etc.) into a Map along with the fingerprinted information. diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index 329894cb8eb3..ef830e1e0b9d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -640,6 +640,9 @@ public static Get toGet(final ClientProtos.Get proto) throws IOException { if (proto.hasLoadColumnFamiliesOnDemand()) { get.setLoadColumnFamiliesOnDemand(proto.getLoadColumnFamiliesOnDemand()); } + if (proto.hasMaxResultSize()) { + get.setMaxResultSize(proto.getMaxResultSize()); + } return get; } @@ -1296,6 +1299,9 @@ public static ClientProtos.Get toGet( if (loadColumnFamiliesOnDemand != null) { builder.setLoadColumnFamiliesOnDemand(loadColumnFamiliesOnDemand); } + if (get.getMaxResultSize() > 0) { + builder.setMaxResultSize(get.getMaxResultSize()); + } return builder.build(); } @@ -1497,6 +1503,7 @@ public static ClientProtos.Result toResultNoData(final Result result) { ClientProtos.Result.Builder builder = ClientProtos.Result.newBuilder(); builder.setAssociatedCellCount(size); builder.setStale(result.isStale()); + builder.setPartial(result.mayHaveMoreCellsInRow()); return builder.build(); } @@ -1587,7 +1594,7 @@ public static Result toResult(final ClientProtos.Result proto, final CellScanner return (cells == null || cells.isEmpty()) ? (proto.getStale() ? EMPTY_RESULT_STALE : EMPTY_RESULT) - : Result.create(cells, null, proto.getStale()); + : Result.create(cells, null, proto.getStale(), proto.getPartial()); } diff --git a/hbase-protocol-shaded/src/main/protobuf/client/Client.proto b/hbase-protocol-shaded/src/main/protobuf/client/Client.proto index 8a2988abf4a6..a32782fb12aa 100644 --- a/hbase-protocol-shaded/src/main/protobuf/client/Client.proto +++ b/hbase-protocol-shaded/src/main/protobuf/client/Client.proto @@ -90,6 +90,8 @@ message Get { optional Consistency consistency = 12 [default = STRONG]; repeated ColumnFamilyTimeRange cf_time_range = 13; optional bool load_column_families_on_demand = 14; /* DO NOT add defaults to load_column_families_on_demand. */ + + optional uint64 max_result_size = 15; } message Result { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 8c4660cda586..a0721c948368 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -142,6 +142,7 @@ import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; import org.apache.hadoop.hbase.regionserver.compactions.ForbidMajorCompactionChecker; +import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope; import org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory; import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; import org.apache.hadoop.hbase.regionserver.throttle.StoreHotnessProtector; @@ -3871,8 +3872,7 @@ public void prepareMiniBatchOperations(MiniBatchOperationInProgress mi Result result; if (returnResults) { // convert duplicate increment/append to get - List results = region.get(toGet(mutation), false, nonceGroup, nonce); - result = Result.create(results); + result = region.get(toGet(mutation), false, nonceGroup, nonce); } else { result = Result.EMPTY_RESULT; } @@ -7524,9 +7524,7 @@ public static boolean rowIsInRange(RegionInfo info, final byte [] row, final int @Override public Result get(final Get get) throws IOException { prepareGet(get); - List results = get(get, true); - boolean stale = this.getRegionInfo().getReplicaId() != 0; - return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale); + return get(get, true, HConstants.NO_NONCE, HConstants.NO_NONCE); } void prepareGet(final Get get) throws IOException { @@ -7545,17 +7543,37 @@ void prepareGet(final Get get) throws IOException { @Override public List get(Get get, boolean withCoprocessor) throws IOException { - return get(get, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE); + return get(get, null, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE); } - private List get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) + private Result get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) throws IOException { - return TraceUtil.trace(() -> getInternal(get, withCoprocessor, nonceGroup, nonce), + + ScannerContext scannerContext = get.getMaxResultSize() > 0 + ? ScannerContext.newBuilder() + .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), get.getMaxResultSize()) + .build() + : null; + + List result = get(get, scannerContext, withCoprocessor, nonceGroup, nonce); + boolean stale = this.getRegionInfo().getReplicaId() != 0; + + return Result.create( + result, + get.isCheckExistenceOnly() ? !result.isEmpty() : null, + stale, + scannerContext != null && scannerContext.mayHaveMoreCellsInRow()); + } + + private List get(Get get, ScannerContext scannerContext, boolean withCoprocessor, + long nonceGroup, long nonce) throws IOException { + return TraceUtil.trace( + () -> getInternal(get, scannerContext, withCoprocessor, nonceGroup, nonce), () -> createRegionSpan("Region.get")); } - private List getInternal(Get get, boolean withCoprocessor, long nonceGroup, long nonce) - throws IOException { + private List getInternal(Get get, ScannerContext scannerContext, boolean withCoprocessor, + long nonceGroup, long nonce) throws IOException { List results = new ArrayList<>(); long before = EnvironmentEdgeManager.currentTime(); @@ -7572,7 +7590,7 @@ private List getInternal(Get get, boolean withCoprocessor, long nonceGroup } try (RegionScanner scanner = getScanner(scan, null, nonceGroup, nonce)) { List tmp = new ArrayList<>(); - scanner.next(tmp); + scanner.next(tmp, scannerContext); // Copy EC to heap, then close the scanner. // This can be an EXPENSIVE call. It may make an extra copy from offheap to onheap buffers. // See more details in HBASE-26036. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 56c53714bc47..883d95ed0312 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -2695,10 +2695,17 @@ private Result get(Get get, HRegion region, RegionScannersCloseCallBack closeCal if (scan.getLoadColumnFamiliesOnDemandValue() == null) { scan.setLoadColumnFamiliesOnDemand(region.isLoadingCfsOnDemandDefault()); } + + ScannerContext scannerContext = get.getMaxResultSize() > 0 + ? ScannerContext.newBuilder() + .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), get.getMaxResultSize()) + .build() + : null; + RegionScannerImpl scanner = null; try { scanner = region.getScanner(scan); - scanner.next(results); + scanner.next(results, scannerContext); } finally { if (scanner != null) { if (closeCallBack == null) { @@ -2723,7 +2730,8 @@ private Result get(Get get, HRegion region, RegionScannersCloseCallBack closeCal } region.metricsUpdateForGet(results, before); - return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale); + return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale, + scannerContext != null && scannerContext.mayHaveMoreCellsInRow()); } private void checkBatchSizeAndLogLargeSize(MultiRequest request) throws ServiceException { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java index 776fd2ab2acf..a3303b4e5570 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java @@ -24,20 +24,23 @@ import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; - import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.ColumnPrefixFilter; import org.apache.hadoop.hbase.filter.ColumnRangeFilter; +import org.apache.hadoop.hbase.filter.FamilyFilter; import org.apache.hadoop.hbase.filter.Filter; +import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; import org.apache.hadoop.hbase.filter.FirstKeyValueMatchingQualifiersFilter; import org.apache.hadoop.hbase.filter.RandomRowFilter; @@ -134,6 +137,46 @@ public static void tearDownAfterClass() throws Exception { TEST_UTIL.shutdownMiniCluster(); } + @Test + public void testGetPartialResults() throws Exception { + byte[] row = ROWS[0]; + + Result result; + int cf = 0; + int qf = 0; + int total = 0; + + do { + // this will ensure we always return only 1 result + Get get = new Get(row) + .setMaxResultSize(1); + + // we want to page through the entire row, this will ensure we always get the next + if (total > 0) { + get.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, + new ColumnRangeFilter(QUALIFIERS[qf], true, null, false), + new FamilyFilter(CompareOperator.GREATER_OR_EQUAL, new BinaryComparator(FAMILIES[cf])))); + } + + // all values are the same, but there should be a value + result = TABLE.get(get); + assertTrue(String.format("Value for family %s (# %s) and qualifier %s (# %s)", + Bytes.toStringBinary(FAMILIES[cf]), cf, Bytes.toStringBinary(QUALIFIERS[qf]), qf), + Bytes.equals(VALUE, result.getValue(FAMILIES[cf], QUALIFIERS[qf]))); + + total++; + if (++qf >= NUM_QUALIFIERS) { + cf++; + qf = 0; + } + } while (result.mayHaveMoreCellsInRow()); + + // ensure we iterated all cells in row + assertEquals(NUM_COLS, total); + assertEquals(NUM_FAMILIES, cf); + assertEquals(0, qf); + } + /** * Ensure that the expected key values appear in a result returned from a scanner that is * combining partial results into complete results diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index a5584ff32599..f5c2270fa38f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -7879,4 +7879,89 @@ public void run() { assertFalse("Region lock holder should not have been interrupted", holderInterrupted.get()); } + @Test + public void testOversizedGetsReturnPartialResult() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2) + .setMaxResultSize(1); // 0 doesn't count as a limit, according to HBase + + Result r = region.get(get); + + assertTrue("Expected partial result, but result was not marked as partial", r.mayHaveMoreCellsInRow()); + } + + @Test + public void testGetsWithoutResultSizeLimitAreNotPartial() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2); + + Result r = region.get(get); + + assertFalse("Expected full result, but it was marked as partial", r.mayHaveMoreCellsInRow()); + assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1))); + assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2))); + } + + @Test + public void testGetsWithinResultSizeLimitAreNotPartial() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2) + .setMaxResultSize(Long.MAX_VALUE); + + Result r = region.get(get); + + assertFalse("Expected full result, but it was marked as partial", r.mayHaveMoreCellsInRow()); + assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1))); + assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2))); + } + + @Test + public void testGetsWithResultSizeLimitReturnPartialResults() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2) + .setMaxResultSize(10); + + Result r = region.get(get); + + assertTrue("Expected partial result, but it was marked as complete", r.mayHaveMoreCellsInRow()); + assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1))); + assertEquals("Got more results than expected", 1, r.size()); + } + }