diff --git a/bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/wrappers/veneer/RowResultAdapter.java b/bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/wrappers/veneer/RowResultAdapter.java index 8aa501db79..6a0c714a62 100644 --- a/bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/wrappers/veneer/RowResultAdapter.java +++ b/bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/wrappers/veneer/RowResultAdapter.java @@ -16,6 +16,8 @@ package com.google.cloud.bigtable.hbase.wrappers.veneer; import com.google.api.core.InternalApi; +import com.google.bigtable.v2.RowFilter.Interleave; +import com.google.cloud.bigtable.data.v2.models.Filters.InterleaveFilter; import com.google.cloud.bigtable.data.v2.models.RowAdapter; import com.google.cloud.bigtable.hbase.adapters.read.RowCell; import com.google.cloud.bigtable.hbase.util.ByteStringer; @@ -25,16 +27,26 @@ import com.google.common.collect.ImmutableList; import com.google.protobuf.ByteString; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.TreeMap; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.filter.FilterList.Operator; /** * Adapter for {@link RowAdapter} that uses {@link Result} to represent logical rows. * + *

This adapter is responsible for cell deduplication. bigtable-hbase will convert a {@link + * Operator#MUST_PASS_ONE} filter into an {@link Interleave}. Unfortunately there is a bit of a + * mismatch between the 2 filters: MUST_PASS_ONE will not duplicate the cell if it matches multiple + * branches of the MUST_PASS_ONE, but Interleave will.This adapter will pave over the difference by + * removing the duplicate cells while building the Result. However, HBase's WhileMatchFilter depends + * on duplicate labelled cells for its implementation. So this adapter will not deduplicate labelled + * cells. + * *

For internal use only - public for technical reasons. */ @InternalApi("For internal usage only") @@ -97,6 +109,7 @@ static class RowResultBuilder implements RowBuilder { private List labels; private long timestamp; private byte[] value; + private RowCell previousNoLabelCell; private Map> cells = new TreeMap<>(); private List currentFamilyCells = null; @@ -159,6 +172,9 @@ public void cellValue(ByteString newValue) { * * * A flattened version of the {@link RowCell} map will be sorted correctly. + * + *

Applying {@link InterleaveFilter} may result in a row to contain duplicated cells, where + * duplicates are grouped in sequences. */ @Override public void finishCell() { @@ -169,6 +185,7 @@ public void finishCell() { previousFamily = this.family; currentFamilyCells = new ArrayList<>(); cells.put(this.family, this.currentFamilyCells); + previousNoLabelCell = null; } RowCell rowCell = @@ -179,7 +196,20 @@ public void finishCell() { TimestampConverter.bigtable2hbase(this.timestamp), this.value, this.labels); - this.currentFamilyCells.add(rowCell); + + // dedupe user visible cells. Please see class javadoc for details + if (!this.labels.isEmpty()) { + this.currentFamilyCells.add(rowCell); + } else if (!keysMatch(previousNoLabelCell, rowCell)) { + this.currentFamilyCells.add(rowCell); + this.previousNoLabelCell = rowCell; + } + } + + private static boolean keysMatch(RowCell previousNoLabelCell, RowCell current) { + return previousNoLabelCell != null + && previousNoLabelCell.getTimestamp() == current.getTimestamp() + && Arrays.equals(previousNoLabelCell.getQualifierArray(), current.getQualifierArray()); } /** @@ -210,6 +240,7 @@ public void reset() { this.cells = new TreeMap<>(); this.currentFamilyCells = null; this.previousFamily = null; + this.previousNoLabelCell = null; } @Override diff --git a/bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/wrappers/veneer/TestRowResultAdapter.java b/bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/wrappers/veneer/TestRowResultAdapter.java index b3198de5dc..beda39a435 100644 --- a/bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/wrappers/veneer/TestRowResultAdapter.java +++ b/bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/wrappers/veneer/TestRowResultAdapter.java @@ -154,6 +154,66 @@ public void testOnlyValueIsDifferent() { assertResult(expected, actual); } + @Test + public void testDeduplicationLogic() { + List EMPTY_LABEL = ImmutableList.of(); + ByteString valueForNewQual = ByteString.copyFromUtf8("value for new qualifier"); + ByteString valueForCellWithoutLabel = ByteString.copyFromUtf8("value for Cell without label"); + ByteString valueForCellWithLabel = ByteString.copyFromUtf8("value for cell with labels"); + RowAdapter.RowBuilder rowBuilder = underTest.createRowBuilder(); + rowBuilder.startRow(ROW_KEY); + + // new qualifier + rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 20000L, EMPTY_LABEL, valueForNewQual.size()); + rowBuilder.cellValue(valueForNewQual); + rowBuilder.finishCell(); + + // same qualifier, same timestamp, without label + rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 20000L, EMPTY_LABEL, -1); + rowBuilder.cellValue(valueForCellWithoutLabel); + rowBuilder.finishCell(); + + // same qualifier, same timestamp, with label + rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 20000L, LABELS, -1); + rowBuilder.cellValue(valueForCellWithLabel); + rowBuilder.finishCell(); + + // same qualifier, different timestamp + rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 30000L, EMPTY_LABEL, VALUE.size()); + rowBuilder.cellValue(VALUE); + rowBuilder.finishCell(); + + Result actual = rowBuilder.finishRow(); + assertEquals(3, actual.size()); + + RowResult expected = + RowResult.create( + ROW_KEY, + ImmutableList.of( + new RowCell( + ROW_KEY.toByteArray(), + COL_FAMILY.getBytes(), + QUAL_ONE.toByteArray(), + 20L, + valueForNewQual.toByteArray(), + EMPTY_LABEL), + new RowCell( + ROW_KEY.toByteArray(), + COL_FAMILY.getBytes(), + QUAL_ONE.toByteArray(), + 20L, + valueForCellWithLabel.toByteArray(), + LABELS), + new RowCell( + ROW_KEY.toByteArray(), + COL_FAMILY.getBytes(), + QUAL_ONE.toByteArray(), + 30L, + VALUE.toByteArray(), + EMPTY_LABEL))); + assertResult(expected, actual); + } + @Test public void testFamilyOrdering() { RowAdapter.RowBuilder rowBuilder = underTest.createRowBuilder();