Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
package com.google.cloud.bigtable.hbase.wrappers.veneer;

import com.google.api.core.InternalApi;
import com.google.bigtable.v2.RowFilter.Interleave;
import com.google.cloud.bigtable.data.v2.models.Filters.InterleaveFilter;
import com.google.cloud.bigtable.data.v2.models.RowAdapter;
import com.google.cloud.bigtable.hbase.adapters.read.RowCell;
import com.google.cloud.bigtable.hbase.util.ByteStringer;
Expand All @@ -25,16 +27,26 @@
import com.google.common.collect.ImmutableList;
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.filter.FilterList.Operator;

/**
* Adapter for {@link RowAdapter} that uses {@link Result} to represent logical rows.
*
* <p>This adapter is responsible for cell deduplication. bigtable-hbase will convert a {@link
* Operator#MUST_PASS_ONE} filter into an {@link Interleave}. Unfortunately there is a bit of a
* mismatch between the 2 filters: MUST_PASS_ONE will not duplicate the cell if it matches multiple
* branches of the MUST_PASS_ONE, but Interleave will.This adapter will pave over the difference by
* removing the duplicate cells while building the Result. However, HBase's WhileMatchFilter depends
* on duplicate labelled cells for its implementation. So this adapter will not deduplicate labelled
* cells.
*
* <p>For internal use only - public for technical reasons.
*/
@InternalApi("For internal usage only")
Expand Down Expand Up @@ -97,6 +109,7 @@ static class RowResultBuilder implements RowBuilder<Result> {
private List<String> labels;
private long timestamp;
private byte[] value;
private RowCell previousNoLabelCell;

private Map<String, List<RowCell>> cells = new TreeMap<>();
private List<RowCell> currentFamilyCells = null;
Expand Down Expand Up @@ -159,6 +172,9 @@ public void cellValue(ByteString newValue) {
* </ul>
*
* A flattened version of the {@link RowCell} map will be sorted correctly.
*
* <p>Applying {@link InterleaveFilter} may result in a row to contain duplicated cells, where
* duplicates are grouped in sequences.
*/

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

than -> then
it's not clear to me, what do you mean 'cells will appear one after another'

I would rephrase:

Applying {@link InterleaveFilter} may result in a row to contain duplicated cells.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cells will appear one after another => I mean the duplicate cells will always be in group one after another (in that sequence)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no 'but' here.
may be:

, but the duplicate cells will appear in a group i.e. one after another.

, where duplicates are grouped in sequences.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching it 🙏

@Override
public void finishCell() {
Expand All @@ -169,6 +185,7 @@ public void finishCell() {
previousFamily = this.family;
currentFamilyCells = new ArrayList<>();
cells.put(this.family, this.currentFamilyCells);
previousNoLabelCell = null;
}

RowCell rowCell =
Expand All @@ -179,7 +196,20 @@ public void finishCell() {
TimestampConverter.bigtable2hbase(this.timestamp),
this.value,
this.labels);
this.currentFamilyCells.add(rowCell);

// dedupe user visible cells. Please see class javadoc for details
if (!this.labels.isEmpty()) {
this.currentFamilyCells.add(rowCell);
} else if (!keysMatch(previousNoLabelCell, rowCell)) {
this.currentFamilyCells.add(rowCell);
this.previousNoLabelCell = rowCell;
}
}

private static boolean keysMatch(RowCell previousNoLabelCell, RowCell current) {
return previousNoLabelCell != null
&& previousNoLabelCell.getTimestamp() == current.getTimestamp()
&& Arrays.equals(previousNoLabelCell.getQualifierArray(), current.getQualifierArray());
}

/**
Expand Down Expand Up @@ -210,6 +240,7 @@ public void reset() {
this.cells = new TreeMap<>();
this.currentFamilyCells = null;
this.previousFamily = null;
this.previousNoLabelCell = null;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,66 @@ public void testOnlyValueIsDifferent() {
assertResult(expected, actual);
}

@Test
public void testDeduplicationLogic() {
List<String> EMPTY_LABEL = ImmutableList.of();
ByteString valueForNewQual = ByteString.copyFromUtf8("value for new qualifier");
ByteString valueForCellWithoutLabel = ByteString.copyFromUtf8("value for Cell without label");
ByteString valueForCellWithLabel = ByteString.copyFromUtf8("value for cell with labels");
RowAdapter.RowBuilder<Result> rowBuilder = underTest.createRowBuilder();
rowBuilder.startRow(ROW_KEY);

// new qualifier
rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 20000L, EMPTY_LABEL, valueForNewQual.size());
rowBuilder.cellValue(valueForNewQual);
rowBuilder.finishCell();

// same qualifier, same timestamp, without label
rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 20000L, EMPTY_LABEL, -1);
rowBuilder.cellValue(valueForCellWithoutLabel);
rowBuilder.finishCell();

// same qualifier, same timestamp, with label
rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 20000L, LABELS, -1);
rowBuilder.cellValue(valueForCellWithLabel);
rowBuilder.finishCell();

// same qualifier, different timestamp
rowBuilder.startCell(COL_FAMILY, QUAL_ONE, 30000L, EMPTY_LABEL, VALUE.size());
rowBuilder.cellValue(VALUE);
rowBuilder.finishCell();

Result actual = rowBuilder.finishRow();
assertEquals(3, actual.size());

RowResult expected =
RowResult.create(
ROW_KEY,
ImmutableList.<Cell>of(
new RowCell(
ROW_KEY.toByteArray(),
COL_FAMILY.getBytes(),
QUAL_ONE.toByteArray(),
20L,
valueForNewQual.toByteArray(),
EMPTY_LABEL),
new RowCell(
ROW_KEY.toByteArray(),
COL_FAMILY.getBytes(),
QUAL_ONE.toByteArray(),
20L,
valueForCellWithLabel.toByteArray(),
LABELS),
new RowCell(
ROW_KEY.toByteArray(),
COL_FAMILY.getBytes(),
QUAL_ONE.toByteArray(),
30L,
VALUE.toByteArray(),
EMPTY_LABEL)));
assertResult(expected, actual);
}

@Test
public void testFamilyOrdering() {
RowAdapter.RowBuilder<Result> rowBuilder = underTest.createRowBuilder();
Expand Down