Skip to content

Commit

Permalink
Implementing Bigtable union/intersection row filters.
Browse files Browse the repository at this point in the history
  • Loading branch information
dhermes committed Jan 4, 2016
1 parent f220a36 commit 75eab06
Show file tree
Hide file tree
Showing 2 changed files with 211 additions and 0 deletions.
68 changes: 68 additions & 0 deletions gcloud/bigtable/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,3 +661,71 @@ def to_pb(self):
:returns: The converted current object.
"""
return data_pb2.RowFilter(apply_label_transformer=self.label)


class _FilterCombination(RowFilter):
    """Base class for row filters that combine several other filters.

    Holds the list of sub-filters and implements (in)equality. It does not
    define how the sub-filters are combined; subclasses such as
    :class:`RowFilterChain` and :class:`RowFilterUnion` do that in their
    ``to_pb`` implementations.

    :type filters: list
    :param filters: (Optional) List of :class:`RowFilter`. When omitted,
                    a new empty list is used.
    """

    def __init__(self, filters=None):
        if filters is None:
            # Build the default per instance so that instances never share
            # (and accidentally mutate) a single default list.
            filters = []
        # The caller's object is stored as-is (no copy is made).
        self.filters = filters

    def __eq__(self, other):
        """Check equality with another combination of the same class.

        :type other: object
        :param other: The value to compare against.

        :rtype: bool
        :returns: ``True`` if ``other`` is an instance of this object's
                  class and holds equal ``filters``, else ``False``.
        """
        if not isinstance(other, self.__class__):
            return False
        return other.filters == self.filters

    def __ne__(self, other):
        """Check inequality; always the negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``, so without this
        method two equal combinations would also compare as "not equal".

        :type other: object
        :param other: The value to compare against.

        :rtype: bool
        :returns: ``False`` if the objects compare equal, else ``True``.
        """
        return not self == other


class RowFilterChain(_FilterCombination):
    """Row filter that applies its sub-filters one after another.

    Rows are passed through the filters in sequence: the output of the
    first filter is the input of the second, and so on for every
    subsequent filter in the list.

    :type filters: list
    :param filters: List of :class:`RowFilter`
    """

    def to_pb(self):
        """Build the protobuf representation of this chain.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: A ``RowFilter`` message whose ``chain`` field holds the
                  protobufs of the sub-filters, in order.
        """
        sub_filter_pbs = []
        for sub_filter in self.filters:
            sub_filter_pbs.append(sub_filter.to_pb())
        chain_pb = data_pb2.RowFilter.Chain(filters=sub_filter_pbs)
        return data_pb2.RowFilter(chain=chain_pb)


class RowFilterUnion(_FilterCombination):
    """Row filter that merges the results of its sub-filters.

    Rows are sent through all of the filters simultaneously and the
    filtered results are then merged / interleaved together.

    If multiple cells are produced with the same column and timestamp,
    they will all appear in the output row in an unspecified mutual order.

    :type filters: list
    :param filters: List of :class:`RowFilter`
    """

    def to_pb(self):
        """Build the protobuf representation of this union.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: A ``RowFilter`` message whose ``interleave`` field holds
                  the protobufs of the sub-filters, in order.
        """
        sub_filter_pbs = []
        for sub_filter in self.filters:
            sub_filter_pbs.append(sub_filter.to_pb())
        interleave_pb = data_pb2.RowFilter.Interleave(filters=sub_filter_pbs)
        return data_pb2.RowFilter(interleave=interleave_pb)
143 changes: 143 additions & 0 deletions gcloud/bigtable/test_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,3 +779,146 @@ def test_to_pb(self):
pb_val = row_filter.to_pb()
expected_pb = data_pb2.RowFilter(apply_label_transformer=label)
self.assertEqual(pb_val, expected_pb)


class Test_FilterCombination(unittest2.TestCase):
    """Unit tests for the ``_FilterCombination`` base class."""

    def _getTargetClass(self):
        """Return the class under test."""
        from gcloud.bigtable.row import _FilterCombination as klass
        return klass

    def _makeOne(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_constructor_defaults(self):
        # With no arguments the instance starts with an empty list.
        combination = self._makeOne()
        self.assertEqual(combination.filters, [])

    def test_constructor_explicit(self):
        # The constructor must store the very object it was given.
        sentinel = object()
        combination = self._makeOne(filters=sentinel)
        self.assertTrue(combination.filters is sentinel)

    def test___eq__(self):
        # Two instances holding the same ``filters`` compare equal.
        shared = object()
        left = self._makeOne(filters=shared)
        right = self._makeOne(filters=shared)
        self.assertEqual(left, right)

    def test___eq__type_differ(self):
        # An object of an unrelated type never compares equal.
        combination = self._makeOne(filters=object())
        unrelated = object()
        self.assertNotEqual(combination, unrelated)


class TestRowFilterChain(unittest2.TestCase):
    """Unit tests for ``RowFilterChain.to_pb``."""

    def _getTargetClass(self):
        """Return the class under test."""
        from gcloud.bigtable.row import RowFilterChain as klass
        return klass

    def _makeOne(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_to_pb(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        strip_filter = StripValueTransformerFilter(True)
        sample_filter = RowSampleFilter(0.25)
        members = [strip_filter, sample_filter]

        # The chain message should wrap each member's protobuf, in order.
        expected_chain = data_pb2.RowFilter.Chain(
            filters=[strip_filter.to_pb(), sample_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(chain=expected_chain)

        actual_pb = self._makeOne(filters=members).to_pb()
        self.assertEqual(actual_pb, expected_pb)

    def test_to_pb_nested(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import CellsRowLimitFilter
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        # Inner chain that is itself used as a member of the outer chain.
        inner = self._makeOne(filters=[
            StripValueTransformerFilter(True),
            RowSampleFilter(0.25),
        ])
        limit_filter = CellsRowLimitFilter(11)
        outer = self._makeOne(filters=[inner, limit_filter])

        expected_chain = data_pb2.RowFilter.Chain(
            filters=[inner.to_pb(), limit_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(chain=expected_chain)

        self.assertEqual(outer.to_pb(), expected_pb)


class TestRowFilterUnion(unittest2.TestCase):
    """Unit tests for ``RowFilterUnion.to_pb``."""

    def _getTargetClass(self):
        """Return the class under test."""
        from gcloud.bigtable.row import RowFilterUnion as klass
        return klass

    def _makeOne(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_to_pb(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        strip_filter = StripValueTransformerFilter(True)
        sample_filter = RowSampleFilter(0.25)
        members = [strip_filter, sample_filter]

        # The interleave message should wrap each member's protobuf,
        # in order.
        expected_interleave = data_pb2.RowFilter.Interleave(
            filters=[strip_filter.to_pb(), sample_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(interleave=expected_interleave)

        actual_pb = self._makeOne(filters=members).to_pb()
        self.assertEqual(actual_pb, expected_pb)

    def test_to_pb_nested(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2
        from gcloud.bigtable.row import CellsRowLimitFilter
        from gcloud.bigtable.row import RowSampleFilter
        from gcloud.bigtable.row import StripValueTransformerFilter

        # Inner union that is itself used as a member of the outer union.
        inner = self._makeOne(filters=[
            StripValueTransformerFilter(True),
            RowSampleFilter(0.25),
        ])
        limit_filter = CellsRowLimitFilter(11)
        outer = self._makeOne(filters=[inner, limit_filter])

        expected_interleave = data_pb2.RowFilter.Interleave(
            filters=[inner.to_pb(), limit_filter.to_pb()])
        expected_pb = data_pb2.RowFilter(interleave=expected_interleave)

        self.assertEqual(outer.to_pb(), expected_pb)

0 comments on commit 75eab06

Please sign in to comment.