Skip to content

Commit

Permalink
Merge pull request #1320 from dhermes/bigtable-row-filter-7
Browse files Browse the repository at this point in the history
Implementing Bigtable row filters for sampling and labeling.
  • Loading branch information
dhermes committed Dec 22, 2015
2 parents 6d59044 + 2827324 commit c216a4e
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 0 deletions.
61 changes: 61 additions & 0 deletions gcloud/bigtable/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,31 @@ def to_pb(self):
return data_pb2.RowFilter(row_key_regex_filter=self.regex)


class RowSampleFilter(RowFilter):
    """Row filter that matches all cells from a row with probability p.

    :type sample: float
    :param sample: The probability of matching a cell (must be in the
                   interval ``[0, 1]``). The value is stored as given;
                   no range check is performed by this class.
    """

    def __init__(self, sample):
        self.sample = sample

    def __eq__(self, other):
        # Objects that are not instances of this filter's class never
        # compare equal; otherwise equality is decided by ``sample`` alone.
        return (isinstance(other, self.__class__) and
                other.sample == self.sample)

    def to_pb(self):
        """Build the protobuf message for this filter.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: A ``RowFilter`` whose ``row_sample_filter`` field is set
                  to this filter's ``sample``.
        """
        filter_pb = data_pb2.RowFilter(row_sample_filter=self.sample)
        return filter_pb


class FamilyNameRegexFilter(_RegexFilter):
"""Row filter for a family name regular expression.
Expand Down Expand Up @@ -522,3 +547,39 @@ def to_pb(self):
:returns: The converted current object.
"""
return data_pb2.RowFilter(strip_value_transformer=self.flag)


class ApplyLabelFilter(RowFilter):
    """Filter to apply labels to cells.

    Intended to be used as an intermediate filter on a pre-existing filtered
    result set. This way, if two sets are combined, the label can tell where
    the cell(s) originated. This allows the client to determine which
    results were produced from which part of the filter.

    .. note::

        Due to a technical limitation, it is not currently possible to apply
        multiple labels to a cell.

    :type label: str
    :param label: Label to apply to cells in the output row. Values must be
                  at most 15 characters long, and match the pattern
                  ``[a-z0-9\\-]+``. The value is stored as given; no check
                  is performed by this class.
    """

    def __init__(self, label):
        self.label = label

    def __eq__(self, other):
        # Objects that are not instances of this filter's class never
        # compare equal; otherwise equality is decided by ``label`` alone.
        return (isinstance(other, self.__class__) and
                other.label == self.label)

    def to_pb(self):
        """Build the protobuf message for this filter.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: A ``RowFilter`` whose ``apply_label_transformer`` field is
                  set to this filter's ``label``.
        """
        filter_pb = data_pb2.RowFilter(apply_label_transformer=self.label)
        return filter_pb
72 changes: 72 additions & 0 deletions gcloud/bigtable/test_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,42 @@ def test_to_pb(self):
self.assertEqual(pb_val, expected_pb)


class TestRowSampleFilter(unittest2.TestCase):
    # Unit tests for ``gcloud.bigtable.row.RowSampleFilter``.

    def _getTargetClass(self):
        # Imported lazily so an import failure surfaces in the test that
        # needs the class rather than when this module is loaded.
        from gcloud.bigtable.row import RowSampleFilter
        return RowSampleFilter

    def _makeOne(self, *args, **kwargs):
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_constructor(self):
        # A unique sentinel shows the constructor stores its argument as-is.
        sentinel = object()
        sample_filter = self._makeOne(sentinel)
        self.assertTrue(sample_filter.sample is sentinel)

    def test___eq__type_differ(self):
        # A filter must not compare equal to an object of an unrelated type.
        sample_filter = self._makeOne(object())
        unrelated = object()
        self.assertNotEqual(sample_filter, unrelated)

    def test___eq__same_value(self):
        # Two filters built from the very same value compare equal.
        shared_sample = object()
        first_filter = self._makeOne(shared_sample)
        second_filter = self._makeOne(shared_sample)
        self.assertEqual(first_filter, second_filter)

    def test_to_pb(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2

        probability = 0.25
        wanted_pb = data_pb2.RowFilter(row_sample_filter=probability)
        actual_pb = self._makeOne(probability).to_pb()
        self.assertEqual(actual_pb, wanted_pb)


class TestFamilyNameRegexFilter(unittest2.TestCase):

def _getTargetClass(self):
Expand Down Expand Up @@ -591,3 +627,39 @@ def test_to_pb(self):
pb_val = row_filter.to_pb()
expected_pb = data_pb2.RowFilter(strip_value_transformer=flag)
self.assertEqual(pb_val, expected_pb)


class TestApplyLabelFilter(unittest2.TestCase):
    # Unit tests for ``gcloud.bigtable.row.ApplyLabelFilter``.

    def _getTargetClass(self):
        # Imported lazily so an import failure surfaces in the test that
        # needs the class rather than when this module is loaded.
        from gcloud.bigtable.row import ApplyLabelFilter
        return ApplyLabelFilter

    def _makeOne(self, *args, **kwargs):
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_constructor(self):
        # A unique sentinel shows the constructor stores its argument as-is.
        sentinel = object()
        label_filter = self._makeOne(sentinel)
        self.assertTrue(label_filter.label is sentinel)

    def test___eq__type_differ(self):
        # A filter must not compare equal to an object of an unrelated type.
        label_filter = self._makeOne(object())
        unrelated = object()
        self.assertNotEqual(label_filter, unrelated)

    def test___eq__same_value(self):
        # Two filters built from the very same value compare equal.
        shared_label = object()
        first_filter = self._makeOne(shared_label)
        second_filter = self._makeOne(shared_label)
        self.assertEqual(first_filter, second_filter)

    def test_to_pb(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2

        label_text = u'label'
        wanted_pb = data_pb2.RowFilter(apply_label_transformer=label_text)
        actual_pb = self._makeOne(label_text).to_pb()
        self.assertEqual(actual_pb, wanted_pb)

0 comments on commit c216a4e

Please sign in to comment.