Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Projections follow up #454

Merged
merged 3 commits into from
Aug 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api/modules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ API Documentation
lifecycle
partition
predicate
projection
proxy/modules
security
serialization
Expand Down
4 changes: 4 additions & 0 deletions docs/api/projection.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Projection
==========

.. automodule:: hazelcast.projection
1 change: 1 addition & 0 deletions docs/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ features:
- Built-in Predicates
- Listener with Predicate
- Fast Aggregations
- Projections
- Near Cache Support
- Programmatic Configuration
- SSL Support (requires Enterprise server)
Expand Down
33 changes: 16 additions & 17 deletions docs/using_python_client_with_hazelcast_imdg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2166,31 +2166,30 @@ See the following example.
import hazelcast

from hazelcast.core import HazelcastJsonValue
from hazelcast.predicate import greater_or_equal
from hazelcast.predicate import greater
from hazelcast.projection import single_attribute, multi_attribute

client = hazelcast.HazelcastClient()
employees = client.get_map("employees").blocking()

employees.put(1, HazelcastJsonValue('{"Age": 23, "Height": 180, "Weight": 60}'))
employees.put(2, HazelcastJsonValue('{"Age": 21, "Height": 170, "Weight": 70}'))
employees.put(1, HazelcastJsonValue({"age": 25, "height": 180, "weight": 60}))
employees.put(2, HazelcastJsonValue({"age": 21, "height": 170, "weight": 70}))
employees.put(3, HazelcastJsonValue({"age": 40, "height": 175, "weight": 75}))

employee_ages = employees.project(single_attribute("Age"))
# Prints:
# The ages of employees are [21, 23]
print("The ages of employees are %s" % employee_ages)
ages = employees.project(single_attribute("age"))

# Run Single Attribute With Predicate
employee_ages = employees.project(single_attribute("Age"), greater_or_equal("Age", 23))
# Prints:
# The employee age is 23
print("The employee age is: %s" % employee_ages[0])
# Prints: "Ages of the employees are [21, 25, 40]"
print("Ages of the employees are %s" % ages)

# Run Multi Attribute Projection
employee_multi_attribute = employees.project(multi_attribute("Age", "Height"))
# Prints:
# Employee 1 age and height: [21, 170] Employee 2 age and height: [23, 180]
print("Employee 1 age and height: %s Employee 2 age and height: %s" % (employee_multi_attribute[0], employee_multi_attribute[1]))
filtered_ages = employees.project(single_attribute("age"), greater("age", 23))

# Prints: "Ages of the filtered employees are [25, 40]"
print("Ages of the filtered employees are %s" % filtered_ages)

attributes = employees.project(multi_attribute("age", "height"))
Comment on lines +2184 to +2189
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about replacing single_attribute and multi_attribute with just attribute?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But they are two distinct things with distinct return types. One returns list[any], the other list[list[any]] when used with project method.

Also, I find code like the snippet below a bit weird in the implementation. IMHO, keeping the two separate is better than this:

def attribute(*attrs):
    # Hypothetical unified factory from the review discussion: it picks the
    # projection type from the number of attribute paths it receives.
    if len(attrs) == 1:
        # Exactly one path: unwrap it and build the single-attribute projection.
        return _SingleAttribute(attrs[0])
    # Any other count (including zero): hand the whole tuple of paths over.
    return _MultiAttribute(attrs)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we make it just attribute, the return type can be list[list[any]]. I think it's not weird at all to distinguish between _SingleAttribute and _MultiAttribute depending on the number of attributes. Those two are implementation details; the user doesn't need to know about them. (I didn't check, but I guess they are different in the protocol as well; I'm not sure about the reason for that decision.)


# Prints: "Ages and heights of the employees are [[21, 170], [25, 180], [40, 175]]"
print("Ages and heights of the employees are %s" % attributes)


Performance
Expand Down
29 changes: 29 additions & 0 deletions examples/projections/projections_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import hazelcast

from hazelcast.core import HazelcastJsonValue
from hazelcast.predicate import less_or_equal
from hazelcast.projection import single_attribute, multi_attribute

client = hazelcast.HazelcastClient()

# Everything that talks to the cluster runs inside try/finally so the client
# is shut down even when one of the map operations raises.
try:
    people = client.get_map("people").blocking()

    people.put_all(
        {
            1: HazelcastJsonValue({"name": "Philip", "age": 46}),
            2: HazelcastJsonValue({"name": "Elizabeth", "age": 44}),
            3: HazelcastJsonValue({"name": "Henry", "age": 13}),
            4: HazelcastJsonValue({"name": "Paige", "age": 15}),
        }
    )

    # A single-attribute projection extracts one value per entry, so the
    # result is a flat list of names.
    names = people.project(single_attribute("name"))
    print("Names of the people are %s." % names)

    # The optional second argument is a predicate that filters the entries
    # before they are projected.
    children_names = people.project(single_attribute("name"), less_or_equal("age", 18))
    print("Names of the children are %s." % children_names)

    # A multi-attribute projection extracts several values per entry, so the
    # result is a list of [name, age] lists.
    names_and_ages = people.project(multi_attribute("name", "age"))
    print("Names and ages of the people are %s." % names_and_ages)
finally:
    client.shutdown()
2 changes: 2 additions & 0 deletions hazelcast/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,10 @@ def __init__(self, key=None, value=None):

@property
def key(self):
"""Key of the entry."""
return self._key

@property
def value(self):
"""Value of the entry."""
return self._value
16 changes: 11 additions & 5 deletions hazelcast/projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def get_class_id(self):


def _validate_attribute_path(attribute_path):
# type: (str) -> None
if not attribute_path:
raise ValueError("attribute_path must not be None or empty")

Expand All @@ -37,6 +38,7 @@ def _validate_attribute_path(attribute_path):

class _SingleAttributeProjection(_AbstractProjection):
def __init__(self, attribute_path):
# type: (str) -> None
_validate_attribute_path(attribute_path)
self._attribute_path = attribute_path

Expand All @@ -48,7 +50,8 @@ def get_class_id(self):


class _MultiAttributeProjection(_AbstractProjection):
def __init__(self, *attribute_paths):
def __init__(self, attribute_paths):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about adding types to new classes and functions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added type hints to the projections module. I didn't type-hint the code that is coming from the superclass (IdentifiedDataSerializable), should I add type hints to those methods too (like get_class_id)?

Also, a question: we have docstrings that define types for public methods; should we define type hints for them too? I have added type hints to them, but I can remove them if you want.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to this SO discussion, it is possible to configure sphinx to recognize types, so writing in docstrings is probably not necessary: https://stackoverflow.com/questions/40071573/python-3-sphinx-doesnt-show-type-hints-correctly But I think we should keep them until we are sure about that.

IMO adding types only to the functions/classes added or changed in this PR is OK. We can tackle adding types to the others in a separate PR. I think types in the base class are used for derived classes, but since IdentifiedDataSerializable is an older class, I think it's OK not to add types to get_class_id et al.

# type: (list[str]) -> None
if not attribute_paths:
raise ValueError("Specify at least one attribute path")

Expand All @@ -73,6 +76,7 @@ def get_class_id(self):


def single_attribute(attribute_path):
# type: (str) -> Projection
"""Creates a projection that extracts the value of
the given attribute path.

Expand All @@ -81,12 +85,13 @@ def single_attribute(attribute_path):

Returns:
Projection[any]: A projection that extracts the value of the given
attribute path.
attribute path.
"""
return _SingleAttributeProjection(attribute_path)


def multi_attribute(*attribute_paths):
# type: (str) -> Projection
"""Creates a projection that extracts the values of
one or more attribute paths.

Expand All @@ -95,16 +100,17 @@ def multi_attribute(*attribute_paths):

Returns:
Projection[list]: A projection that extracts the values of the given
attribute paths.
attribute paths.
"""
return _MultiAttributeProjection(*attribute_paths)
return _MultiAttributeProjection(list(attribute_paths))


def identity():
# type: () -> Projection
"""Creates a projection that does no transformation.

Returns:
Projection[hazelcast.core.MapEntry]: A projection that does no
transformation.
transformation.
"""
return _IdentityProjection()
78 changes: 40 additions & 38 deletions hazelcast/proxy/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ def aggregate(self, aggregator, predicate=None):
"""
check_not_none(aggregator, "aggregator can't be none")
aggregator_data = self._to_data(aggregator)

if predicate:
if isinstance(predicate, PagingPredicate):
raise AssertionError("Paging predicate is not supported.")
Expand All @@ -347,44 +348,6 @@ def handler(message):
request = map_aggregate_codec.encode_request(self.name, aggregator_data)
return self._invoke(request, handler)

def project(self, projection, predicate=None):
"""Applies the projection logic on map entries and filter the result with the
predicate, if given.

Args:
projection (hazelcast.projection.Projection): Projection to project the
entries with.
predicate (hazelcast.predicate.Predicate): Predicate to filter the entries
with.

Returns:
hazelcast.future.Future: The result of the projection.
"""
check_not_none(projection, "Projection can't be none")
projection_data = self._to_data(projection)
if predicate:
if isinstance(predicate, PagingPredicate):
raise AssertionError("Paging predicate is not supported.")

def handler(message):
return ImmutableLazyDataList(
map_project_with_predicate_codec.decode_response(message), self._to_object
)

predicate_data = self._to_data(predicate)
request = map_project_with_predicate_codec.encode_request(
self.name, projection_data, predicate_data
)
return self._invoke(request, handler)

def handler(message):
return ImmutableLazyDataList(
map_project_codec.decode_response(message), self._to_object
)

request = map_project_codec.encode_request(self.name, projection_data)
return self._invoke(request, handler)

def clear(self):
"""Clears the map.

Expand Down Expand Up @@ -876,6 +839,45 @@ def lock(self, key, lease_time=None):
self._invocation_service.invoke(invocation)
return invocation.future

def project(self, projection, predicate=None):
    """Projects the map entries, optionally restricted by a predicate.

    Args:
        projection (hazelcast.projection.Projection): Projection to apply
            to the entries.
        predicate (hazelcast.predicate.Predicate): Optional predicate that
            selects the entries to project. Paging predicates are rejected.

    Returns:
        hazelcast.future.Future: The result of the projection.

    Raises:
        AssertionError: If ``predicate`` is a paging predicate.
    """
    check_not_none(projection, "Projection can't be none")
    projection_data = self._to_data(projection)

    if not predicate:
        # No filter requested: use the plain projection codec.
        def decode_all(message):
            items = map_project_codec.decode_response(message)
            return ImmutableLazyDataList(items, self._to_object)

        plain_request = map_project_codec.encode_request(self.name, projection_data)
        return self._invoke(plain_request, decode_all)

    if isinstance(predicate, PagingPredicate):
        raise AssertionError("Paging predicate is not supported.")

    def decode_filtered(message):
        items = map_project_with_predicate_codec.decode_response(message)
        return ImmutableLazyDataList(items, self._to_object)

    predicate_data = self._to_data(predicate)
    filtered_request = map_project_with_predicate_codec.encode_request(
        self.name, projection_data, predicate_data
    )
    return self._invoke(filtered_request, decode_filtered)

def put(self, key, value, ttl=None, max_idle=None):
"""Associates the specified value with the specified key in this map.

Expand Down
39 changes: 29 additions & 10 deletions tests/integration/backward_compatible/proxy/map_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from hazelcast.core import HazelcastJsonValue
from hazelcast.config import IndexType, IntType
from hazelcast.errors import HazelcastError
from hazelcast.predicate import greater_or_equal, less_or_equal, sql, between
from hazelcast.predicate import greater_or_equal, less_or_equal, sql, paging, true
from hazelcast.proxy.map import EntryEventType
from hazelcast.serialization.api import IdentifiedDataSerializable
from hazelcast.six.moves import range
Expand Down Expand Up @@ -796,6 +796,14 @@ def setUp(self):
def tearDown(self):
self.map.destroy()

def test_aggregate_with_none_aggregator(self):
with self.assertRaises(AssertionError):
self.map.aggregate(None)
Comment on lines +800 to +801
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be simplified to: self.assertRaises(AssertionError, lambda: self.map.aggregate(None)). I think most of the other assertRaises uses can also be put on a single line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I personally prefer this style, and try to use it like this across the codebase. Advantages:

  • You can easily put breakpoints
  • You can put multiple lines without defining a closure

And IMHO, this one is more readable


def test_aggregate_with_paging_predicate(self):
with self.assertRaises(AssertionError):
self.map.aggregate(int_avg("foo"), paging(true(), 10))

def test_int_average(self):
average = self.map.aggregate(int_avg())
self.assertEqual(24.5, average)
Expand Down Expand Up @@ -1034,23 +1042,32 @@ def setUp(self):
def tearDown(self):
self.map.destroy()

def test_project_with_none_projection(self):
with self.assertRaises(AssertionError):
self.map.project(None)

def test_project_with_paging_predicate(self):
with self.assertRaises(AssertionError):
self.map.project(single_attribute("foo"), paging(true(), 10))

def test_single_attribute(self):
attribute = self.map.project(single_attribute("attr1"))
six.assertCountEqual(self, [4, 1], attribute)
attributes = self.map.project(single_attribute("attr1"))
six.assertCountEqual(self, [1, 4], attributes)

def test_single_attribute_with_predicate(self):
attribute = self.map.project(single_attribute("attr1"), greater_or_equal("attr1", 4))
self.assertEqual([4], attribute)
attributes = self.map.project(single_attribute("attr1"), greater_or_equal("attr1", 4))
six.assertCountEqual(self, [4], attributes)

def test_multi_attribute(self):
attributes = self.map.project(multi_attribute("attr1", "attr2"))
six.assertCountEqual(self, [[4, 5], [1, 2]], attributes)
six.assertCountEqual(self, [[1, 2], [4, 5]], attributes)

def test_multi_attribute_with_predicate(self):
attributes = self.map.project(
multi_attribute("attr1", "attr2"), greater_or_equal("attr2", 3)
multi_attribute("attr1", "attr2"),
greater_or_equal("attr2", 3),
)
self.assertEqual([[4, 5]], attributes)
six.assertCountEqual(self, [[4, 5]], attributes)

def test_identity(self):
attributes = self.map.project(identity())
Expand All @@ -1065,6 +1082,8 @@ def test_identity(self):

def test_identity_with_predicate(self):
attributes = self.map.project(identity(), greater_or_equal("attr2", 3))
self.assertEqual(
HazelcastJsonValue('{"attr1": 4, "attr2": 5, "attr3": 6}'), attributes[0].value
six.assertCountEqual(
self,
[HazelcastJsonValue('{"attr1": 4, "attr2": 5, "attr3": 6}')],
[attribute.value for attribute in attributes],
)
25 changes: 25 additions & 0 deletions tests/unit/projection_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import unittest

from hazelcast.projection import single_attribute, multi_attribute


class ProjectionsInvalidInputTest(unittest.TestCase):
    """Checks that the projection factories reject invalid attribute paths."""

    def _assert_rejected(self, factory, *paths):
        # Shared check: building the projection from these paths must fail
        # with ValueError.
        with self.assertRaises(ValueError):
            factory(*paths)

    def test_single_attribute_with_any_operator(self):
        self._assert_rejected(single_attribute, "foo[any]")

    def test_single_attribute_with_empty_path(self):
        self._assert_rejected(single_attribute, "")

    def test_multi_attribute_with_no_paths(self):
        self._assert_rejected(multi_attribute)

    def test_multi_attribute_with_any_operator(self):
        self._assert_rejected(multi_attribute, "valid", "invalid[any]")

    def test_multi_attribute_with_empty_path(self):
        self._assert_rejected(multi_attribute, "valid", "")