Skip to content

Commit

Permalink
Deserialize SqlRows lazily (#472)
Browse files Browse the repository at this point in the history
This is a required change to be able to use parts of the row,
even if we cannot deserialize some column values.

Also, lazy deserialization is a plus on its own.

To accomplish this, similar to the Java client, I have changed the
code so that, we try to deserialize column values when we
call `get_object` or `get_object_with_index`. Note that,
just like the Java client

```
row.get_object("smth")
row.get_object("smth")
```

will result in two deserializations.
We could easily solve this with an extra object per row (something
like LazyDeserializable class with two fields: serialized and deserialized
where once we deserialize it, it assigns the result to deserialized and
return it for the subsequent calls). But, I didn't want to go that way due
to extra object cost.
  • Loading branch information
mdumandag committed Sep 16, 2021
1 parent f89fe19 commit f614580
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 15 deletions.
49 changes: 35 additions & 14 deletions hazelcast/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,11 +523,12 @@ def __repr__(self):
class SqlRow(object):
"""One of the rows of an SQL query result."""

__slots__ = ("_row_metadata", "_row")
__slots__ = ("_row_metadata", "_row", "_deserialize_fn")

def __init__(self, row_metadata, row):
def __init__(self, row_metadata, row, deserialize_fn):
self._row_metadata = row_metadata
self._row = row
self._deserialize_fn = deserialize_fn

def get_object(self, column_name):
"""Gets the value in the column indicated by the column name.
Expand All @@ -539,6 +540,13 @@ def get_object(self, column_name):
The type of the returned value depends on the SQL type of the column.
No implicit conversions are performed on the value.
Warnings:
Each call to this method might result in a deserialization if the
column type for this object is :const:`SqlColumnType.OBJECT`.
It is advised to assign the result of this method call to some
variable and reuse it.
Args:
column_name (str):
Expand All @@ -548,6 +556,7 @@ def get_object(self, column_name):
Raises:
ValueError: If a column with the given name does not exist.
AssertionError: If the column name is not a string.
HazelcastSqlError: If the object cannot be deserialized.
See Also:
:attr:`metadata`
Expand All @@ -561,14 +570,21 @@ def get_object(self, column_name):
index = self._row_metadata.find_column(column_name)
if index == SqlRowMetadata.COLUMN_NOT_FOUND:
raise ValueError("Column '%s' doesn't exist" % column_name)
return self._row[index]
return self._deserialize_fn(self._row[index])

def get_object_with_index(self, column_index):
"""Gets the value of the column by index.
The class of the returned value depends on the SQL type of the column.
No implicit conversions are performed on the value.
Warnings:
Each call to this method might result in a deserialization if the
column type for this object is :const:`SqlColumnType.OBJECT`.
It is advised to assign the result of this method call to some
variable and reuse it.
Args:
column_index (int): Zero-based column index.
Expand All @@ -578,14 +594,15 @@ def get_object_with_index(self, column_index):
Raises:
IndexError: If the column index is out of bounds.
AssertionError: If the column index is not an integer.
HazelcastSqlError: If the object cannot be deserialized.
See Also:
:attr:`metadata`
:attr:`SqlColumnMetadata.type`
"""
check_is_int(column_index, "Column index must be an integer")
return self._row[column_index]
return self._deserialize_fn(self._row[column_index])

@property
def metadata(self):
Expand All @@ -598,7 +615,7 @@ def __repr__(self):
% (
self._row_metadata.get_column(i).name,
get_attr_name(SqlColumnType, self._row_metadata.get_column(i).type),
self._row[i],
self.get_object_with_index(i),
)
for i in range(self._row_metadata.column_count)
)
Expand Down Expand Up @@ -679,12 +696,8 @@ def _get_current_row(self):
list: The row pointed by the current position.
"""

# The column might contain user objects so we have to deserialize it.
# Deserialization is no-op if the value is not Data.
return [
self.deserialize_fn(self.page.get_value(i, self.position))
for i in range(self.page.column_count)
]
# Deserialization happens lazily while getting the object.
return [self.page.get_value(i, self.position) for i in range(self.page.column_count)]


class _FutureProducingIterator(_IteratorBase):
Expand Down Expand Up @@ -724,7 +737,7 @@ def _has_next_continuation(self, future):

row = self._get_current_row()
self.position += 1
return SqlRow(self.row_metadata, row)
return SqlRow(self.row_metadata, row, self.deserialize_fn)

def _has_next(self):
"""Returns a Future indicating whether there are more rows
Expand Down Expand Up @@ -788,7 +801,7 @@ def __next__(self):

row = self._get_current_row()
self.position += 1
return SqlRow(self.row_metadata, row)
return SqlRow(self.row_metadata, row, self.deserialize_fn)

def _has_next(self):
while self.position == self.row_count:
Expand Down Expand Up @@ -1364,7 +1377,15 @@ def execute_statement(self, statement):
raise self.re_raise(e, connection)

def deserialize_object(self, obj):
return self._serialization_service.to_object(obj)
try:
return self._serialization_service.to_object(obj)
except Exception as e:
raise HazelcastSqlError(
self.get_client_id(),
_SqlErrorCode.GENERIC,
"Failed to deserialize query result value: %s" % try_to_get_error_message(e),
e,
)

def fetch(self, connection, query_id, cursor_buffer_size):
"""Fetches the next page of the query execution.
Expand Down
32 changes: 31 additions & 1 deletion tests/integration/backward_compatible/sql_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _create_mapping_for_portable(self, factory_id, class_id, columns):

create_mapping_query = """
CREATE MAPPING "%s" (
__key INT,
__key INT%s
%s
)
TYPE IMaP
Expand All @@ -137,6 +137,7 @@ def _create_mapping_for_portable(self, factory_id, class_id, columns):
)
""" % (
self.map_name,
"," if len(columns) > 0 else "",
",\n".join(["%s %s" % (c_name, c_type) for c_name, c_type in columns.items()]),
factory_id,
class_id,
Expand Down Expand Up @@ -542,6 +543,35 @@ def test_with_statement_when_iteration_throws(self):

self.assertIsInstance(result.close(), ImmediateFuture)

def test_lazy_deserialization(self):
skip_if_client_version_older_than(self, "5.0")

# Using a Portable that is not defined on the client-side.
self._create_mapping_for_portable(666, 1, {})

script = (
"""
var m = instance_0.getMap("%s");
m.put(1, new com.hazelcast.client.test.Employee(1, "Joe"));
"""
% self.map_name
)

res = self.rc.executeOnController(self.cluster.id, script, Lang.JAVASCRIPT)
self.assertTrue(res.success)

with self.client.sql.execute('SELECT __key, this FROM "%s"' % self.map_name) as result:
rows = list(result)
self.assertEqual(1, len(rows))
row = rows[0]
# We should be able to deserialize parts of the response
self.assertEqual(1, row.get_object("__key"))

# We should throw lazily when we try to access the columns
# that are not deserializable
with self.assertRaises(HazelcastSqlError):
row.get_object("this")


@unittest.skipIf(
compare_client_version("4.2") < 0, "Tests the features added in 4.2 version of the client"
Expand Down

0 comments on commit f614580

Please sign in to comment.