From 7b39199be343a2562dd832cdcf2e1b5a38c5d4f2 Mon Sep 17 00:00:00 2001 From: mdumandag Date: Tue, 14 Sep 2021 17:03:48 +0300 Subject: [PATCH] Deserialize `SqlRow`s lazily This is a required change to be able to use parts of the row, even if we cannot deserialize some column values. Also, lazy deserialization is a plus on its own. To accomplish this, similar to the Java client, I have changed the code so that, we try to deserialize column values when we call `get_object` or `get_object_with_index`. Note that, just like the Java client ``` row.get_object("smth") row.get_object("smth") ``` will result in two deserializations. We could easily solve this with an extra object per row (something like LazyDeserializable class with two fields: serialized and deserialized where once we deserialize it, it assigns the result to deserialized and return it for the subsequent calls). But, I didn't want to go that way due to extra object cost. --- hazelcast/sql.py | 49 +++++++++++++------ .../backward_compatible/sql_test.py | 32 +++++++++++- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/hazelcast/sql.py b/hazelcast/sql.py index b00a9e3d45..9a11c12011 100644 --- a/hazelcast/sql.py +++ b/hazelcast/sql.py @@ -523,11 +523,12 @@ def __repr__(self): class SqlRow(object): """One of the rows of an SQL query result.""" - __slots__ = ("_row_metadata", "_row") + __slots__ = ("_row_metadata", "_row", "_deserialize_fn") - def __init__(self, row_metadata, row): + def __init__(self, row_metadata, row, deserialize_fn): self._row_metadata = row_metadata self._row = row + self._deserialize_fn = deserialize_fn def get_object(self, column_name): """Gets the value in the column indicated by the column name. @@ -539,6 +540,13 @@ def get_object(self, column_name): The type of the returned value depends on the SQL type of the column. No implicit conversions are performed on the value. + Warnings: + + Each call to this method might result in a deserialization if the + column type for this object is :const:`SqlColumnType.OBJECT`. + It is advised to assign the result of this method call to some + variable and reuse it. + Args: column_name (str): @@ -548,6 +556,7 @@ def get_object(self, column_name): Raises: ValueError: If a column with the given name does not exist. AssertionError: If the column name is not a string. + HazelcastSqlError: If the object cannot be deserialized. See Also: :attr:`metadata` @@ -561,7 +570,7 @@ def get_object(self, column_name): index = self._row_metadata.find_column(column_name) if index == SqlRowMetadata.COLUMN_NOT_FOUND: raise ValueError("Column '%s' doesn't exist" % column_name) - return self._row[index] + return self._deserialize_fn(self._row[index]) def get_object_with_index(self, column_index): """Gets the value of the column by index. @@ -569,6 +578,13 @@ def get_object_with_index(self, column_index): The class of the returned value depends on the SQL type of the column. No implicit conversions are performed on the value. + Warnings: + + Each call to this method might result in a deserialization if the + column type for this object is :const:`SqlColumnType.OBJECT`. + It is advised to assign the result of this method call to some + variable and reuse it. + Args: column_index (int): Zero-based column index. @@ -578,6 +594,7 @@ def get_object_with_index(self, column_index): Raises: IndexError: If the column index is out of bounds. AssertionError: If the column index is not an integer. + HazelcastSqlError: If the object cannot be deserialized. See Also: :attr:`metadata` @@ -585,7 +602,7 @@ def get_object_with_index(self, column_index): :attr:`SqlColumnMetadata.type` """ check_is_int(column_index, "Column index must be an integer") - return self._row[column_index] + return self._deserialize_fn(self._row[column_index]) @property def metadata(self): @@ -598,7 +615,7 @@ def __repr__(self): % ( self._row_metadata.get_column(i).name, get_attr_name(SqlColumnType, self._row_metadata.get_column(i).type), - self._row[i], + self.get_object_with_index(i), ) for i in range(self._row_metadata.column_count) ) @@ -679,12 +696,8 @@ def _get_current_row(self): list: The row pointed by the current position. """ - # The column might contain user objects so we have to deserialize it. - # Deserialization is no-op if the value is not Data. - return [ - self.deserialize_fn(self.page.get_value(i, self.position)) - for i in range(self.page.column_count) - ] + # Deserialization happens lazily while getting the object. + return [self.page.get_value(i, self.position) for i in range(self.page.column_count)] class _FutureProducingIterator(_IteratorBase): @@ -724,7 +737,7 @@ def _has_next_continuation(self, future): row = self._get_current_row() self.position += 1 - return SqlRow(self.row_metadata, row) + return SqlRow(self.row_metadata, row, self.deserialize_fn) def _has_next(self): """Returns a Future indicating whether there are more rows @@ -788,7 +801,7 @@ def __next__(self): row = self._get_current_row() self.position += 1 - return SqlRow(self.row_metadata, row) + return SqlRow(self.row_metadata, row, self.deserialize_fn) def _has_next(self): while self.position == self.row_count: @@ -1364,7 +1377,15 @@ def execute_statement(self, statement): raise self.re_raise(e, connection) def deserialize_object(self, obj): - return self._serialization_service.to_object(obj) + try: + return self._serialization_service.to_object(obj) + except Exception as e: + raise HazelcastSqlError( + self.get_client_id(), + _SqlErrorCode.GENERIC, + "Failed to deserialize query result value: %s" % try_to_get_error_message(e), + e, + ) def fetch(self, connection, query_id, cursor_buffer_size): """Fetches the next page of the query execution. diff --git a/tests/integration/backward_compatible/sql_test.py b/tests/integration/backward_compatible/sql_test.py index 488dab730b..ae5309a666 100644 --- a/tests/integration/backward_compatible/sql_test.py +++ b/tests/integration/backward_compatible/sql_test.py @@ -125,7 +125,7 @@ def _create_mapping_for_portable(self, factory_id, class_id, columns): create_mapping_query = """ CREATE MAPPING "%s" ( - __key INT, + __key INT%s %s ) TYPE IMaP @@ -137,6 +137,7 @@ def _create_mapping_for_portable(self, factory_id, class_id, columns): ) """ % ( self.map_name, + "," if len(columns) > 0 else "", ",\n".join(["%s %s" % (c_name, c_type) for c_name, c_type in columns.items()]), factory_id, class_id, @@ -542,6 +543,35 @@ def test_with_statement_when_iteration_throws(self): self.assertIsInstance(result.close(), ImmediateFuture) + def test_lazy_deserialization(self): + skip_if_client_version_older_than(self, "5.0") + + # Using a Portable that is not defined on the client-side. + self._create_mapping_for_portable(666, 1, {}) + + script = ( + """ + var m = instance_0.getMap("%s"); + m.put(1, new com.hazelcast.client.test.Employee(1, "Joe")); + """ + % self.map_name + ) + + res = self.rc.executeOnController(self.cluster.id, script, Lang.JAVASCRIPT) + self.assertTrue(res.success) + + with self.client.sql.execute('SELECT __key, this FROM "%s"' % self.map_name) as result: + rows = list(result) + self.assertEqual(1, len(rows)) + row = rows[0] + # We should be able to deserialize parts of the response + self.assertEqual(1, row.get_object("__key")) + + # We should throw lazily when we try to access the columns + # that are not deserializable + with self.assertRaises(HazelcastSqlError): + row.get_object("this") + @unittest.skipIf( compare_client_version("4.2") < 0, "Tests the features added in 4.2 version of the client"