From c3c896dac74a178a36a074f99a2507e6b127db94 Mon Sep 17 00:00:00 2001 From: Florian Gaudin-Delrieu <9217921+FlorianGD@users.noreply.github.com> Date: Tue, 15 Aug 2023 13:06:29 +0200 Subject: [PATCH] =?UTF-8?q?fix(datasets):=20do=20not=20double=20encode=20t?= =?UTF-8?q?he=20data=20as=20json=20when=20saving=20an=20A=E2=80=A6=20(#301?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(datasets): do not double encode the data as json when saving an APIDataSet Signed-off-by: Florian Gaudin-Delrieu Signed-off-by: Florian Gaudin-Delrieu * chore(lint): make pyling happy Signed-off-by: Florian Gaudin-Delrieu --------- Signed-off-by: Florian Gaudin-Delrieu Signed-off-by: Florian Gaudin-Delrieu Co-authored-by: Nok Lam Chan --- kedro-datasets/RELEASE.md | 2 + .../kedro_datasets/api/api_dataset.py | 4 +- kedro-datasets/tests/api/test_api_dataset.py | 39 +++++++++++++++---- .../databricks/test_managed_table_dataset.py | 2 +- .../tests/pandas/test_hdf_dataset.py | 1 - 5 files changed, 37 insertions(+), 11 deletions(-) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 65fad2953..672562c4a 100644 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -2,6 +2,8 @@ ## Major features and improvements ## Bug fixes and other changes +* Fixed an issue on `api.APIDataSet` where the sent data was doubly converted to json + string (once by us and once by the `requests` library). ## Community contributions diff --git a/kedro-datasets/kedro_datasets/api/api_dataset.py b/kedro-datasets/kedro_datasets/api/api_dataset.py index 8414a23a9..a69281aaf 100644 --- a/kedro-datasets/kedro_datasets/api/api_dataset.py +++ b/kedro-datasets/kedro_datasets/api/api_dataset.py @@ -207,9 +207,9 @@ def _execute_save_with_chunks( def _execute_save_request(self, json_data: Any) -> requests.Response: try: - json_.loads(json_data) + self._request_args["json"] = json_.loads(json_data) except TypeError: - self._request_args["json"] = json_.dumps(json_data) + self._request_args["json"] = json_data try: response = requests.request(**self._request_args) response.raise_for_status() diff --git a/kedro-datasets/tests/api/test_api_dataset.py b/kedro-datasets/tests/api/test_api_dataset.py index c736f90b5..e87d1cd02 100644 --- a/kedro-datasets/tests/api/test_api_dataset.py +++ b/kedro-datasets/tests/api/test_api_dataset.py @@ -2,6 +2,7 @@ import base64 import json import socket +from typing import Any import pytest import requests @@ -23,7 +24,7 @@ TEST_METHOD = "GET" TEST_HEADERS = {"key": "value"} -TEST_SAVE_DATA = [json.dumps({"key1": "info1", "key2": "info2"})] +TEST_SAVE_DATA = [{"key1": "info1", "key2": "info2"}] class TestAPIDataSet: @@ -272,12 +273,24 @@ def test_socket_error(self, requests_mock): api_data_set.load() @pytest.mark.parametrize("method", POSSIBLE_METHODS) - def test_successful_save(self, requests_mock, method): + @pytest.mark.parametrize( + "data", + [TEST_SAVE_DATA, json.dumps(TEST_SAVE_DATA)], + ids=["data_as_dict", "data_as_json_string"], + ) + def test_successful_save(self, requests_mock, method, data): """ When we want to save some data on a server Given an APIDataSet class - Then check we get a response + Then check that the response is OK and the sent data is in the correct form. """ + + def json_callback( + request: requests.Request, context: Any # pylint: disable=unused-argument + ) -> dict: + """Callback that sends back the json.""" + return request.json() + if method in ["PUT", "POST"]: api_data_set = APIDataSet( url=TEST_URL, @@ -289,10 +302,12 @@ def test_successful_save(self, requests_mock, method): TEST_URL_WITH_PARAMS, headers=TEST_HEADERS, status_code=requests.codes.ok, + json=json_callback, ) - response = api_data_set._save(TEST_SAVE_DATA) - + response = api_data_set._save(data) assert isinstance(response, requests.Response) + assert response.json() == TEST_SAVE_DATA + elif method == "GET": api_data_set = APIDataSet( url=TEST_URL, @@ -315,6 +330,13 @@ def test_successful_save_with_json(self, requests_mock, save_methods): Given an APIDataSet class Then check we get a response """ + + def json_callback( + request: requests.Request, context: Any # pylint: disable=unused-argument + ) -> dict: + """Callback that sends back the json.""" + return request.json() + api_data_set = APIDataSet( url=TEST_URL, method=save_methods, @@ -324,17 +346,20 @@ def test_successful_save_with_json(self, requests_mock, save_methods): save_methods, TEST_URL, headers=TEST_HEADERS, - text=json.dumps(TEST_JSON_RESPONSE_DATA), + json=json_callback, ) response_list = api_data_set._save(TEST_SAVE_DATA) - assert isinstance(response_list, requests.Response) + # check that the data was sent in the correct format + assert response_list.json() == TEST_SAVE_DATA response_dict = api_data_set._save({"item1": "key1"}) assert isinstance(response_dict, requests.Response) + assert response_dict.json() == {"item1": "key1"} response_json = api_data_set._save(TEST_SAVE_DATA[0]) assert isinstance(response_json, requests.Response) + assert response_json.json() == TEST_SAVE_DATA[0] @pytest.mark.parametrize("save_methods", SAVE_METHODS) def test_save_http_error(self, requests_mock, save_methods): diff --git a/kedro-datasets/tests/databricks/test_managed_table_dataset.py b/kedro-datasets/tests/databricks/test_managed_table_dataset.py index 9aae08707..c4dfce067 100644 --- a/kedro-datasets/tests/databricks/test_managed_table_dataset.py +++ b/kedro-datasets/tests/databricks/test_managed_table_dataset.py @@ -187,7 +187,7 @@ def test_full_table(self): assert unity_ds._table.full_table_location() == "`default`.`test`" with pytest.raises(TypeError): - ManagedTableDataSet() # pylint: disable=no-value-for-parameter + ManagedTableDataSet() def test_describe(self): unity_ds = ManagedTableDataSet(table="test") diff --git a/kedro-datasets/tests/pandas/test_hdf_dataset.py b/kedro-datasets/tests/pandas/test_hdf_dataset.py index 67e5468ef..02a796d61 100644 --- a/kedro-datasets/tests/pandas/test_hdf_dataset.py +++ b/kedro-datasets/tests/pandas/test_hdf_dataset.py @@ -128,7 +128,6 @@ def test_save_and_load_df_with_categorical_variables(self, hdf_data_set): def test_thread_lock_usage(self, hdf_data_set, dummy_dataframe, mocker): """Test thread lock usage.""" - # pylint: disable=no-member mocked_lock = HDFDataSet._lock mocked_lock.assert_not_called()