From fd8162d5bf384ef666c01ef2c529d01fd9fa8354 Mon Sep 17 00:00:00 2001 From: Julius Hetzel Date: Fri, 23 Jun 2023 16:53:05 +0200 Subject: [PATCH] =?UTF-8?q?Check=20for=20the=20whole=20protocol=20includin?= =?UTF-8?q?g=20delimited=20(://)=20when=20j=E2=80=A6=20(#2715)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Check for the whole protocol including the delimiter (://) when joining the path for partitions in a partitioned data set Signed-off-by: Julius Hetzel * Add bugfix description to release notes Signed-off-by: Julius Hetzel --------- Signed-off-by: Julius Hetzel --- RELEASE.md | 1 + kedro/io/partitioned_dataset.py | 7 ++++--- tests/io/test_partitioned_dataset.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index c8ffa1188a..260ebe289c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -3,6 +3,7 @@ ## Major features and improvements ## Bug fixes and other changes +* Compare against the protocol and its delimiter (`://`) in `PartitionedDataSet` so that the protocol is correctly passed to partitions whose paths start with the same characters as the protocol (e.g. `s3://s3-my-bucket`). 
## Breaking changes to the API diff --git a/kedro/io/partitioned_dataset.py b/kedro/io/partitioned_dataset.py index 07b34ca65c..683abac283 100644 --- a/kedro/io/partitioned_dataset.py +++ b/kedro/io/partitioned_dataset.py @@ -263,10 +263,11 @@ def _list_partitions(self) -> list[str]: ] def _join_protocol(self, path: str) -> str: - if self._path.startswith(self._protocol) and not path.startswith( - self._protocol + protocol_prefix = f"{self._protocol}://" + if self._path.startswith(protocol_prefix) and not path.startswith( + protocol_prefix ): - return f"{self._protocol}://{path}" + return f"{protocol_prefix}{path}" return path def _partition_to_path(self, path: str): diff --git a/tests/io/test_partitioned_dataset.py b/tests/io/test_partitioned_dataset.py index 05993593ae..2d0af84a61 100644 --- a/tests/io/test_partitioned_dataset.py +++ b/tests/io/test_partitioned_dataset.py @@ -401,7 +401,7 @@ def test_dataset_creds(self, pds_config, expected_ds_creds, global_creds): assert pds._credentials == global_creds -BUCKET_NAME = "fake_bucket_name" +BUCKET_NAME = "s3_fake_bucket_name" S3_DATASET_DEFINITION = [ "pandas.CSVDataSet", "kedro.extras.datasets.pandas.CSVDataSet",