diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md
index cb3332edb..9149a8a08 100755
--- a/kedro-datasets/RELEASE.md
+++ b/kedro-datasets/RELEASE.md
@@ -6,11 +6,13 @@
 ## Bug fixes and other changes
 * Fixed bug with loading models saved with `TensorFlowModelDataset`.
+* Make dataset parameters keyword-only.

 ## Community contributions

 Many thanks to the following Kedroids for contributing PRs to this release:
 * [Edouard59](https://github.com/Edouard59)
 * [Miguel Rodriguez Gutierrez](https://github.com/MigQ2)
+* [felixscherz](https://github.com/felixscherz)

 # Release 1.8.0
 ## Major features and improvements
diff --git a/kedro-datasets/kedro_datasets/api/api_dataset.py b/kedro-datasets/kedro_datasets/api/api_dataset.py
index 48de22128..b4c979304 100644
--- a/kedro-datasets/kedro_datasets/api/api_dataset.py
+++ b/kedro-datasets/kedro_datasets/api/api_dataset.py
@@ -90,6 +90,7 @@ class APIDataset(AbstractDataset[None, requests.Response]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         url: str,
         method: str = "GET",
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py
index b735408fd..61a03047c 100644
--- a/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py
+++ b/kedro-datasets/kedro_datasets/biosequence/biosequence_dataset.py
@@ -47,6 +47,7 @@ class BioSequenceDataset(AbstractDataset[List, List]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py
index 6bafe81ee..d0127513e 100644
--- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py
+++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py
@@ -83,6 +83,7 @@ class ParquetDataset(AbstractDataset[dd.DataFrame, dd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py
index 130a7775b..3dd019a1a 100644
--- a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py
+++ b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py
@@ -197,6 +197,7 @@ class ManagedTableDataset(AbstractVersionedDataset):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         table: str,
         catalog: str = None,
         database: str = "default",
@@ -204,7 +205,6 @@ def __init__(  # noqa: PLR0913
         dataframe_type: str = "spark",
         primary_key: Optional[Union[str, List[str]]] = None,
         version: Version = None,
-        *,
         # the following parameters are used by project hooks
         # to create or update table properties
         schema: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/email/message_dataset.py b/kedro-datasets/kedro_datasets/email/message_dataset.py
index b6b8dcd85..fd4a5e727 100644
--- a/kedro-datasets/kedro_datasets/email/message_dataset.py
+++ b/kedro-datasets/kedro_datasets/email/message_dataset.py
@@ -52,6 +52,7 @@ class EmailMessageDataset(AbstractVersionedDataset[Message, Message]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py
index f4e231fb3..a284b46bc 100644
--- a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py
+++ b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py
@@ -48,6 +48,7 @@ class GeoJSONDataset(

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py
index 18e817c9b..8bae8f6ea 100644
--- a/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py
+++ b/kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py
@@ -28,7 +28,7 @@ class HoloviewsWriter(AbstractVersionedDataset[HoloViews, NoReturn]):
         >>> from kedro_datasets.holoviews import HoloviewsWriter
         >>>
         >>> curve = hv.Curve(range(10))
-        >>> holoviews_writer = HoloviewsWriter("/tmp/holoviews")
+        >>> holoviews_writer = HoloviewsWriter(filepath="/tmp/holoviews")
         >>>
         >>> holoviews_writer.save(curve)

@@ -38,6 +38,7 @@ class HoloviewsWriter(AbstractVersionedDataset[HoloViews, NoReturn]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         fs_args: Dict[str, Any] = None,
         credentials: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/json/json_dataset.py b/kedro-datasets/kedro_datasets/json/json_dataset.py
index 25c8b1950..2579f1afd 100644
--- a/kedro-datasets/kedro_datasets/json/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/json/json_dataset.py
@@ -50,6 +50,7 @@ class JSONDataset(AbstractVersionedDataset[Any, Any]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         save_args: Dict[str, Any] = None,
         version: Version = None,
diff --git a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
index bea1cde1c..5060ff8a8 100644
--- a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
+++ b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
@@ -103,6 +103,7 @@ class MatplotlibWriter(

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         fs_args: Dict[str, Any] = None,
         credentials: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
index 544f72f04..37d03e4b4 100644
--- a/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
+++ b/kedro-datasets/kedro_datasets/networkx/gml_dataset.py
@@ -38,6 +38,7 @@ class GMLDataset(AbstractVersionedDataset[networkx.Graph, networkx.Graph]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py
index 101bdf22d..63351d062 100644
--- a/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py
+++ b/kedro-datasets/kedro_datasets/networkx/graphml_dataset.py
@@ -37,6 +37,7 @@ class GraphMLDataset(AbstractVersionedDataset[networkx.Graph, networkx.Graph]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/networkx/json_dataset.py b/kedro-datasets/kedro_datasets/networkx/json_dataset.py
index af793ea3a..27a2f0fa7 100644
--- a/kedro-datasets/kedro_datasets/networkx/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/networkx/json_dataset.py
@@ -38,6 +38,7 @@ class JSONDataset(AbstractVersionedDataset[networkx.Graph, networkx.Graph]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py
index d81cdacac..f16d7ac1b 100644
--- a/kedro-datasets/kedro_datasets/pandas/csv_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/csv_dataset.py
@@ -70,6 +70,7 @@ class CSVDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py b/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py
index 9252dfa2d..e492f0375 100644
--- a/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/deltatable_dataset.py
@@ -86,6 +86,7 @@ class DeltaTableDataset(AbstractDataset):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: Optional[str] = None,
         catalog_type: Optional[DataCatalog] = None,
         catalog_name: Optional[str] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py
index 980c2efad..67b4a6565 100644
--- a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py
@@ -110,6 +110,7 @@ class ExcelDataset(

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         engine: str = "openpyxl",
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py
index 3611382d2..3282ab907 100644
--- a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py
@@ -71,6 +71,7 @@ class FeatherDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py
index e3054309b..fedd05442 100644
--- a/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/gbq_dataset.py
@@ -64,6 +64,7 @@ class GBQTableDataset(AbstractDataset[None, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         dataset: str,
         table_name: str,
         project: str = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py
index b48e0f9ba..613a91383 100644
--- a/kedro-datasets/kedro_datasets/pandas/generic_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/generic_dataset.py
@@ -84,6 +84,7 @@ class GenericDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         file_format: str,
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py
index 69132e24e..4865e034e 100644
--- a/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/hdf_dataset.py
@@ -57,6 +57,7 @@ class HDFDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         key: str,
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/json_dataset.py b/kedro-datasets/kedro_datasets/pandas/json_dataset.py
index f3ae9893f..5c075855c 100644
--- a/kedro-datasets/kedro_datasets/pandas/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/json_dataset.py
@@ -65,6 +65,7 @@ class JSONDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py
index 22c7e495e..bb925a8d4 100644
--- a/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/parquet_dataset.py
@@ -76,6 +76,7 @@ class ParquetDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py
index ca5fdcfc2..f708432ba 100644
--- a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py
@@ -153,6 +153,7 @@ class SQLTableDataset(AbstractDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         table_name: str,
         credentials: dict[str, Any],
         load_args: dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py
index 126d7d402..731c77970 100644
--- a/kedro-datasets/kedro_datasets/pandas/xml_dataset.py
+++ b/kedro-datasets/kedro_datasets/pandas/xml_dataset.py
@@ -48,6 +48,7 @@ class XMLDataset(AbstractVersionedDataset[pd.DataFrame, pd.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py b/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py
index cab476611..59eb91971 100644
--- a/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py
+++ b/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py
@@ -67,6 +67,7 @@ class IncrementalDataset(PartitionedDataset):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         path: str,
         dataset: str | type[AbstractDataset] | dict[str, Any],
         checkpoint: str | dict[str, Any] | None = None,
diff --git a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py
index 2c13061c2..c1a24524a 100644
--- a/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py
+++ b/kedro-datasets/kedro_datasets/pickle/pickle_dataset.py
@@ -71,6 +71,7 @@ class PickleDataset(AbstractVersionedDataset[Any, Any]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         backend: str = "pickle",
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/pillow/image_dataset.py b/kedro-datasets/kedro_datasets/pillow/image_dataset.py
index 2bffa8716..67855875f 100644
--- a/kedro-datasets/kedro_datasets/pillow/image_dataset.py
+++ b/kedro-datasets/kedro_datasets/pillow/image_dataset.py
@@ -34,6 +34,7 @@ class ImageDataset(AbstractVersionedDataset[Image.Image, Image.Image]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         save_args: Dict[str, Any] = None,
         version: Version = None,
diff --git a/kedro-datasets/kedro_datasets/plotly/json_dataset.py b/kedro-datasets/kedro_datasets/plotly/json_dataset.py
index d16eab615..ea51b3e2c 100644
--- a/kedro-datasets/kedro_datasets/plotly/json_dataset.py
+++ b/kedro-datasets/kedro_datasets/plotly/json_dataset.py
@@ -52,6 +52,7 @@ class JSONDataset(

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py
index 67dd63100..303fb3612 100644
--- a/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py
+++ b/kedro-datasets/kedro_datasets/plotly/plotly_dataset.py
@@ -68,6 +68,7 @@ class PlotlyDataset(JSONDataset):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         plotly_args: Dict[str, Any],
         load_args: Dict[str, Any] = None,
@@ -113,7 +114,14 @@ def __init__(  # noqa: PLR0913
             metadata: Any arbitrary metadata. This is ignored by Kedro,
                 but may be consumed by users or external plugins.
         """
-        super().__init__(filepath, load_args, save_args, version, credentials, fs_args)
+        super().__init__(
+            filepath=filepath,
+            load_args=load_args,
+            save_args=save_args,
+            version=version,
+            credentials=credentials,
+            fs_args=fs_args,
+        )
         self._plotly_args = plotly_args

         _fs_args = deepcopy(fs_args) or {}
diff --git a/kedro-datasets/kedro_datasets/polars/csv_dataset.py b/kedro-datasets/kedro_datasets/polars/csv_dataset.py
index 7b20da814..8ee0a49f1 100644
--- a/kedro-datasets/kedro_datasets/polars/csv_dataset.py
+++ b/kedro-datasets/kedro_datasets/polars/csv_dataset.py
@@ -68,6 +68,7 @@ class CSVDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py
index 007c123ef..b72642899 100644
--- a/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py
+++ b/kedro-datasets/kedro_datasets/polars/eager_polars_dataset.py
@@ -54,6 +54,7 @@ class EagerPolarsDataset(AbstractVersionedDataset[pl.DataFrame, pl.DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         file_format: str,
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
index 0de1355e6..6a57f20bd 100644
--- a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
+++ b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py
@@ -75,6 +75,7 @@ class LazyPolarsDataset(AbstractVersionedDataset[pl.LazyFrame, PolarsFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         file_format: str,
         load_args: Optional[Dict[str, Any]] = None,
diff --git a/kedro-datasets/kedro_datasets/redis/redis_dataset.py b/kedro-datasets/kedro_datasets/redis/redis_dataset.py
index 13003b6e2..8cccc1423 100644
--- a/kedro-datasets/kedro_datasets/redis/redis_dataset.py
+++ b/kedro-datasets/kedro_datasets/redis/redis_dataset.py
@@ -61,6 +61,7 @@ class PickleDataset(AbstractDataset[Any, Any]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         key: str,
         backend: str = "pickle",
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py
index 3eea86127..7463236a9 100644
--- a/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py
+++ b/kedro-datasets/kedro_datasets/snowflake/snowpark_dataset.py
@@ -103,6 +103,7 @@ class SnowparkTableDataset(AbstractDataset):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         table_name: str,
         schema: str = None,
         database: str = None,
diff --git a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py
index 5f7673ed0..313cbb821 100644
--- a/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/deltatable_dataset.py
@@ -65,7 +65,7 @@ class DeltaTableDataset(AbstractDataset[None, DeltaTable]):
     # using ``ThreadRunner`` instead
     _SINGLE_PROCESS = True

-    def __init__(self, filepath: str, metadata: Dict[str, Any] = None) -> None:
+    def __init__(self, *, filepath: str, metadata: Dict[str, Any] = None) -> None:
        """Creates a new instance of ``DeltaTableDataset``.

         Args:
diff --git a/kedro-datasets/kedro_datasets/spark/spark_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
index 38211b4eb..c73b8dcf6 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_dataset.py
@@ -262,6 +262,7 @@ class SparkDataset(AbstractVersionedDataset[DataFrame, DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         file_format: str = "parquet",
         load_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
index bb14a63c0..aa0bf7ea7 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
@@ -70,6 +70,7 @@ class SparkHiveDataset(AbstractDataset[DataFrame, DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         database: str,
         table: str,
         write_mode: str = "errorifexists",
diff --git a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
index 22a4ab110..a04277e82 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_jdbc_dataset.py
@@ -70,6 +70,7 @@ class SparkJDBCDataset(AbstractDataset[DataFrame, DataFrame]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         url: str,
         table: str,
         credentials: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py
index 8cf9724c7..5bd996d3c 100644
--- a/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py
+++ b/kedro-datasets/kedro_datasets/spark/spark_streaming_dataset.py
@@ -42,6 +42,7 @@ class SparkStreamingDataset(AbstractDataset):

     def __init__(
         self,
+        *,
         filepath: str = "",
         file_format: str = "",
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py
index 5341d055f..293f12810 100644
--- a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py
+++ b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py
@@ -88,6 +88,7 @@ class SVMLightDataset(AbstractVersionedDataset[_DI, _DO]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py
index e62d6b2c9..e9acfedae 100644
--- a/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py
+++ b/kedro-datasets/kedro_datasets/tensorflow/tensorflow_model_dataset.py
@@ -62,6 +62,7 @@ class TensorFlowModelDataset(AbstractVersionedDataset[tf.keras.Model, tf.keras.Model]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         load_args: Dict[str, Any] = None,
         save_args: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/text/text_dataset.py b/kedro-datasets/kedro_datasets/text/text_dataset.py
index b784d5e9f..b734bb429 100644
--- a/kedro-datasets/kedro_datasets/text/text_dataset.py
+++ b/kedro-datasets/kedro_datasets/text/text_dataset.py
@@ -44,6 +44,7 @@ class TextDataset(AbstractVersionedDataset[str, str]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         version: Version = None,
         credentials: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/video/video_dataset.py b/kedro-datasets/kedro_datasets/video/video_dataset.py
index d2520fd5d..0f10b7681 100644
--- a/kedro-datasets/kedro_datasets/video/video_dataset.py
+++ b/kedro-datasets/kedro_datasets/video/video_dataset.py
@@ -268,6 +268,7 @@ class VideoDataset(AbstractDataset[AbstractVideo, AbstractVideo]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         fourcc: Optional[str] = "mp4v",
         credentials: Dict[str, Any] = None,
diff --git a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py
index ee5683e31..f3b0ac7e9 100644
--- a/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py
+++ b/kedro-datasets/kedro_datasets/yaml/yaml_dataset.py
@@ -47,6 +47,7 @@ class YAMLDataset(AbstractVersionedDataset[Dict, Dict]):

     def __init__(  # noqa: PLR0913
         self,
+        *,
         filepath: str,
         save_args: Dict[str, Any] = None,
         version: Version = None,
diff --git a/kedro-datasets/tests/holoviews/test_holoviews_writer.py b/kedro-datasets/tests/holoviews/test_holoviews_writer.py
index 866637b9b..9f71ede9f 100644
--- a/kedro-datasets/tests/holoviews/test_holoviews_writer.py
+++ b/kedro-datasets/tests/holoviews/test_holoviews_writer.py
@@ -27,12 +27,14 @@ def dummy_hv_object():

 @pytest.fixture
 def hv_writer(filepath_png, save_args, fs_args):
-    return HoloviewsWriter(filepath_png, save_args=save_args, fs_args=fs_args)
+    return HoloviewsWriter(filepath=filepath_png, save_args=save_args, fs_args=fs_args)


 @pytest.fixture
 def versioned_hv_writer(filepath_png, load_version, save_version):
-    return HoloviewsWriter(filepath_png, version=Version(load_version, save_version))
+    return HoloviewsWriter(
+        filepath=filepath_png, version=Version(load_version, save_version)
+    )


 @pytest.mark.skipif(
@@ -63,7 +65,7 @@ def test_save_data(self, tmp_path, dummy_hv_object, hv_writer):
     )
     def test_open_extra_args(self, tmp_path, fs_args, mocker):
         fs_mock = mocker.patch("fsspec.filesystem")
-        writer = HoloviewsWriter(str(tmp_path), fs_args)
+        writer = HoloviewsWriter(filepath=str(tmp_path), fs_args=fs_args)

         fs_mock.assert_called_once_with("file", auto_mkdir=True, storage_option="value")
         assert writer._fs_open_args_save == fs_args["open_args_save"]
diff --git a/kedro-datasets/tests/pandas/test_csv_dataset.py b/kedro-datasets/tests/pandas/test_csv_dataset.py
index c48352a01..0a042918d 100644
--- a/kedro-datasets/tests/pandas/test_csv_dataset.py
+++ b/kedro-datasets/tests/pandas/test_csv_dataset.py
@@ -399,7 +399,7 @@ def test_load_and_confirm(self, mocker, mocked_csv_in_s3, mocked_dataframe):
         (any implementation using S3FileSystem).
         Likely to be a bug with moto (tested with moto==4.0.8, moto==3.0.4) -- see #67
         """
-        df = CSVDataset(mocked_csv_in_s3)
+        df = CSVDataset(filepath=mocked_csv_in_s3)
         assert df._protocol == "s3"
         # if Python >= 3.10, modify test procedure (see #67)
         if sys.version_info[1] >= 10:
diff --git a/kedro-datasets/tests/pandas/test_deltatable_dataset.py b/kedro-datasets/tests/pandas/test_deltatable_dataset.py
index d38fe7c48..29427b5ab 100644
--- a/kedro-datasets/tests/pandas/test_deltatable_dataset.py
+++ b/kedro-datasets/tests/pandas/test_deltatable_dataset.py
@@ -73,7 +73,7 @@ def test_append(self, deltatable_dataset_from_path, dummy_df):

     def test_versioning(self, filepath, dummy_df):
         """Test loading different versions."""
-        deltatable_dataset_from_path = DeltaTableDataset(filepath)
+        deltatable_dataset_from_path = DeltaTableDataset(filepath=filepath)
         deltatable_dataset_from_path.save(dummy_df)
         assert deltatable_dataset_from_path.get_loaded_version() == 0
         new_df = pd.DataFrame({"col1": [0, 0], "col2": [1, 1], "col3": [2, 2]})
@@ -81,14 +81,14 @@ def test_versioning(self, filepath, dummy_df):
         assert deltatable_dataset_from_path.get_loaded_version() == 1

         deltatable_dataset_from_path0 = DeltaTableDataset(
-            filepath, load_args={"version": 0}
+            filepath=filepath, load_args={"version": 0}
         )
         version_0 = deltatable_dataset_from_path0.load()
         assert deltatable_dataset_from_path0.get_loaded_version() == 0
         assert_frame_equal(dummy_df, version_0)

         deltatable_dataset_from_path1 = DeltaTableDataset(
-            filepath, load_args={"version": 1}
+            filepath=filepath, load_args={"version": 1}
         )
         version_1 = deltatable_dataset_from_path1.load()
         assert deltatable_dataset_from_path1.get_loaded_version() == 1
@@ -108,7 +108,7 @@ def test_property_schema(self, deltatable_dataset_from_path, dummy_df):

     def test_describe(self, filepath):
         """Test the describe method."""
-        deltatable_dataset_from_path = DeltaTableDataset(filepath)
+        deltatable_dataset_from_path = DeltaTableDataset(filepath=filepath)
         desc = deltatable_dataset_from_path._describe()
         assert desc["filepath"] == filepath
         assert desc["version"] is None
@@ -152,7 +152,7 @@ def test_unsupported_write_mode(self, filepath):
         """Test write mode not supported."""
         pattern = "Write mode unsupported is not supported"
         with pytest.raises(DatasetError, match=pattern):
-            DeltaTableDataset(filepath, save_args={"mode": "unsupported"})
+            DeltaTableDataset(filepath=filepath, save_args={"mode": "unsupported"})

     def test_metadata(self, deltatable_dataset_from_path, dummy_df):
         """Test metadata property exists and return a metadata object."""
diff --git a/kedro-datasets/tests/pandas/test_gbq_dataset.py b/kedro-datasets/tests/pandas/test_gbq_dataset.py
index 65c39d2ab..7d3b23b3b 100644
--- a/kedro-datasets/tests/pandas/test_gbq_dataset.py
+++ b/kedro-datasets/tests/pandas/test_gbq_dataset.py
@@ -72,7 +72,7 @@ def test_exists(self, mock_bigquery_client):
             "exists",
         ]

-        dataset = GBQTableDataset(DATASET, TABLE_NAME)
+        dataset = GBQTableDataset(dataset=DATASET, table_name=TABLE_NAME)
         assert not dataset.exists()
         assert dataset.exists()
diff --git a/kedro-datasets/tests/partitions/test_incremental_dataset.py b/kedro-datasets/tests/partitions/test_incremental_dataset.py
index 539ab0a66..4e2571062 100644
--- a/kedro-datasets/tests/partitions/test_incremental_dataset.py
+++ b/kedro-datasets/tests/partitions/test_incremental_dataset.py
@@ -68,7 +68,7 @@ class TestIncrementalDatasetLocal:
     def test_load_and_confirm(self, local_csvs, partitioned_data_pandas):
         """Test the standard flow for loading, confirming and reloading
         an IncrementalDataset"""
-        pds = IncrementalDataset(str(local_csvs), DATASET)
+        pds = IncrementalDataset(path=str(local_csvs), dataset=DATASET)
         loaded = pds.load()
         assert loaded.keys() == partitioned_data_pandas.keys()
         for partition_id, data in loaded.items():
@@ -92,7 +92,7 @@ def test_save(self, local_csvs):
         df = pd.DataFrame({"dummy": [1, 2, 3]})
         new_partition_key = "p05/data.csv"
         new_partition_path = local_csvs / new_partition_key
-        pds = IncrementalDataset(str(local_csvs), DATASET)
+        pds = IncrementalDataset(path=str(local_csvs), dataset=DATASET)

         assert not new_partition_path.exists()
         assert new_partition_key not in pds.load()
@@ -123,7 +123,7 @@ def test_filename_suffix(self, filename_suffix, expected_partitions, local_csvs):
         """Test how specifying filename_suffix affects the available
         partitions and their names"""
         pds = IncrementalDataset(
-            str(local_csvs), DATASET, filename_suffix=filename_suffix
+            path=str(local_csvs), dataset=DATASET, filename_suffix=filename_suffix
         )
         loaded = pds.load()
         assert loaded.keys() == expected_partitions
@@ -153,7 +153,9 @@ def test_force_checkpoint_no_checkpoint_file(
     ):
         """Test how forcing checkpoint value affects the available partitions
         if the checkpoint file does not exist"""
-        pds = IncrementalDataset(str(local_csvs), DATASET, checkpoint=forced_checkpoint)
+        pds = IncrementalDataset(
+            path=str(local_csvs), dataset=DATASET, checkpoint=forced_checkpoint
+        )
         loaded = pds.load()
         assert loaded.keys() == expected_partitions
@@ -188,11 +190,13 @@ def test_force_checkpoint_checkpoint_file_exists(
     ):
         """Test how forcing checkpoint value affects the available partitions
         if the checkpoint file exists"""
-        IncrementalDataset(str(local_csvs), DATASET).confirm()
+        IncrementalDataset(path=str(local_csvs), dataset=DATASET).confirm()
         checkpoint = local_csvs / IncrementalDataset.DEFAULT_CHECKPOINT_FILENAME
         assert checkpoint.read_text() == "p04/data.csv"

-        pds = IncrementalDataset(str(local_csvs), DATASET, checkpoint=forced_checkpoint)
+        pds = IncrementalDataset(
+            path=str(local_csvs), dataset=DATASET, checkpoint=forced_checkpoint
+        )
         assert pds._checkpoint.exists()
         loaded = pds.load()
         assert loaded.keys() == expected_partitions
@@ -203,7 +207,9 @@ def test_force_checkpoint_no_partitions(self, forced_checkpoint, local_csvs):
         """Test that forcing the checkpoint to certain values results in no
         partitions being returned"""
-        pds = IncrementalDataset(str(local_csvs), DATASET, checkpoint=forced_checkpoint)
+        pds = IncrementalDataset(
+            path=str(local_csvs), dataset=DATASET, checkpoint=forced_checkpoint
+        )
         loaded = pds.load()
         assert not loaded
@@ -219,7 +225,9 @@ def test_checkpoint_path(self, local_csvs, partitioned_data_pandas):
         assert not checkpoint_path.exists()

         IncrementalDataset(
-            str(local_csvs), DATASET, checkpoint={"filepath": str(checkpoint_path)}
+            path=str(local_csvs),
+            dataset=DATASET,
+            checkpoint={"filepath": str(checkpoint_path)},
         ).confirm()
         assert checkpoint_path.is_file()
         assert checkpoint_path.read_text() == max(partitioned_data_pandas)
@@ -239,7 +247,9 @@ def test_checkpoint_type(
         self, tmp_path, checkpoint_config, expected_checkpoint_class
     ):
         """Test configuring a different checkpoint dataset type"""
-        pds = IncrementalDataset(str(tmp_path), DATASET, checkpoint=checkpoint_config)
+        pds = IncrementalDataset(
+            path=str(tmp_path), dataset=DATASET, checkpoint=checkpoint_config
+        )
         assert isinstance(pds._checkpoint, expected_checkpoint_class)

     @pytest.mark.parametrize(
@@ -262,7 +272,9 @@ def test_checkpoint_type(
     def test_version_not_allowed(self, tmp_path, checkpoint_config, error_pattern):
         """Test that invalid checkpoint configurations raise expected errors"""
         with pytest.raises(DatasetError, match=re.escape(error_pattern)):
-            IncrementalDataset(str(tmp_path), DATASET, checkpoint=checkpoint_config)
+            IncrementalDataset(
+                path=str(tmp_path), dataset=DATASET, checkpoint=checkpoint_config
+            )

     @pytest.mark.parametrize(
         "pds_config,fs_creds,dataset_creds,checkpoint_creds",
@@ -316,7 +328,7 @@ def test_version_not_allowed(self, tmp_path, checkpoint_config, error_pattern):
     def test_credentials(self, pds_config, fs_creds, dataset_creds, checkpoint_creds):
         """Test correctness of credentials propagation into the dataset and
         checkpoint constructors"""
-        pds = IncrementalDataset(str(Path.cwd()), **pds_config)
+        pds = IncrementalDataset(path=str(Path.cwd()), **pds_config)
         assert pds._credentials == fs_creds
         assert pds._dataset_config[CREDENTIALS_KEY] == dataset_creds
         assert pds._checkpoint_config[CREDENTIALS_KEY] == checkpoint_creds
@@ -343,7 +355,9 @@ def test_comparison_func(self, comparison_func, expected_partitions, local_csvs):
             "force_checkpoint": "p02/data.csv",
             "comparison_func": comparison_func,
         }
-        pds = IncrementalDataset(str(local_csvs), DATASET, checkpoint=checkpoint_config)
+        pds = IncrementalDataset(
+            path=str(local_csvs), dataset=DATASET, checkpoint=checkpoint_config
+        )
         assert pds.load().keys() == expected_partitions
@@ -382,7 +396,7 @@ class TestIncrementalDatasetS3:
     def test_load_and_confirm(self, mocked_csvs_in_s3, partitioned_data_pandas):
         """Test the standard flow for loading, confirming and reloading
         a IncrementalDataset in S3"""
-        pds = IncrementalDataset(mocked_csvs_in_s3, DATASET)
+        pds = IncrementalDataset(path=mocked_csvs_in_s3, dataset=DATASET)
         assert pds._checkpoint._protocol == "s3"
         loaded = pds.load()
         assert loaded.keys() == partitioned_data_pandas.keys()
@@ -399,7 +413,7 @@ def test_load_and_confirm_s3a(
         self, mocked_csvs_in_s3, partitioned_data_pandas, mocker
     ):
         s3a_path = f"s3a://{mocked_csvs_in_s3.split('://', 1)[1]}"
-        pds = IncrementalDataset(s3a_path, DATASET)
+        pds = IncrementalDataset(path=s3a_path, dataset=DATASET)
         assert pds._protocol == "s3a"
         assert pds._checkpoint._protocol == "s3"
@@ -440,7 +454,7 @@ def test_force_checkpoint_no_checkpoint_file(
         """Test how forcing checkpoint value affects the available partitions
         in S3 if the checkpoint file does not exist"""
         pds = IncrementalDataset(
-            mocked_csvs_in_s3, DATASET, checkpoint=forced_checkpoint
+            path=mocked_csvs_in_s3, dataset=DATASET, checkpoint=forced_checkpoint
         )
         loaded = pds.load()
         assert loaded.keys() == expected_partitions
@@ -476,15 +490,15 @@ def test_force_checkpoint_checkpoint_file_exists(
         """Test how forcing checkpoint value affects the available partitions
         in S3 if the checkpoint file exists"""
         # create checkpoint and assert that it exists
-        IncrementalDataset(mocked_csvs_in_s3, DATASET).confirm()
+        IncrementalDataset(path=mocked_csvs_in_s3, dataset=DATASET).confirm()
         checkpoint_path = (
             f"{mocked_csvs_in_s3}/{IncrementalDataset.DEFAULT_CHECKPOINT_FILENAME}"
         )
-        checkpoint_value = TextDataset(checkpoint_path).load()
+        checkpoint_value = TextDataset(filepath=checkpoint_path).load()
         assert checkpoint_value == "p04/data.csv"

         pds = IncrementalDataset(
-            mocked_csvs_in_s3, DATASET, checkpoint=forced_checkpoint
+            path=mocked_csvs_in_s3, dataset=DATASET, checkpoint=forced_checkpoint
         )
         assert pds._checkpoint.exists()
         loaded = pds.load()
@@ -497,7 +511,7 @@ def test_force_checkpoint_no_partitions(self, forced_checkpoint, mocked_csvs_in_s3):
         """Test that forcing the checkpoint to certain values results in no
         partitions returned from S3"""
         pds = IncrementalDataset(
-            mocked_csvs_in_s3, DATASET, checkpoint=forced_checkpoint
+            path=mocked_csvs_in_s3, dataset=DATASET, checkpoint=forced_checkpoint
         )
         loaded = pds.load()
         assert not loaded
diff --git a/kedro-datasets/tests/pillow/test_image_dataset.py b/kedro-datasets/tests/pillow/test_image_dataset.py
index ba5813235..b75b9708a 100644
--- a/kedro-datasets/tests/pillow/test_image_dataset.py
+++ b/kedro-datasets/tests/pillow/test_image_dataset.py
@@ -121,7 +121,7 @@ def test_catalog_release(self, mocker):
     )
     def test_get_format(self, image_filepath, expected_extension):
         """Unit test for pillow.ImageDataset._get_format() fn"""
-        dataset = ImageDataset(image_filepath)
+        dataset = ImageDataset(filepath=image_filepath)
         ext = dataset._get_format(Path(image_filepath))
         assert expected_extension == ext
diff --git a/kedro-datasets/tests/polars/test_csv_dataset.py b/kedro-datasets/tests/polars/test_csv_dataset.py
index 6a2de3501..6c7e341af 100644
--- a/kedro-datasets/tests/polars/test_csv_dataset.py
+++ b/kedro-datasets/tests/polars/test_csv_dataset.py
@@ -359,7 +359,7 @@ def test_load_and_confirm(self, mocker, mocked_csv_in_s3, mocked_dataframe):
         (any implementation using S3FileSystem).
         Likely to be a bug with moto (tested with moto==4.0.8, moto==3.0.4) -- see #67
         """
-        df = CSVDataset(mocked_csv_in_s3)
+        df = CSVDataset(filepath=mocked_csv_in_s3)
         assert df._protocol == "s3"
         # if Python >= 3.10, modify test procedure (see #67)
         if sys.version_info[1] >= 10:
diff --git a/kedro-datasets/tests/polars/test_lazy_polars_dataset.py b/kedro-datasets/tests/polars/test_lazy_polars_dataset.py
index 11dde502a..2a552482a 100644
--- a/kedro-datasets/tests/polars/test_lazy_polars_dataset.py
+++ b/kedro-datasets/tests/polars/test_lazy_polars_dataset.py
@@ -123,7 +123,7 @@ def test_load(self, dummy_dataframe, csv_data_set, filepath_csv):
         assert df.collect().shape == (2, 3)

     def test_load_s3(self, dummy_dataframe, mocked_csv_in_s3):
-        ds = LazyPolarsDataset(mocked_csv_in_s3, file_format="csv")
+        ds = LazyPolarsDataset(filepath=mocked_csv_in_s3, file_format="csv")
         assert ds._protocol == "s3"
diff --git a/kedro-datasets/tests/redis/test_redis_dataset.py b/kedro-datasets/tests/redis/test_redis_dataset.py
index 1a00c48c8..eba3ee84a 100644
--- a/kedro-datasets/tests/redis/test_redis_dataset.py
+++ b/kedro-datasets/tests/redis/test_redis_dataset.py
@@ -162,4 +162,4 @@ def test_no_backend(self, mocker):
             side_effect=ImportError,
         )
         with pytest.raises(ImportError, match=pattern):
-            PickleDataset("key", backend="fake.backend.does.not.exist")
+            PickleDataset(key="key", backend="fake.backend.does.not.exist")
diff --git a/kedro-datasets/tests/spark/test_spark_dataset.py b/kedro-datasets/tests/spark/test_spark_dataset.py
index 9c76ff1d6..8ba5feddf 100644
--- a/kedro-datasets/tests/spark/test_spark_dataset.py
+++ b/kedro-datasets/tests/spark/test_spark_dataset.py
@@ -974,9 +974,9 @@ def test_repr(self, version):
 @pytest.fixture
 def data_catalog(tmp_path):
     source_path = Path(__file__).parent / "data/test.parquet"
-    spark_in = SparkDataset(source_path.as_posix())
-    spark_out = SparkDataset((tmp_path / "spark_data").as_posix())
-    pickle_ds = PickleDataset((tmp_path / "pickle/test.pkl").as_posix())
+    spark_in = SparkDataset(filepath=source_path.as_posix())
+    spark_out = SparkDataset(filepath=(tmp_path / "spark_data").as_posix())
+    pickle_ds = PickleDataset(filepath=(tmp_path / "pickle/test.pkl").as_posix())

     return DataCatalog(
         {"spark_in": spark_in, "spark_out": spark_out, "pickle_ds": pickle_ds}
diff --git a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py
index b4541df28..345152761 100644
--- a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py
+++ b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py
@@ -260,7 +260,7 @@ def test_catalog_release(self, mocker, tensorflow_model_dataset):

     @pytest.mark.parametrize("fs_args", [{"storage_option": "value"}])
     def test_fs_args(self, fs_args, mocker, tensorflow_model_dataset):
         fs_mock = mocker.patch("fsspec.filesystem")
-        tensorflow_model_dataset("test.tf", fs_args=fs_args)
+        tensorflow_model_dataset(filepath="test.tf", fs_args=fs_args)

         fs_mock.assert_called_once_with("file", auto_mkdir=True, storage_option="value")
diff --git a/kedro-datasets/tests/video/test_video_dataset.py b/kedro-datasets/tests/video/test_video_dataset.py
index 8dccca833..e7141c720 100644
--- a/kedro-datasets/tests/video/test_video_dataset.py
+++ b/kedro-datasets/tests/video/test_video_dataset.py
@@ -53,7 +53,7 @@ def mocked_s3_bucket():
 class TestVideoDataset:
     def test_load_mp4(self, filepath_mp4, mp4_object):
         """Loading a mp4 dataset should create a FileVideo"""
-        ds = VideoDataset(filepath_mp4)
+        ds = VideoDataset(filepath=filepath_mp4)
         loaded_video = ds.load()
         assert_videos_equal(loaded_video, mp4_object)

@@ -180,7 +180,7 @@ def test_video_codecs(self, fourcc, suffix, color_video):
         """
         video_name = f"video.{suffix}"
         video = SequenceVideo(color_video._frames, 25, fourcc)
-        ds = VideoDataset(video_name, fourcc=None)
+        ds = VideoDataset(filepath=video_name, fourcc=None)
         ds.save(video)
         # We also need to verify that the correct codec was used
         # since OpenCV silently (with a warning in the log) fall backs to
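
Note (illustration only, not part of the patch): the bare `*` inserted into each `__init__` makes every following parameter keyword-only, so Python itself rejects positional construction at call time. A minimal sketch of the new behaviour, assuming a kedro-datasets build that includes this change (the path used is hypothetical):

```python
from kedro_datasets.pandas import CSVDataset

# Keyword construction works exactly as before.
ds = CSVDataset(filepath="data/01_raw/iris.csv")

# Positional construction now fails, because every parameter after the
# bare `*` in __init__ is keyword-only.
try:
    CSVDataset("data/01_raw/iris.csv")
except TypeError as err:
    print(err)  # e.g. "__init__() takes 1 positional argument but 2 were given"
```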
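Catalog-driven projects should be unaffected, since Kedro expands each catalog entry's options into the dataset constructor as keyword arguments; only code that instantiated datasets positionally in Python (as the updated tests above did) needs touching. A sketch, assuming a Kedro environment with kedro-datasets installed — the `cars` entry and its filepath are hypothetical:

```python
from kedro.io import DataCatalog

# Each entry's options are passed to the dataset class as keyword arguments,
# so a YAML/dict catalog keeps working with keyword-only parameters.
catalog = DataCatalog.from_config(
    {
        "cars": {
            "type": "pandas.CSVDataset",
            "filepath": "data/01_raw/cars.csv",
        }
    }
)
cars = catalog.load("cars")  # resolves to CSVDataset(filepath=...) under the hood
```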