diff --git a/RELEASE.md b/RELEASE.md index 9f274159b7..93240d0e42 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -25,6 +25,7 @@ * Updated MatplotlibWriter Dataset docs with working examples. * Modified implementation of the Kedro IPython extension to use `local_ns` rather than a global variable. * Refactored `ShelveStore` to it's own module to ensure multiprocessing works with it. +* Fixed `AttributeError` when using `/dbfs` paths on an unsupported environment. ## Minor breaking changes to the API diff --git a/kedro/extras/datasets/spark/spark_dataset.py b/kedro/extras/datasets/spark/spark_dataset.py index eb188e208f..dab50cd113 100644 --- a/kedro/extras/datasets/spark/spark_dataset.py +++ b/kedro/extras/datasets/spark/spark_dataset.py @@ -309,7 +309,13 @@ def __init__( # pylint: disable=too-many-arguments path = PurePosixPath(filepath) if filepath.startswith("/dbfs"): - dbutils = _get_dbutils(self._get_spark()) + dbutils = None + try: + dbutils = _get_dbutils(self._get_spark()) + except AttributeError: + # Databricks is known to raise AttributeError when called + # on an unsupported environment + pass if dbutils: glob_function = partial(_dbfs_glob, dbutils=dbutils) exists_function = partial(_dbfs_exists, dbutils=dbutils) diff --git a/tests/extras/datasets/spark/test_spark_dataset.py b/tests/extras/datasets/spark/test_spark_dataset.py index 7e83d09d8b..0d5607f6f5 100644 --- a/tests/extras/datasets/spark/test_spark_dataset.py +++ b/tests/extras/datasets/spark/test_spark_dataset.py @@ -614,6 +614,16 @@ def test_dbfs_exists(self, mocker): dbutils_mock.fs.ls.side_effect = Exception() assert not _dbfs_exists(test_path, dbutils_mock) + def test_ds_init_get_dbutils_raises_exception(self, mocker): + get_dbutils_mock = mocker.Mock() + get_dbutils_mock.side_effect = AttributeError + get_dbutils_mock = mocker.patch( + "kedro.extras.datasets.spark.spark_dataset._get_dbutils", get_dbutils_mock + ) + + data_set = SparkDataSet(filepath="/dbfs/tmp/data") + assert 
data_set._glob_function.__name__ == "iglob" + def test_ds_init_no_dbutils(self, mocker): get_dbutils_mock = mocker.patch( "kedro.extras.datasets.spark.spark_dataset._get_dbutils", return_value=None