safer use of "/dbfs" #1931

Closed · wants to merge 22 commits

Commits (22)
0941892  safer use of "/dbfs" (mle-els, Oct 13, 2022)
350d4ef  fix broken link (#1950) (noklam, Oct 18, 2022)
3aa25d2  Update dependabot.yml config (#1938) (SajidAlamQB, Oct 19, 2022)
2b10303  Update setup.py Jinja2 dependencies (#1954) (noklam, Oct 19, 2022)
7972d36  Update pip-tools requirement from ~=6.5 to ~=6.9 in /dependency (#1957) (dependabot[bot], Oct 19, 2022)
d3bdbbe  Update toposort requirement from ~=1.5 to ~=1.7 in /dependency (#1956) (dependabot[bot], Oct 19, 2022)
59895ea  Add deprecation warning to package_name argument in session create() … (merelcht, Oct 19, 2022)
b280494  Remove redundant `resolve_load_version` call (#1911) (noklam, Oct 20, 2022)
ba546f9  Make docstring in test starter match real starters (#1916) (deepyaman, Oct 20, 2022)
9fc83cd  Add show-docs command to Makefile (#1959) (stichbury, Oct 21, 2022)
1730e9c  Enable `TensorFlowModelDataset` to overwrite existing model, and add … (williamcaicedo, Oct 22, 2022)
723cb2d  make catching narrower (mle-els, Oct 22, 2022)
411b145  safer use of "/dbfs" (mle-els, Oct 13, 2022)
f4e6710  make catching narrower (mle-els, Oct 22, 2022)
00e90bb  safer use of "/dbfs" (mle-els, Oct 13, 2022)
5bad159  make catching narrower (mle-els, Oct 22, 2022)
6c744e7  Merge branch 'patch-1' of github.com:mle-els/kedro into patch-1 (merelcht, Nov 7, 2022)
9d2f8c1  Merge branch 'main' into patch-1 (merelcht, Nov 7, 2022)
53182a5  add test, release note (mle-els, Nov 9, 2022)
f4088d6  Merge branch 'patch-1' of github.com:mle-els/kedro into patch-1 (mle-els, Nov 9, 2022)
6e6da4b  Fix lint (merelcht, Nov 9, 2022)
c610485  Merge branch 'main' into patch-1 (merelcht, Nov 9, 2022)

Files changed
1 change: 1 addition & 0 deletions RELEASE.md
@@ -25,6 +25,7 @@
 * Updated MatplotlibWriter Dataset docs with working examples.
 * Modified implementation of the Kedro IPython extension to use `local_ns` rather than a global variable.
 * Refactored `ShelveStore` to its own module to ensure multiprocessing works with it.
+* Fixed `AttributeError` when using `/dbfs` paths on an unsupported environment

 ## Minor breaking changes to the API
8 changes: 7 additions & 1 deletion kedro/extras/datasets/spark/spark_dataset.py
@@ -309,7 +309,13 @@ def __init__(  # pylint: disable=too-many-arguments
         path = PurePosixPath(filepath)

         if filepath.startswith("/dbfs"):
-            dbutils = _get_dbutils(self._get_spark())
+            dbutils = None
+            try:
+                dbutils = _get_dbutils(self._get_spark())
+            except AttributeError:
+                # Databricks is known to raise AttributeError when called
+                # on an unsupported environment
+                pass
Comment on lines +312 to +318
Contributor:
I am not sure I understand the root cause entirely. Is this a bug in the Databricks pyspark.dbutils module, or is it because we check the filepath too eagerly in kedro?

The _get_dbutils function is supposed to try getting the dbutils aggressively and return None if it can't. This solution adds yet another try-except layer outside, which is a bit hacky but maybe necessary in this case? I want to make sure I understand the problem before I come to a conclusion.

If it is necessary, would it be better to have this try-except block inside the _get_dbutils function?

Author:
I believe it's a bug in Databricks code. It assumes that IPython.get_ipython() returns an object; when it happens to return None, we get an AttributeError.

/databricks/spark/python/pyspark/dbutils.py:50 in get_dbutils

   47             return SparkServiceClientDBUtils(spark.sparkContext)
   48         else:
   49             import IPython
 ❱ 50             return IPython.get_ipython().user_ns["dbutils"]
   51
   52
   53 class SparkServiceClientDBUtils(object):
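
In other words, a minimal repro (not taken from the PR) of the failure mode on line 50, assuming IPython is installed but no kernel is active:

```python
# Run this as a plain `python` script, outside any IPython/notebook session:
import IPython

shell = IPython.get_ipython()
print(shell)  # -> None when no IPython kernel is active

# The next line then fails the same way dbutils.py line 50 does:
shell.user_ns["dbutils"]  # AttributeError: 'NoneType' object has no attribute 'user_ns'
```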

I think having the try-except block inside _get_dbutils is a better solution indeed. Thanks for pointing that out!
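
For illustration, the guard moved inside the helper might look roughly like this. This is an assumed shape, not kedro's actual implementation; only the contract (return a dbutils handle or None) comes from this thread:

```python
# Sketch only (assumed shape): return a dbutils handle if one can be
# obtained, else None, with the None-returning get_ipython() case
# handled inside the helper instead of at the call site.
from typing import Any, Optional


def _get_dbutils(spark: Any) -> Optional[Any]:
    try:
        from pyspark.dbutils import DBUtils  # only present on Databricks

        return DBUtils(spark)
    except ImportError:
        pass
    except AttributeError:
        # Databricks' own get_dbutils assumes IPython.get_ipython()
        # returns an object; on non-notebook runs it can be None.
        return None

    try:
        import IPython
    except ImportError:
        return None

    ipython = IPython.get_ipython()
    # get_ipython() is None outside a notebook/IPython kernel; guarding
    # here keeps the AttributeError from leaking out to SparkDataSet.
    if ipython is None:
        return None
    return ipython.user_ns.get("dbutils")
```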

Contributor:
Is this pyspark.dbutils module only available on the Databricks runtime? If so, I think that's why it assumes you have IPython. Also, you mentioned you are running on Databricks but not in a managed way; I am not aware of an on-premise option, so what kind of environment are you running on?

Author:
I was trying to run on a normal Databricks cluster, just with MLflow instead of a notebook. I managed to run pipelines via a notebook too, but it would have been better to do it through the command line. When I run mlflow run, MLflow packages my project into a zip file, sends it to a new Databricks cluster, and runs it there. Apparently, because it's not in a notebook, there's no IPython.

If you think this use case is worth supporting, I can make the change that you proposed.
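
For context, the submission step described above corresponds roughly to this MLflow Projects call (a hypothetical sketch; the project URI and the cluster-spec filename are assumptions, not from the PR):

```python
# Submitting a kedro project to a fresh Databricks cluster via MLflow
# Projects, equivalent to `mlflow run . --backend databricks` on the
# command line. The URI and cluster-spec filename are assumptions.
import mlflow

submitted = mlflow.projects.run(
    uri=".",                             # directory containing an MLproject file
    backend="databricks",
    backend_config="cluster-spec.json",  # new-cluster spec; assumed filename
)
submitted.wait()  # block until the remote Databricks job finishes
```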

Contributor:
@mle-els I prefer moving the try-except block to _get_dbutils. As for IPython, I am unsure: even when running a .py file, IPython is normally available, but Databricks doesn't document this.

Contributor:
@mle-els I won't be able to test it myself since I don't have the environment configured. My guess is that you have a relatively old Databricks runtime.

We tested it recently with dbx, which packages up a project and runs it as a Databricks Job, and IPython was available in that case.

This suggests that Databricks runtime >11 always runs on IPython. The page below mentions notebooks only, but when we tested a couple of months ago it was the same with .py files:
https://docs.databricks.com/notebooks/ipython-kernel.html#how-to-use-the-ipython-kernel-with-databricks
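
A quick way to verify that claim on any given runtime is a throwaway snippet like the one below (not part of the PR); submit it as a .py job and check the printed output:

```python
# Check whether the current environment is running under IPython,
# the condition this thread hinges on.
try:
    import IPython
except ImportError:
    IPython = None

shell = IPython.get_ipython() if IPython else None
print("IPython importable:", IPython is not None)
print("Active IPython shell:", shell)  # None => the AttributeError path
```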

Author:
I'll try running the code on a newer runtime when I find some free time.

Contributor:
Hi @mle-els, we'd like to get all PRs related to datasets merged soon, now that we're moving our datasets code to a different package (see our Medium blog post for more details).

Do you think you can find time this week? Otherwise, we'll close this PR and ask you to re-open it on the new repo when it's ready.

Author:
This week I'm swamped, unfortunately :( Please feel free to close it.

Contributor:
@mle-els No worries! Feel free to re-open the PR in the kedro-plugins repository when you are free to work on it again. :)

             if dbutils:
                 glob_function = partial(_dbfs_glob, dbutils=dbutils)
                 exists_function = partial(_dbfs_exists, dbutils=dbutils)
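
Off Databricks, the new code path leaves dbutils as None, so the partials above are never bound and the dataset keeps its default globbing. A runnable sketch of that control flow (the iglob default and the stand-in helper are assumptions; the hunk shows only the dbfs branch):

```python
from functools import partial
from glob import iglob


def _dbfs_glob(pattern, dbutils):
    # stand-in for kedro's DBFS-aware glob helper
    return [f.path for f in dbutils.fs.ls(pattern)]


dbutils = None  # what the new try-except yields off Databricks

glob_function = iglob  # assumed default when no dbutils handle exists
if dbutils:
    glob_function = partial(_dbfs_glob, dbutils=dbutils)

print(glob_function.__name__)  # "iglob", matching the new test's assertion
```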
10 changes: 10 additions & 0 deletions tests/extras/datasets/spark/test_spark_dataset.py
@@ -614,6 +614,16 @@ def test_dbfs_exists(self, mocker):
         dbutils_mock.fs.ls.side_effect = Exception()
         assert not _dbfs_exists(test_path, dbutils_mock)

+    def test_ds_init_get_dbutils_raises_exception(self, mocker):
+        get_dbutils_mock = mocker.Mock()
+        get_dbutils_mock.side_effect = AttributeError
+        get_dbutils_mock = mocker.patch(
+            "kedro.extras.datasets.spark.spark_dataset._get_dbutils", get_dbutils_mock
+        )
+
+        data_set = SparkDataSet(filepath="/dbfs/tmp/data")
+        assert data_set._glob_function.__name__ == "iglob"
+
Comment on lines +617 to +626
Contributor:
The test and the assertion don't seem to match here. This would also become obsolete if the try-except is moved to _get_dbutils, so it would need some modification; compare the existing test in the trailing context below:

     def test_ds_init_no_dbutils(self, mocker):
         get_dbutils_mock = mocker.patch(
             "kedro.extras.datasets.spark.spark_dataset._get_dbutils", return_value=None
         )