diff --git a/src/databricks/labs/blueprint/paths.py b/src/databricks/labs/blueprint/paths.py index c203db6..d66204c 100644 --- a/src/databricks/labs/blueprint/paths.py +++ b/src/databricks/labs/blueprint/paths.py @@ -11,6 +11,7 @@ import posixpath import re import shutil +import stat from abc import abstractmethod from collections.abc import Generator, Iterable, Sequence from io import BytesIO, StringIO @@ -121,7 +122,6 @@ class _DatabricksPath(Path, abc.ABC): # pylint: disable=too-many-public-methods # Public APIs that we don't support. as_uri = _na("as_uri") cwd = _na("cwd") - stat = _na("stat") chmod = _na("chmod") lchmod = _na("lchmod") lstat = _na("lstat") @@ -138,6 +138,7 @@ def __new__(cls, *args, **kwargs): # Force all initialisation to go via __init__() irrespective of the (Python-specific) base version. return object.__new__(cls) + # pylint: disable=super-init-not-called def __init__(self, ws: WorkspaceClient, *args: str | bytes | os.PathLike) -> None: # We deliberately do _not_ call the super initializer because we're taking over complete responsibility for the # implementation of the public API. 
@@ -385,6 +386,7 @@ def with_suffix(self: P, suffix: str) -> P: raise ValueError(msg) return self.with_name(stem + suffix) + # pylint: disable=arguments-differ def relative_to(self: P, *other: str | bytes | os.PathLike, walk_up: bool = False) -> P: normalized = self.with_segments(*other) if self.anchor != normalized.anchor: @@ -691,6 +693,14 @@ def _file_info(self) -> FileInfo: self._cached_file_info = self._ws.dbfs.get_status(self.as_posix()) return self._cached_file_info + def stat(self, *, follow_symlinks=True) -> os.stat_result: + seq: list[float] = [-1.0] * 10 + seq[stat.ST_SIZE] = self._file_info.file_size if self._file_info.file_size is not None else -1 # 6 + seq[stat.ST_MTIME] = ( + float(self._file_info.modification_time) / 1000.0 if self._file_info.modification_time else -1.0 + ) # 8 + return os.stat_result(seq) + def is_dir(self) -> bool: """Return True if the path points to a DBFS directory.""" try: @@ -841,6 +851,15 @@ def _object_info(self) -> ObjectInfo: self._cached_object_info = self._ws.workspace.get_status(self.as_posix()) return self._object_info + def stat(self, *, follow_symlinks=True) -> os.stat_result: + seq: list[float] = [-1.0] * 10 + seq[stat.ST_SIZE] = self._object_info.size if self._object_info.size is not None else -1 # 6 + seq[stat.ST_MTIME] = ( + float(self._object_info.modified_at) / 1000.0 if self._object_info.modified_at else -1.0 + ) # 8 + seq[stat.ST_CTIME] = float(self._object_info.created_at) / 1000.0 if self._object_info.created_at else -1.0 # 9 + return os.stat_result(seq) + def is_dir(self) -> bool: """Return True if the path points to a directory in Databricks Workspace.""" try: diff --git a/tests/integration/test_paths.py b/tests/integration/test_paths.py index 4691f17..c682bab 100644 --- a/tests/integration/test_paths.py +++ b/tests/integration/test_paths.py @@ -1,4 +1,5 @@ import codecs +from datetime import datetime from pathlib import Path import pytest @@ -67,6 +68,21 @@ def test_open_text_io(ws, make_random, cls): assert not hello_txt.exists() +@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE) 
+def test_stat(ws, make_random, cls): + now = datetime.now().timestamp() + name = make_random() + wsp = cls(ws, f"~/{name}/a/b/c") + with_user = wsp.expanduser() + with_user.mkdir(parents=True) + + hello_txt = with_user / "hello.txt" + hello_txt.write_text("Hello, World!") + if cls is WorkspacePath: # DBFSPath has no st_ctime + assert hello_txt.stat().st_ctime >= now + assert hello_txt.stat().st_mtime >= now + + @pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE) def test_unlink(ws, make_random, cls): name = make_random() diff --git a/tests/unit/test_paths.py b/tests/unit/test_paths.py index 7a30b90..3d6c88b 100644 --- a/tests/unit/test_paths.py +++ b/tests/unit/test_paths.py @@ -7,6 +7,7 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound, ResourceDoesNotExist from databricks.sdk.mixins.workspace import WorkspaceExt +from databricks.sdk.service.files import FileInfo from databricks.sdk.service.workspace import ( ImportFormat, Language, @@ -14,7 +15,7 @@ ObjectType, ) -from databricks.labs.blueprint.paths import WorkspacePath +from databricks.labs.blueprint.paths import DBFSPath, WorkspacePath def test_empty_init() -> None: @@ -1007,3 +1008,24 @@ def test_rglob() -> None: WorkspacePath(ws, "/test/path/dir1/file1.json"), WorkspacePath(ws, "/test/path/dir2/file2.json"), } + + +def test_workspace_path_stat_has_fields(): + info = ObjectInfo(created_at=1234, modified_at=2345, size=3456) + ws = create_autospec(WorkspaceClient) + ws.workspace.get_status.return_value = info + workspace_path = WorkspacePath(ws, "/test/path") + stats = workspace_path.stat() + assert stats.st_ctime == info.created_at / 1000.0 + assert stats.st_mtime == info.modified_at / 1000.0 + assert stats.st_size == info.size + + +def test_dbfs_path_stat_has_fields(): + info = FileInfo(modification_time=2345, file_size=3456) + ws = create_autospec(WorkspaceClient) + ws.dbfs.get_status.return_value = info + dbfs_path = DBFSPath(ws, "/test/path") + stats = 
dbfs_path.stat() + assert stats.st_mtime == info.modification_time / 1000.0 + assert stats.st_size == info.file_size