Skip to content

Commit

Permalink
add missing stat() methods to DBFSPath and WorkspacePath (#144)
Browse files Browse the repository at this point in the history
Fixes #142 and #143

---------

Co-authored-by: Eric Vergnaud <eric.vergnaud@databricks.com>
  • Loading branch information
ericvergnaud and ericvergnaud authored Sep 13, 2024
1 parent c531c3f commit 36fc873
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 2 deletions.
21 changes: 20 additions & 1 deletion src/databricks/labs/blueprint/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import posixpath
import re
import shutil
import stat
from abc import abstractmethod
from collections.abc import Generator, Iterable, Sequence
from io import BytesIO, StringIO
Expand Down Expand Up @@ -121,7 +122,6 @@ class _DatabricksPath(Path, abc.ABC): # pylint: disable=too-many-public-methods
# Public APIs that we don't support.
as_uri = _na("as_uri")
cwd = _na("cwd")
stat = _na("stat")
chmod = _na("chmod")
lchmod = _na("lchmod")
lstat = _na("lstat")
Expand All @@ -138,6 +138,7 @@ def __new__(cls, *args, **kwargs):
# Force all initialisation to go via __init__() irrespective of the (Python-specific) base version.
return object.__new__(cls)

# pylint: disable=super-init-not-called
def __init__(self, ws: WorkspaceClient, *args: str | bytes | os.PathLike) -> None:
# We deliberately do _not_ call the super initializer because we're taking over complete responsibility for the
# implementation of the public API.
Expand Down Expand Up @@ -385,6 +386,7 @@ def with_suffix(self: P, suffix: str) -> P:
raise ValueError(msg)
return self.with_name(stem + suffix)

# pylint: disable=arguments-differ
def relative_to(self: P, *other: str | bytes | os.PathLike, walk_up: bool = False) -> P:
normalized = self.with_segments(*other)
if self.anchor != normalized.anchor:
Expand Down Expand Up @@ -691,6 +693,14 @@ def _file_info(self) -> FileInfo:
self._cached_file_info = self._ws.dbfs.get_status(self.as_posix())
return self._cached_file_info

def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return an `os.stat_result` built from the DBFS status of this path.

    Only `st_size` and `st_mtime` are populated; every other field is -1.

    Args:
        follow_symlinks: accepted for signature compatibility with `pathlib.Path.stat()`; it is not used here.

    Returns:
        A 10-field `os.stat_result` where unknown values are reported as -1.
    """
    info = self._file_info
    seq: list[float] = [-1.0] * 10
    # Compare against None rather than relying on truthiness: an empty file has a legitimate size of 0, which
    # must not be reported as "unknown" (-1).
    seq[stat.ST_SIZE] = info.file_size if info.file_size is not None else -1  # 6
    # The raw modification time is divided by 1000 before being exposed; a missing or zero value is treated as
    # unknown.
    seq[stat.ST_MTIME] = float(info.modification_time) / 1000.0 if info.modification_time else -1.0  # 8
    return os.stat_result(seq)

def is_dir(self) -> bool:
"""Return True if the path points to a DBFS directory."""
try:
Expand Down Expand Up @@ -841,6 +851,15 @@ def _object_info(self) -> ObjectInfo:
self._cached_object_info = self._ws.workspace.get_status(self.as_posix())
return self._object_info

def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return an `os.stat_result` built from the workspace object info of this path.

    Only `st_size`, `st_mtime` and `st_ctime` are populated; every other field is -1.

    Args:
        follow_symlinks: accepted for signature compatibility with `pathlib.Path.stat()`; it is not used here.

    Returns:
        A 10-field `os.stat_result` where unknown values are reported as -1.
    """
    info = self._object_info
    seq: list[float] = [-1.0] * 10
    # Compare against None rather than relying on truthiness: an empty object has a legitimate size of 0, which
    # must not be reported as "unknown" (-1).
    seq[stat.ST_SIZE] = info.size if info.size is not None else -1  # 6
    # The raw timestamps are divided by 1000 before being exposed; a missing or zero value is treated as unknown.
    seq[stat.ST_MTIME] = float(info.modified_at) / 1000.0 if info.modified_at else -1.0  # 8
    seq[stat.ST_CTIME] = float(info.created_at) / 1000.0 if info.created_at else -1.0  # 9
    return os.stat_result(seq)

def is_dir(self) -> bool:
"""Return True if the path points to a directory in Databricks Workspace."""
try:
Expand Down
16 changes: 16 additions & 0 deletions tests/integration/test_paths.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import codecs
from datetime import datetime
from pathlib import Path

import pytest
Expand Down Expand Up @@ -67,6 +68,21 @@ def test_open_text_io(ws, make_random, cls):
assert not hello_txt.exists()


@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE)
def test_stat(ws, make_random, cls):
    """Timestamps reported by stat() must not predate the moment captured just before the file was written."""
    started_at = datetime.now().timestamp()
    folder = cls(ws, f"~/{make_random()}/a/b/c").expanduser()
    folder.mkdir(parents=True)

    target = folder / "hello.txt"
    target.write_text("Hello, World!")

    # The creation time is only checked for WorkspacePath: DBFSPath has no st_ctime.
    if cls is WorkspacePath:
        assert target.stat().st_ctime >= started_at
    assert target.stat().st_mtime >= started_at


@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE)
def test_unlink(ws, make_random, cls):
name = make_random()
Expand Down
24 changes: 23 additions & 1 deletion tests/unit/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound, ResourceDoesNotExist
from databricks.sdk.mixins.workspace import WorkspaceExt
from databricks.sdk.service.files import FileInfo
from databricks.sdk.service.workspace import (
ImportFormat,
Language,
ObjectInfo,
ObjectType,
)

from databricks.labs.blueprint.paths import WorkspacePath
from databricks.labs.blueprint.paths import DBFSPath, WorkspacePath


def test_empty_init() -> None:
Expand Down Expand Up @@ -1007,3 +1008,24 @@ def test_rglob() -> None:
WorkspacePath(ws, "/test/path/dir1/file1.json"),
WorkspacePath(ws, "/test/path/dir2/file2.json"),
}


def test_workspace_path_stat_has_fields():
    """stat() on a WorkspacePath exposes the creation time, modification time and size of the object info."""
    client = create_autospec(WorkspaceClient)
    object_info = ObjectInfo(created_at=1234, modified_at=2345, size=3456)
    client.workspace.get_status.return_value = object_info

    result = WorkspacePath(client, "/test/path").stat()

    # Timestamps are expected to be the raw object-info values divided by 1000; the size is passed through.
    assert result.st_ctime == object_info.created_at / 1000.0
    assert result.st_mtime == object_info.modified_at / 1000.0
    assert result.st_size == object_info.size


def test_dbfs_path_stat_has_fields():
    """stat() on a DBFSPath exposes the modification time and size of the file info."""
    client = create_autospec(WorkspaceClient)
    file_info = FileInfo(modification_time=2345, file_size=3456)
    client.dbfs.get_status.return_value = file_info

    result = DBFSPath(client, "/test/path").stat()

    # The timestamp is expected to be the raw file-info value divided by 1000; the size is passed through.
    assert result.st_mtime == file_info.modification_time / 1000.0
    assert result.st_size == file_info.file_size

0 comments on commit 36fc873

Please sign in to comment.