Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TOSFS #10] Implement rmdir API #23

Merged
merged 1 commit into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 52 additions & 2 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,55 @@ def info(

return self._try_dir_info(bucket, key, path, fullpath)

def rmdir(self, path: str) -> None:
"""Remove a directory if it is empty.

Parameters
----------
path : str
The path of the directory to remove. The path should be in the format
`tos://bucket/path/to/directory`.

Raises
------
FileNotFoundError
yanghua marked this conversation as resolved.
Show resolved Hide resolved
If the directory does not exist.
NotADirectoryError
yanghua marked this conversation as resolved.
Show resolved Hide resolved
If the path is not a directory.
TosfsError
If the directory is not empty,
or the path is a bucket.

Examples
--------
>>> fs = TosFileSystem()
>>> fs.rmdir("tos://mybucket/mydir/")

"""
path = self._strip_protocol(path).rstrip("/") + "/"
bucket, key, _ = self._split_path(path)
if not key:
raise TosfsError("Cannot remove a bucket using rmdir api.")

if not self.exists(path):
raise FileNotFoundError(f"Directory {path} not found.")

if not self.isdir(path):
raise NotADirectoryError(f"{path} is not a directory.")

if len(self._listdir(bucket, max_items=1, prefix=key.rstrip("/") + "/")) > 0:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@openinx Put a solution that is better than the previous one, but it's uncertain whether there is a more efficient one.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Essentially, the reason why I commented as not efficient is: we call two self.infos , and at least one OBJECT LIST ( the object LIST the a limit 1000 QPS by default). So for the rmrdir, it will easily reach the QPS limit. Also the latency of limit is much more higher than the normal HEAD (10ms vs 1ms ).

But I don't want this block your further PRs, So I plan to merge this firstly. We can improve it in the next following PRs.

Thanks.

raise TosfsError(f"Directory {path} is not empty.")

try:
self.tos_client.delete_object(bucket, key.rstrip("/") + "/")
yanghua marked this conversation as resolved.
Show resolved Hide resolved
self.invalidate_cache(path.rstrip("/"))
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
raise e
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _info_from_cache(
self, path: str, fullpath: str, version_id: Optional[str]
) -> dict:
Expand Down Expand Up @@ -643,9 +692,10 @@ def _split_path(self, path: str) -> Tuple[str, str, Optional[str]]:

@staticmethod
def _fill_common_prefix_info(common_prefix: CommonPrefixInfo, bucket: str) -> dict:
name = "/".join([bucket, common_prefix.prefix[:-1]])
return {
"name": common_prefix.prefix[:-1],
"Key": "/".join([bucket, common_prefix.prefix]),
"name": name,
"Key": name,
"Size": 0,
"type": "directory",
}
Expand Down
10 changes: 6 additions & 4 deletions tosfs/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pytest
from tos import EnvCredentialsProvider

from tosfs.core import TosFileSystem
from tosfs.core import TosFileSystem, logger
from tosfs.utils import random_path


Expand Down Expand Up @@ -54,6 +54,8 @@ def temporary_workspace(
# will replace with tosfs.mkdir in the future
tosfs.tos_client.put_object(bucket=bucket, key=f"{workspace}/")
yield workspace
# currently, remove dir via purely tos python client,
# will replace with tosfs.rmdir in the future
tosfs.tos_client.delete_object(bucket=bucket, key=f"{workspace}/")
try:
tosfs.rmdir(f"{bucket}/{workspace}/")
except Exception:
logger.error("Ignore exception.")
assert not tosfs.exists(f"{bucket}/{workspace}/")
30 changes: 28 additions & 2 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from tos.exceptions import TosServerError

from tosfs.core import TosFileSystem
from tosfs.exceptions import TosfsError
from tosfs.utils import random_path


Expand All @@ -35,11 +36,11 @@ def test_ls_bucket(tosfs: TosFileSystem, bucket: str) -> None:


def test_ls_dir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> None:
assert temporary_workspace in tosfs.ls(bucket, detail=False)
assert f"{bucket}/{temporary_workspace}" in tosfs.ls(bucket, detail=False)
detailed_list = tosfs.ls(bucket, detail=True)
assert detailed_list
for item in detailed_list:
if item["name"] == temporary_workspace:
if item["name"] == f"{bucket}/{temporary_workspace}":
assert item["type"] == "directory"
break
else:
Expand Down Expand Up @@ -105,3 +106,28 @@ def test_info(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> No

with pytest.raises(FileNotFoundError):
tosfs.info(f"{bucket}/nonexistent")


def test_rmdir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> None:
with pytest.raises(TosfsError):
tosfs.rmdir(bucket)

file_name = random_path()
tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")
assert f"{bucket}/{temporary_workspace}/{file_name}" in tosfs.ls(
f"{bucket}/{temporary_workspace}", detail=False
)

with pytest.raises(TosfsError):
tosfs.rmdir(f"{bucket}/{temporary_workspace}")

with pytest.raises(NotADirectoryError):
tosfs.rmdir(f"{bucket}/{temporary_workspace}/{file_name}")

tosfs._rm(f"{bucket}/{temporary_workspace}/{file_name}")
assert tosfs.ls(f"{bucket}/{temporary_workspace}", detail=False) == []

tosfs.rmdir(f"{bucket}/{temporary_workspace}")
assert f"{bucket}/{temporary_workspace}" not in tosfs.ls(
bucket, detail=False, refresh=True
)