Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: fs.exists() checks if path exists TDE-710 #441

Merged
merged 3 commits into from
Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions scripts/files/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,17 @@ def read(path: str) -> bytes:
return fs_s3.read(path)

return fs_local.read(path)


def exists(path: str) -> bool:
    """Tell whether something (file or directory) is present at `path`.

    The check is delegated to the S3 implementation when `is_s3(path)` is
    true, and to the local file system implementation otherwise.

    Args:
        path: A path to a directory or file

    Returns:
        True if the path exists
    """
    backend = fs_s3 if is_s3(path) else fs_local
    return backend.exists(path)
15 changes: 15 additions & 0 deletions scripts/files/fs_local.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os


def write(destination: str, source: bytes) -> None:
"""Write the source to the local destination file.

Expand All @@ -20,3 +23,15 @@ def read(path: str) -> bytes:
"""
with open(path, "rb") as file:
return file.read()


def exists(path: str) -> bool:
    """Tell whether a local file or directory is present at `path`.

    Args:
        path: A local path to a directory or file

    Returns:
        True if the path exists
    """
    is_present: bool = os.path.exists(path)
    return is_present
50 changes: 50 additions & 0 deletions scripts/files/fs_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,56 @@ def read(path: str, needs_credentials: bool = False) -> bytes:
return file


def exists(path: str, needs_credentials: bool = False) -> bool:
    """Check if an S3 object, or at least one object under an S3 prefix, exists.

    A path ending with "/" is treated as a prefix ("directory"): it exists when
    at least one key starts with it. Any other path is treated as a single
    object key whose metadata is fetched.

    When the first attempt is refused for lack of access and `needs_credentials`
    is False, the check is retried once with a session from `get_session(path)`.

    Args:
        path: path to the s3 object/key
        needs_credentials: if access to the object needs credentials. Defaults to False.

    Raises:
        ce: ClientError, for any client error other than a missing bucket, a
            missing key, or an access error that can still be retried with credentials

    Returns:
        True if the S3 Object exists, False if the bucket or the key was not found
    """
    s3_path, key = parse_path(path)
    s3 = boto3.resource("s3")

    try:
        if needs_credentials:
            s3 = get_session(path).resource("s3")

        if path.endswith("/"):
            bucket_name = bucket_name_from_path(s3_path)
            bucket = s3.Bucket(bucket_name)
            # MaxKeys only caps how many keys each list request returns; the
            # collection keeps requesting further pages while it is iterated.
            objects = bucket.objects.filter(Prefix=key, MaxKeys=1)
            # Stop at the first object instead of materialising the whole listing.
            return any(True for _ in objects)

        # load() fetches the metadata, not the data. Calls a `head` behind the scene.
        s3.Object(s3_path, key).load()
        return True
    except s3.meta.client.exceptions.NoSuchBucket as nsb:
        get_log().debug("s3_bucket_not_found", path=path, info=f"The specified bucket does not seem to exist: {nsb}")
        return False
    except s3.meta.client.exceptions.ClientError as ce:
        error_code = ce.response["Error"]["Code"]
        # A `head` response has no body, so its errors carry the bare HTTP status
        # ("403"/"404") rather than a named code such as "AccessDenied".
        if not needs_credentials and error_code in ("AccessDenied", "403"):
            get_log().debug("read_s3_needs_credentials", path=path)
            return exists(path, True)
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html#parsing-error-responses-and-catching-exceptions-from-aws-services
        # 404 for NoSuchKey - https://github.com/boto/boto3/issues/2442
        if error_code == "404":
            get_log().debug("s3_key_not_found", path=path, info=f"The specified key does not seem to exist: {ce}")
            return False
        get_log().error("s3_client_error", path=path, error=f"ClientError raised: {ce}")
        raise


def bucket_name_from_path(path: str) -> str:
    """Return the bucket name, i.e. the first "/"-separated segment of `path`.

    Every occurrence of "s3://" is removed from `path` before it is split.

    Args:
        path: an S3 path, with or without the "s3://" scheme

    Returns:
        the text before the first "/" once "s3://" has been removed
    """
    without_scheme = path.replace("s3://", "")
    bucket_name, _, _ = without_scheme.partition("/")
    return bucket_name
Expand Down
17 changes: 16 additions & 1 deletion scripts/files/tests/fs_local_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from scripts.files.fs_local import read, write
from scripts.files.fs_local import exists, read, write


@pytest.fixture(name="setup", autouse=True)
Expand Down Expand Up @@ -36,3 +36,18 @@ def test_read(setup: str) -> None:
write(path, content)
file_content = read(path)
assert file_content == content


@pytest.mark.dependency(name="exists", depends=["write"])
def test_exists(setup: str) -> None:
    """A file written into the directory given by `setup` is reported as existing."""
    file_path = os.path.join(setup, "test.file")
    write(file_path, b"test content")
    assert exists(file_path) is True


def test_exists_file_not_found() -> None:
    """A local path that was never written is reported as missing."""
    assert exists("/tmp/test.file") is False
59 changes: 58 additions & 1 deletion scripts/files/tests/fs_s3_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from moto.s3.responses import DEFAULT_REGION_NAME
from pytest import CaptureFixture

from scripts.files.fs_s3 import read, write
from scripts.files.fs_s3 import exists, read, write


@mock_s3 # type: ignore
Expand Down Expand Up @@ -54,3 +54,60 @@ def test_read_key_not_found(capsys: CaptureFixture[str]) -> None:

logs = json.loads(capsys.readouterr().out)
assert logs["msg"] == "s3_key_not_found"


@mock_s3  # type: ignore
def test_exists() -> None:
    """An object put at the bucket root is found by its exact key."""
    resource = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
    s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
    resource.create_bucket(Bucket="testbucket")
    s3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")

    assert exists("s3://testbucket/test.file") is True


@mock_s3  # type: ignore
def test_directory_exists() -> None:
    """A prefix ending with "/" exists when an object is stored under it."""
    resource = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
    s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
    resource.create_bucket(Bucket="testbucket")
    s3_client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content")

    assert exists("s3://testbucket/hello/") is True


@mock_s3  # type: ignore
def test_exists_bucket_not_exists(capsys: CaptureFixture[str]) -> None:
    """With no bucket created, `exists` logs "s3_bucket_not_found" and returns False."""
    result = exists("s3://testbucket/test.file")

    captured = capsys.readouterr()
    log_entry = json.loads(captured.out)
    assert log_entry["msg"] == "s3_bucket_not_found"
    assert result is False


@mock_s3  # type: ignore
def test_exists_object_not_exists() -> None:
    """A key that was never put is missing even though the bucket holds another object."""
    resource = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
    s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
    resource.create_bucket(Bucket="testbucket")
    s3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")

    assert exists("s3://testbucket/test.file") is False


@mock_s3  # type: ignore
def test_exists_object_starting_with_not_exists() -> None:
    """A path that is only the beginning of an existing key is not reported as existing."""
    resource = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
    s3_client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
    resource.create_bucket(Bucket="testbucket")
    s3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")

    assert exists("s3://testbucket/hello/another.fi") is False