Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug in S3KeysUnchangedTrigger and is_key_unchanged in hook #109

Merged
merged 6 commits into from
Mar 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions astronomer/providers/amazon/aws/hooks/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ async def is_keys_unchanged(
inactivity_seconds: int = 0,
allow_delete: bool = True,
last_activity_time: Optional[datetime] = None,
) -> Dict[str, str]:
) -> Dict[str, Any]:
sunank200 marked this conversation as resolved.
Show resolved Hide resolved
"""
Checks whether new objects have been uploaded and the inactivity_period
has passed and updates the state of the sensor accordingly.
Expand Down Expand Up @@ -252,7 +252,12 @@ async def is_keys_unchanged(
last_activity_time = datetime.now()
inactivity_seconds = 0
previous_objects = current_objects
return {"status": "pending"}
return {
"status": "pending",
"previous_objects": previous_objects,
"last_activity_time": last_activity_time,
"inactivity_seconds": inactivity_seconds,
}

if len(previous_objects) - len(current_objects):
# During the last poke interval objects were deleted.
Expand All @@ -265,7 +270,12 @@ async def is_keys_unchanged(
"file counter and resetting last_activity_time:\n%s",
deleted_objects,
)
return {"status": "pending"}
return {
"status": "pending",
"previous_objects": previous_objects,
"last_activity_time": last_activity_time,
"inactivity_seconds": inactivity_seconds,
}

return {
"status": "error",
Expand Down Expand Up @@ -298,4 +308,9 @@ async def is_keys_unchanged(
"status": "error",
"message": f"FAILURE: Inactivity Period passed, not enough objects found in {path}",
}
return {"status": "pending"}
return {
"status": "pending",
"previous_objects": previous_objects,
"last_activity_time": last_activity_time,
"inactivity_seconds": inactivity_seconds,
}
12 changes: 9 additions & 3 deletions astronomer/providers/amazon/aws/triggers/s3.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import logging
from datetime import datetime
from typing import Any, AsyncIterator, Callable, Dict, List, Optional, Set, Tuple, Union
Expand Down Expand Up @@ -129,7 +130,7 @@ def __init__(
inactivity_period: float = 60 * 60,
min_objects: int = 1,
inactivity_seconds: int = 0,
previous_objects: Optional[Set[str]] = None,
previous_objects: Optional[Set[str]] = set(),
allow_delete: bool = True,
aws_conn_id: str = "aws_default",
last_activity_time: Optional[datetime] = None,
Expand All @@ -148,6 +149,7 @@ def __init__(
self.aws_conn_id = aws_conn_id
self.last_activity_time: Optional[datetime] = last_activity_time
self.verify = verify
self.polling_period_seconds = 0

def serialize(self) -> Tuple[str, Dict[str, Any]]:
"""Serialize S3KeysUnchangedTrigger arguments and classpath."""
Expand All @@ -172,7 +174,6 @@ async def run(self) -> AsyncIterator["TriggerEvent"]: # type: ignore[override]
hook = self._get_async_hook()
async with await hook.get_client_async() as client:
while True:

result = await hook.is_keys_unchanged(
client,
self.bucket_name,
Expand All @@ -184,9 +185,14 @@ async def run(self) -> AsyncIterator["TriggerEvent"]: # type: ignore[override]
self.allow_delete,
self.last_activity_time,
)
if result.get("status") == "success" or result.get("error") == "error":
if result.get("status") == "success" or result.get("status") == "error":
yield TriggerEvent(result)
return
elif result.get("status") == "pending":
sunank200 marked this conversation as resolved.
Show resolved Hide resolved
self.previous_objects = result.get("previous_objects", set())
self.last_activity_time = result.get("last_activity_time")
self.inactivity_seconds = result.get("inactivity_seconds", 0)
await asyncio.sleep(self.polling_period_seconds)
except Exception as e:
yield TriggerEvent({"status": "error", "message": str(e)})

Expand Down
89 changes: 86 additions & 3 deletions tests/amazon/aws/hooks/test_s3_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ async def test_s3_key_hook_check_wildcard_key_invalid(mock_client):

@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
@pytest.mark.asyncio
async def test_s3_key_hook_get_files(mock_client):
async def test_s3_key_hook_get_files_without_wildcard(mock_client):
"""
Test get_files for a valid response
:return:
Expand All @@ -230,6 +230,32 @@ async def test_s3_key_hook_get_files(mock_client):
assert response == []


@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
@pytest.mark.asyncio
async def test_s3_key_hook_get_files_with_wildcard(mock_client):
    """
    Test get_files when the wildcard argument is enabled.

    The mocked paginator yields a single page holding ``test_key`` and
    ``test_key2``; asking for ``test.txt`` is expected to return an empty list.
    """
    last_modified = datetime(2020, 8, 14, 17, 19, 34)
    listed_objects = [
        {"Key": "test_key", "ETag": "etag1", "LastModified": last_modified},
        {"Key": "test_key2", "ETag": "etag2", "LastModified": last_modified},
    ]
    pages = [{"Contents": listed_objects}]

    # ``paginate()`` must hand back something usable with ``async for``.
    async_pages = mock.MagicMock()
    async_pages.__aiter__.return_value = pages
    paginator = mock.Mock()
    paginator.paginate.return_value = async_pages
    mock_client.get_paginator = mock.Mock(return_value=paginator)

    hook = S3HookAsync(client_type="S3", resource_type="S3")
    # Fourth positional argument is presumably the wildcard-match flag -- confirm against get_files.
    response = await hook.get_files(mock_client, "test_bucket", "test.txt", True)
    assert response == []


@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
@pytest.mark.asyncio
async def test_s3_key_hook_list_keys(mock_client):
Expand Down Expand Up @@ -280,7 +306,7 @@ async def test_s3_key_hook_is_keys_unchanged_false(mock_list_keys, mock_client):
last_activity_time=None,
)

assert response == {"status": "pending"}
assert response.get("status") == "pending"

# test for the case when current_objects < previous_objects
mock_list_keys.return_value = []
Expand All @@ -298,7 +324,7 @@ async def test_s3_key_hook_is_keys_unchanged_false(mock_list_keys, mock_client):
last_activity_time=None,
)

assert response == {"status": "pending"}
assert response.get("status") == "pending"


@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
Expand Down Expand Up @@ -328,6 +354,63 @@ async def test_s3_key_hook_is_keys_unchanged_exception(mock_list_keys, mock_clie
assert response == {"message": " test_bucket/test between pokes.", "status": "error"}


@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync._list_keys")
@pytest.mark.asyncio
async def test_s3_key_hook_is_keys_unchanged_pending(mock_list_keys, mock_client):
    """
    Test that is_keys_unchanged returns a "pending" status.

    The key listing is mocked to be empty and the hook is called with
    ``inactivity_period=1``, ``min_objects=0`` and ``inactivity_seconds=0``.
    """
    mock_list_keys.return_value = []
    hook = S3HookAsync(client_type="S3", resource_type="S3")

    call_kwargs = {
        "client": mock_client.return_value,
        "bucket_name": "test_bucket",
        "prefix": "test",
        "inactivity_period": 1,
        "min_objects": 0,
        "previous_objects": set(),
        "inactivity_seconds": 0,
        "allow_delete": False,
        "last_activity_time": None,
    }
    response = await hook.is_keys_unchanged(**call_kwargs)

    # Only the status is pinned; the pending payload also carries state fields.
    status = response.get("status")
    assert status == "pending"


@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync._list_keys")
@pytest.mark.asyncio
async def test_s3_key_hook_is_keys_unchanged_inactivity_error(mock_list_keys, mock_client):
    """
    Test that is_keys_unchanged returns the "not enough objects" error payload.

    The key listing is mocked to be empty and the hook is called with
    ``inactivity_period=0``, ``inactivity_seconds=5`` and ``min_objects=5``.
    """
    mock_list_keys.return_value = []
    hook = S3HookAsync(client_type="S3", resource_type="S3")

    call_kwargs = {
        "client": mock_client.return_value,
        "bucket_name": "test_bucket",
        "prefix": "test",
        "inactivity_period": 0,
        "min_objects": 5,
        "previous_objects": set(),
        "inactivity_seconds": 5,
        "allow_delete": False,
        "last_activity_time": None,
    }
    response = await hook.is_keys_unchanged(**call_kwargs)

    expected = {
        "status": "error",
        "message": "FAILURE: Inactivity Period passed, not enough objects found in test_bucket/test",
    }
    assert response == expected


@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync._list_keys")
@pytest.mark.asyncio
Expand Down
12 changes: 12 additions & 0 deletions tests/amazon/aws/sensors/test_s3_sensors.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,18 @@ def test_s3_keys_unchanged_sensor_execute_complete_error(self, bucket, prefix, m
with pytest.raises(AirflowException):
sensor.execute_complete(context={}, event={"status": "error", "message": "Mocked error"})

def test_s3_keys_unchanged_sensor_raise_value_error(self):
    """
    Test that S3KeysUnchangedSensorAsync raises ValueError when it is
    constructed with a negative inactivity_period.
    """
    sensor_kwargs = {
        "task_id": "s3_keys_unchanged_sensor",
        "bucket_name": "test_bucket",
        "prefix": "test",
        "inactivity_period": -100,
    }
    # Construction alone is expected to fail; the sensor is never executed.
    with pytest.raises(ValueError):
        S3KeysUnchangedSensorAsync(**sensor_kwargs)


class TestS3PrefixSensorAsync(unittest.TestCase):
def test_s3_prefix_sensor_async(self):
Expand Down
30 changes: 30 additions & 0 deletions tests/amazon/aws/triggers/test_s3_triggers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
from datetime import datetime
from unittest import mock

import pytest
Expand Down Expand Up @@ -46,6 +47,18 @@ async def test_s3_key_trigger_run(mock_client):
asyncio.get_event_loop().stop()


@pytest.mark.asyncio
async def test_s3_key_trigger_run_exception():
    """
    Test that S3KeyTrigger.run yields a single error TriggerEvent when no
    AWS credentials can be located.
    """
    # NOTE(review): nothing is mocked here, so this presumably relies on the
    # test environment having no AWS credentials configured -- confirm.
    trigger = S3KeyTrigger(bucket_key="s3://test_bucket/file", bucket_name="test_bucket")

    events = []
    async for event in trigger.run():
        events.append(event)

    assert len(events) == 1
    expected_event = TriggerEvent({"status": "error", "message": "Unable to locate credentials"})
    assert expected_event in events


def test_s3_key_size_trigger_serialization():
"""
Asserts that the TaskStateTrigger correctly serializes its arguments
Expand Down Expand Up @@ -196,6 +209,23 @@ async def test_s3_keys_unchanged_trigger_run_success(mock_is_keys_unchanged, moc
assert TriggerEvent({"status": "success"}) in task


@pytest.mark.asyncio
@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.get_client_async")
@mock.patch("astronomer.providers.amazon.aws.triggers.s3.S3HookAsync.is_keys_unchanged")
async def test_s3_keys_unchanged_trigger_run_pending(mock_is_keys_unchanged, mock_client):
    """
    Test that S3KeysUnchangedTrigger.run yields no TriggerEvent while the
    hook keeps reporting a "pending" status.
    """
    pending_result = {"status": "pending", "last_activity_time": datetime.now()}
    mock_is_keys_unchanged.return_value = pending_result

    trigger = S3KeysUnchangedTrigger(bucket_name="test_bucket", prefix="test")
    # Request the first event in the background, then give the trigger time to poll.
    first_event_task = asyncio.create_task(trigger.run().__anext__())
    await asyncio.sleep(0.5)

    # TriggerEvent was not returned
    assert first_event_task.done() is False
    asyncio.get_event_loop().stop()


def test_s3_prefix_sensor_trigger_serialization():
"""
Asserts that the S3 prefix trigger correctly serializes its arguments
Expand Down