Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for multiple ignores is not working #1956

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions dlt/sources/helpers/rest_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,13 @@ def __init__(
self.auth = auth

if session:
# dlt.sources.helpers.requests.session.Session
# has raise_for_status=True by default
# If the `session` is provided (for example, an instance of
# dlt.sources.helpers.requests.session.Session), warn if
# it has raise_for_status=True by default
self.session = _warn_if_raise_for_status_and_return(session)
else:
# Otherwise, create a new Client with disabled raise_for_status
# to allow for custom error handling in the hooks
from dlt.sources.helpers.requests.retry import Client

self.session = Client(raise_for_status=False).session
Expand Down Expand Up @@ -182,9 +185,9 @@ def paginate(
**kwargs (Any): Optional arguments to that the Request library accepts, such as
`stream`, `verify`, `proxies`, `cert`, `timeout`, and `allow_redirects`.


Yields:
PageData[Any]: A page of data from the paginated API response, along with request and response context.
PageData[Any]: A page of data from the paginated API response, along with request
and response context.

Raises:
HTTPError: If the response status code is not a success code. This is raised
Expand All @@ -200,9 +203,9 @@ def paginate(
data_selector = data_selector or self.data_selector
hooks = hooks or {}

def raise_for_status(response: Response, *args: Any, **kwargs: Any) -> None:
response.raise_for_status()

# Add the raise_for_status hook to ensure an exception is raised on
# HTTP error status codes. This is a fallback to handle errors
# unless explicitly overridden in the provided hooks.
if "response" not in hooks:
hooks["response"] = [raise_for_status]

Expand Down Expand Up @@ -305,6 +308,10 @@ def detect_paginator(self, response: Response, data: Any) -> BasePaginator:
return paginator


def raise_for_status(response: Response, *args: Any, **kwargs: Any) -> None:
response.raise_for_status()


def _warn_if_raise_for_status_and_return(session: BaseSession) -> BaseSession:
"""A generic function to warn if the session has raise_for_status enabled."""
if getattr(session, "raise_for_status", False):
Expand Down
10 changes: 3 additions & 7 deletions dlt/sources/rest_api/config_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
APIKeyAuth,
OAuth2ClientCredentials,
)
from dlt.sources.helpers.rest_client.client import raise_for_status

from dlt.extract.resource import DltResource

Expand Down Expand Up @@ -530,12 +531,6 @@ def response_action_hook(response: Response, *args: Any, **kwargs: Any) -> None:
)
raise IgnoreResponseException

# If there are hooks, then the REST client does not raise for status
# If no action has been taken and the status code indicates an error,
# raise an HTTP error based on the response status
elif not action_type:
response.raise_for_status()

return response_action_hook


Expand Down Expand Up @@ -570,7 +565,8 @@ def remove_field(response: Response, *args, **kwargs) -> Response:
"""
if response_actions:
hooks = [_create_response_action_hook(action) for action in response_actions]
return {"response": hooks}
fallback_hooks = [raise_for_status]
return {"response": hooks + fallback_hooks}
return None


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def custom_hook(response, *args, **kwargs):
{"status_code": 200, "content": "some text", "action": "ignore"},
]
hooks = create_response_hooks(response_actions)
assert len(hooks["response"]) == 4
assert len(hooks["response"]) == 5

response_actions_2: List[ResponseAction] = [
custom_hook,
{"status_code": 200, "action": custom_hook},
]
hooks_2 = create_response_hooks(response_actions_2)
assert len(hooks_2["response"]) == 2
assert len(hooks_2["response"]) == 3


def test_response_action_raises_type_error(mocker):
Expand Down
16 changes: 15 additions & 1 deletion tests/sources/rest_api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,21 @@ def post_detail_404(request, context):
return {"id": post_id, "body": f"Post body {post_id}"}
else:
context.status_code = 404
return {"error": "Post not found"}
return {"error": f"Post with id {post_id} not found"}

@router.get(r"/posts/\d+/some_details_404_others_422")
def post_detail_404_422(request, context):
"""Return 404 No Content for post with id 1. Return 422 for post with id > 1.
Used to test ignoring 404 and 422 responses."""
post_id = int(request.url.split("/")[-2])
if post_id < 1:
return {"id": post_id, "body": f"Post body {post_id}"}
elif post_id == 1:
context.status_code = 404
return {"error": f"Post with id {post_id} not found"}
else:
context.status_code = 422
return None

@router.get(r"/posts/\d+/some_details_204")
def post_detail_204(request, context):
Expand Down
82 changes: 1 addition & 81 deletions tests/sources/rest_api/integration/test_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,86 +100,6 @@ def test_load_mock_api(mock_api_server):
)


def test_ignoring_endpoint_returning_404(mock_api_server):
mock_source = rest_api_source(
{
"client": {"base_url": "https://api.example.com"},
"resources": [
"posts",
{
"name": "post_details",
"endpoint": {
"path": "posts/{post_id}/some_details_404",
"params": {
"post_id": {
"type": "resolve",
"resource": "posts",
"field": "id",
}
},
"response_actions": [
{
"status_code": 404,
"action": "ignore",
},
],
},
},
],
}
)

res = list(mock_source.with_resources("posts", "post_details").add_limit(1))

assert res[:5] == [
{"id": 0, "body": "Post body 0"},
{"id": 0, "title": "Post 0"},
{"id": 1, "title": "Post 1"},
{"id": 2, "title": "Post 2"},
{"id": 3, "title": "Post 3"},
]


def test_ignoring_endpoint_returning_204(mock_api_server):
mock_source = rest_api_source(
{
"client": {"base_url": "https://api.example.com"},
"resources": [
"posts",
{
"name": "post_details",
"endpoint": {
"path": "posts/{post_id}/some_details_204",
"params": {
"post_id": {
"type": "resolve",
"resource": "posts",
"field": "id",
}
},
"response_actions": [
{
"status_code": 204,
"action": "ignore",
},
],
},
},
],
}
)

res = list(mock_source.with_resources("posts", "post_details").add_limit(1))

assert res[:5] == [
{"id": 0, "body": "Post body 0"},
{"id": 0, "title": "Post 0"},
{"id": 1, "title": "Post 1"},
{"id": 2, "title": "Post 2"},
{"id": 3, "title": "Post 3"},
]


def test_source_with_post_request(mock_api_server):
class JSONBodyPageCursorPaginator(BaseReferencePaginator):
def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
Expand Down Expand Up @@ -369,7 +289,7 @@ def test_posts_with_inremental_date_conversion(mock_api_server) -> None:
assert called_kwargs["path"] == "posts"


def test_multiple_response_actions_on_every_response(mock_api_server, mocker):
def test_custom_session_is_used(mock_api_server, mocker):
class CustomSession(Session):
pass

Expand Down
Loading
Loading