Skip to content

Commit

Permalink
Handle both error statuses and response actions
Browse files Browse the repository at this point in the history
  • Loading branch information
burnash committed Mar 20, 2024
1 parent b971f9b commit ebff65c
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 12 deletions.
15 changes: 14 additions & 1 deletion sources/rest_api/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,21 +183,34 @@ def paginate(
pagination logic.
data_selector (Optional[jsonpath.TJsonPath]): JSONPath selector for
extracting data from the response.
hooks (Optional[Hooks]): Hooks to modify request/response objects.
hooks (Optional[Hooks]): Hooks to modify request/response objects. Note that
when hooks are not provided, the default behavior is to raise an exception
on error status codes.
Yields:
PageData[Any]: A page of data from the paginated API response, along with request and response context.
Raises:
HTTPError: If the response status code is not a success code. This is raised
by default when hooks are not provided.
Example:
>>> client = RESTClient(base_url="https://api.example.com")
>>> for page in client.paginate("/search", method="post", json={"query": "foo"}):
>>> print(page)
"""

paginator = paginator if paginator else copy.deepcopy(self.paginator)
auth = auth or self.auth
data_selector = data_selector or self.data_selector
hooks = hooks or {}

def raise_for_status(response: Response, *args: Any, **kwargs: Any) -> None:
response.raise_for_status()

if "response" not in hooks:

This comment has been minimized.

Copy link
@willi-mueller

willi-mueller Mar 27, 2024

Collaborator

"response" seems to never be in hooks when we follow the example usage

Example:
    response_actions = [
        {"status_code": 404, "action": "ignore"},
        {"content": "Not found", "action": "ignore"},
        {"status_code": 429, "action": "retry"},
        {"status_code": 200, "content": "some text", "action": "retry"},
    ]
    ```
hooks["response"] = [raise_for_status]

This comment has been minimized.

Copy link
@willi-mueller

willi-mueller Mar 27, 2024

Collaborator

@burnash what about retrying error 429 by default? This implementation of raising it here crashes the pipeline :(


request = self._create_request(
path=path, method=method, params=params, json=json, auth=auth, hooks=hooks
)
Expand Down
30 changes: 19 additions & 11 deletions sources/rest_api/config_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,17 +221,7 @@ def find_resolved_params(endpoint_config: Endpoint) -> List[ResolvedParam]:
def _handle_response_actions(
response: Response, actions: List[ResponseAction]
) -> Optional[str]:
"""Handle response actions based on the response and the provided actions.
Example:
response_actions = [
{"status_code": 404, "action": "ignore"},
{"content": "Not found", "action": "ignore"},
{"status_code": 429, "action": "retry"},
{"status_code": 200, "content": "some text", "action": "retry"},
]
action_type = client.handle_response_actions(response, response_actions)
"""
"""Handle response actions based on the response and the provided actions."""
content = response.text

for action in actions:
Expand Down Expand Up @@ -266,12 +256,30 @@ def response_actions_hook(response: Response, *args: Any, **kwargs: Any) -> None
)
raise IgnoreResponseException

# If no action has been taken and the status code indicates an error,
# raise an HTTP error based on the response status
if not action_type and response.status_code >= 400:
response.raise_for_status()

return response_actions_hook


def create_response_hooks(
response_actions: Optional[List[ResponseAction]],
) -> Optional[Dict[str, Any]]:
"""Create response hooks based on the provided response actions. Note
that if the error status code is not handled by the response actions,
the default behavior is to raise an HTTP error.
Example:
response_actions = [
{"status_code": 404, "action": "ignore"},
{"content": "Not found", "action": "ignore"},
{"status_code": 429, "action": "retry"},
{"status_code": 200, "content": "some text", "action": "retry"},
]
hooks = create_response_hooks(response_actions)
"""
if response_actions:
return {"response": [_create_response_actions_hook(response_actions)]}
return None
8 changes: 8 additions & 0 deletions tests/rest_api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ def protected_bearer_token(request, context):
context.status_code = 401
return json.dumps({"error": "Unauthorized"})

@router.get("/protected/posts/bearer-token-plain-text-error")
def protected_bearer_token_plain_text_erorr(request, context):
auth = request.headers.get("Authorization")
if auth == "Bearer test-token":
return paginate_response(request, generate_posts())
context.status_code = 401
return "Unauthorized"

@router.get("/protected/posts/api-key")
def protected_api_key(request, context):
api_key = request.headers.get("x-api-key")
Expand Down
23 changes: 23 additions & 0 deletions tests/rest_api/test_rest_api_source_offline.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

import dlt
from dlt.pipeline.exceptions import PipelineStepFailed
from tests.utils import assert_load_info, load_table_counts, assert_query_data

from sources.rest_api import rest_api_source
Expand Down Expand Up @@ -133,6 +134,28 @@ def test_ignoring_endpoint_returning_404(mock_api_server):
]


def test_unauthorized_access_to_protected_endpoint(mock_api_server):
pipeline = dlt.pipeline(
pipeline_name="rest_api_mock",
destination="duckdb",
dataset_name="rest_api_mock",
full_refresh=True,
)

mock_source = rest_api_source(
{
"client": {"base_url": "https://api.example.com"},
"resources": [
"/protected/posts/bearer-token-plain-text-error",
],
}
)

# TODO: Check if it's specically a 401 error
with pytest.raises(PipelineStepFailed):
pipeline.run(mock_source)


def test_posts_under_results_key(mock_api_server):
mock_source = rest_api_source(
{
Expand Down

0 comments on commit ebff65c

Please sign in to comment.