diff --git a/src/crawlee/_types.py b/src/crawlee/_types.py
index 1764f658a3..3cb84111fe 100644
--- a/src/crawlee/_types.py
+++ b/src/crawlee/_types.py
@@ -548,6 +548,7 @@ def __call__(
         url: str,
         *,
         method: HttpMethod = 'GET',
+        payload: HttpPayload | None = None,
         headers: HttpHeaders | dict[str, str] | None = None,
     ) -> Coroutine[None, None, HttpResponse]:
         """Call send request function.
@@ -556,6 +557,7 @@ def __call__(
             url: The URL to send the request to.
             method: The HTTP method to use.
             headers: The headers to include in the request.
+            payload: The payload to include in the request.
 
         Returns:
             The HTTP response received from the server.
diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py
index 2b427cdc97..084fb39fe9 100644
--- a/src/crawlee/crawlers/_basic/_basic_crawler.py
+++ b/src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -31,6 +31,7 @@
     BasicCrawlingContext,
     GetKeyValueStoreFromRequestHandlerFunction,
     HttpHeaders,
+    HttpPayload,
     RequestHandlerRunResult,
     SendRequestFunction,
     SkippedReason,
@@ -1081,11 +1082,13 @@ async def send_request(
             url: str,
             *,
             method: HttpMethod = 'GET',
+            payload: HttpPayload | None = None,
             headers: HttpHeaders | dict[str, str] | None = None,
         ) -> HttpResponse:
             return await self._http_client.send_request(
                 url=url,
                 method=method,
+                payload=payload,
                 headers=headers,
                 session=session,
                 proxy_info=proxy_info,
diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py
index 4f151ad621..7e934f9f07 100644
--- a/tests/unit/crawlers/_basic/test_basic_crawler.py
+++ b/tests/unit/crawlers/_basic/test_basic_crawler.py
@@ -18,7 +18,7 @@
 from crawlee import ConcurrencySettings, Glob, service_locator
 from crawlee._request import Request
-from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, HttpHeaders
+from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, HttpHeaders, HttpMethod
 from crawlee._utils.robots import RobotsTxtFile
 from crawlee.configuration import Configuration
 from crawlee.crawlers import BasicCrawler
@@ -300,29 +300,36 @@ async def failed_request_handler(context: BasicCrawlingContext, error: Exception
     await crawler.run(['http://a.com/', 'http://b.com/', 'http://c.com/'])
 
 
-async def test_send_request_works(server_url: URL) -> None:
+@pytest.mark.parametrize(
+    ('method', 'path', 'payload'),
+    [
+        pytest.param('GET', 'get', None, id='get send_request'),
+        pytest.param('POST', 'post', b'Hello, world!', id='post send_request'),
+    ],
+)
+async def test_send_request_works(server_url: URL, method: HttpMethod, path: str, payload: None | bytes) -> None:
     response_data: dict[str, Any] = {}
 
     crawler = BasicCrawler(max_request_retries=3)
 
     @crawler.router.default_handler
     async def handler(context: BasicCrawlingContext) -> None:
-        response = await context.send_request(str(server_url))
+        response = await context.send_request(str(server_url / path), method=method, payload=payload)
 
-        response_data['body'] = response.read()
+        response_data['body'] = json.loads(response.read())
         response_data['headers'] = response.headers
 
     await crawler.run(['http://a.com/', 'http://b.com/', 'http://c.com/'])
 
     response_body = response_data.get('body')
     assert response_body is not None
-    assert b'Hello, world!' in response_body
+    assert response_body.get('data') == (payload.decode() if payload else None)
 
     response_headers = response_data.get('headers')
     assert response_headers is not None
     content_type = response_headers.get('content-type')
     assert content_type is not None
-    assert content_type == 'text/html; charset=utf-8'
+    assert content_type == 'application/json'
 
 
 @dataclass
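
For context, a minimal sketch of how the new `payload` argument to `send_request` can be used from a request handler. The endpoint URL is hypothetical and not part of this change; the imports, the handler registration, and the `send_request` signature follow the code in the diff above.

```python
import asyncio

from crawlee._types import BasicCrawlingContext
from crawlee.crawlers import BasicCrawler


async def main() -> None:
    crawler = BasicCrawler()

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        # With this change, send_request accepts an optional `payload`
        # (raw bytes), so a handler can issue POST requests with a body.
        response = await context.send_request(
            'https://example.com/submit',  # hypothetical endpoint
            method='POST',
            payload=b'Hello, world!',
        )
        # Inspect the response the same way the test above does.
        print(response.headers.get('content-type'), response.read())

    await crawler.run(['https://example.com/'])


if __name__ == '__main__':
    asyncio.run(main())
```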