src/crawlee/_browserforge_workaround.py (10 changes: 7 additions & 3 deletions)

@@ -1,4 +1,8 @@
 # ruff: noqa: N802, PLC0415
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
 
 
 def patch_browserforge() -> None:
@@ -12,15 +16,15 @@ def patch_browserforge() -> None:
     import apify_fingerprint_datapoints
     from browserforge import download
 
-    download.DATA_DIRS: dict[str, Path] = {  # type:ignore[misc]
+    download.DATA_DIRS = {
         'headers': apify_fingerprint_datapoints.get_header_network().parent,
         'fingerprints': apify_fingerprint_datapoints.get_fingerprint_network().parent,
     }
 
     def DownloadIfNotExists(**flags: bool) -> None:
         pass
 
-    download.DownloadIfNotExists = DownloadIfNotExists  # ty: ignore[invalid-assignment]
+    download.DownloadIfNotExists: Callable[..., None] = DownloadIfNotExists
 
     import browserforge.bayesian_network
 
@@ -33,7 +37,7 @@ def __init__(self, path: Path) -> None:
                 path = download.DATA_DIRS['fingerprints'] / download.DATA_FILES['fingerprints'][path.name]
             super().__init__(path)
 
-    browserforge.bayesian_network.BayesianNetwork = BayesianNetwork  # type:ignore[misc]
+    browserforge.bayesian_network.BayesianNetwork: BayesianNetwork = BayesianNetwork
     import browserforge.headers.generator
 
     browserforge.headers.generator.DATA_DIR = download.DATA_DIRS['headers']
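As context for the hunks above: patch_browserforge() works by reassigning attributes on the imported browserforge.download module at runtime, so later callers pick up the replacements. A minimal, self-contained sketch of that monkeypatching pattern (the stub module and names below are illustrative, not part of the PR):

import types

# Illustrative stand-in for the real `browserforge.download` module.
download = types.ModuleType('download')
download.DATA_DIRS = {}  # the real module defines this as a dict of data-file Paths


def DownloadIfNotExists(**flags: bool) -> None:
    """No-op replacement: bundled data files make the download step unnecessary."""


# Reassigning the module attribute redirects every later caller to the no-op.
download.DownloadIfNotExists = DownloadIfNotExists

download.DownloadIfNotExists(headers=True)  # does nothing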
src/crawlee/_utils/context.py (4 changes: 2 additions & 2 deletions)

@@ -3,7 +3,7 @@
 import inspect
 from collections.abc import Callable
 from functools import wraps
-from typing import Any, TypeVar
+from typing import Any, TypeVar, cast
 
 T = TypeVar('T', bound=Callable[..., Any])
 
@@ -44,4 +44,4 @@ async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
 
         return await method(self, *args, **kwargs)
 
-    return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper  # ty: ignore[invalid-return-type]
+    return cast('T', async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper)
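For readers outside the diff context: the cast('T', ...) form above is the standard way to tell a type checker that a decorator returns a callable with the same signature it received, instead of suppressing the error with an ignore comment. A minimal sketch of the same pattern (the log_calls decorator is illustrative, not from the PR):

import inspect
from collections.abc import Callable
from functools import wraps
from typing import Any, TypeVar, cast

F = TypeVar('F', bound=Callable[..., Any])


def log_calls(func: F) -> F:
    """Toy decorator that wraps both sync and async callables."""

    @wraps(func)
    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
        print(f'calling {func.__name__}')
        return func(*args, **kwargs)

    @wraps(func)
    async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
        print(f'calling {func.__name__}')
        return await func(*args, **kwargs)

    # The wrappers are plain functions, not `F`; `cast` asserts that the
    # decorator preserves the wrapped callable's signature.
    return cast('F', async_wrapper if inspect.iscoroutinefunction(func) else sync_wrapper)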
src/crawlee/fingerprint_suite/_browserforge_adapter.py (2 changes: 1 addition & 1 deletion)

@@ -154,7 +154,7 @@ def _get_single_browser_type(self, browser: Iterable[str | Browser] | None) -> str:
 class PatchedFingerprintGenerator(bf_FingerprintGenerator):
     """Browserforge `FingerprintGenerator` that contains patches not accepted in upstream repo."""
 
-    def __init__(  # type:ignore[no-untyped-def]  # Upstream repo types missing.
+    def __init__(
         self,
         *,
         screen: Screen | None = None,
src/crawlee/sessions/_cookies.py (8 changes: 6 additions & 2 deletions)

@@ -10,6 +10,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
+    from typing import TypeGuard
 
 
 @docs_group('Session management')
@@ -152,8 +153,8 @@ def _convert_cookie_to_dict(self, cookie: Cookie) -> CookieParam:
         if cookie.expires:
             cookie_dict['expires'] = cookie.expires
 
-        if (same_site := cookie.get_nonstandard_attr('SameSite')) and same_site in {'Lax', 'None', 'Strict'}:
-            cookie_dict['same_site'] = same_site  # ty: ignore[invalid-assignment]
+        if (same_site := cookie.get_nonstandard_attr('SameSite')) and self._is_valid_same_site(same_site):
+            cookie_dict['same_site'] = same_site
 
         return cookie_dict
 
@@ -274,3 +275,6 @@ def __hash__(self) -> int:
         """Return hash based on the cookies key attributes."""
         cookie_tuples = frozenset((cookie.name, cookie.value, cookie.domain, cookie.path) for cookie in self._jar)
         return hash(cookie_tuples)
+
+    def _is_valid_same_site(self, value: str | None) -> TypeGuard[Literal['Lax', 'None', 'Strict']]:
+        return value in {'Lax', 'None', 'Strict'}
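The new _is_valid_same_site helper is a TypeGuard: when it returns True, the checker narrows the argument's type from str | None to the literal union, which is what lets the assignment above drop its ignore comment. A standalone sketch of the narrowing (illustrative names, not the PR's code):

from typing import Literal, TypeGuard

SameSite = Literal['Lax', 'None', 'Strict']


def is_valid_same_site(value: str | None) -> TypeGuard[SameSite]:
    """True only for the three values the `SameSite` literal type allows."""
    return value in {'Lax', 'None', 'Strict'}


def demo(raw: str | None) -> None:
    if is_valid_same_site(raw):
        # Inside this branch the checker treats `raw` as
        # Literal['Lax', 'None', 'Strict'], so no ignore comment is needed.
        narrowed: SameSite = raw
        print(narrowed)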
tests/unit/crawlers/_playwright/test_playwright_crawler.py (2 changes: 1 addition & 1 deletion)

@@ -425,7 +425,7 @@ async def test_save_cookies_after_handler_processing(server_url: URL) -> None:
     @crawler.router.default_handler
     async def request_handler(context: PlaywrightCrawlingContext) -> None:
         # Simulate cookies installed from an external source in the browser
-        await context.page.context.add_cookies([{'name': 'check', 'value': 'test', 'url': str(server_url)}])  # ty: ignore[invalid-argument-type]
+        await context.page.context.add_cookies([{'name': 'check', 'value': 'test', 'url': str(server_url)}])
 
         if context.session:
            session_ids.append(context.session.id)
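The add_cookies call above takes a list of cookie dicts; Playwright expects each entry to carry either a url or a domain/path pair alongside name and value. A hedged, self-contained sketch of the same call outside the crawler fixture (assumes Playwright's async API; the URL is a placeholder):

import asyncio

from playwright.async_api import async_playwright


async def main() -> None:
    async with async_playwright() as pw:
        browser = await pw.chromium.launch()
        context = await browser.new_context()
        # A plain dict literal matching Playwright's expected cookie shape;
        # with current stubs this type-checks without an ignore comment.
        await context.add_cookies([{'name': 'check', 'value': 'test', 'url': 'http://localhost:8080'}])
        await browser.close()


asyncio.run(main())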