diff --git a/src/crawlee/_browserforge_workaround.py b/src/crawlee/_browserforge_workaround.py
index 8e8dcceca4..ed84e06baa 100644
--- a/src/crawlee/_browserforge_workaround.py
+++ b/src/crawlee/_browserforge_workaround.py
@@ -1,4 +1,5 @@
 # ruff: noqa: N802, PLC0415
+from collections.abc import Callable
 
 
 def patch_browserforge() -> None:
@@ -12,7 +13,7 @@ def patch_browserforge() -> None:
     import apify_fingerprint_datapoints
     from browserforge import download
 
-    download.DATA_DIRS: dict[str, Path] = {  # type:ignore[misc]
+    download.DATA_DIRS = {
         'headers': apify_fingerprint_datapoints.get_header_network().parent,
         'fingerprints': apify_fingerprint_datapoints.get_fingerprint_network().parent,
     }
@@ -20,7 +21,7 @@ def patch_browserforge() -> None:
     def DownloadIfNotExists(**flags: bool) -> None:
         pass
 
-    download.DownloadIfNotExists = DownloadIfNotExists  # ty: ignore[invalid-assignment]
+    download.DownloadIfNotExists: Callable[..., None] = DownloadIfNotExists
 
     import browserforge.bayesian_network
 
@@ -33,7 +34,7 @@ def __init__(self, path: Path) -> None:
             path = download.DATA_DIRS['fingerprints'] / download.DATA_FILES['fingerprints'][path.name]
             super().__init__(path)
 
-    browserforge.bayesian_network.BayesianNetwork = BayesianNetwork  # type:ignore[misc]
+    browserforge.bayesian_network.BayesianNetwork: type[BayesianNetwork] = BayesianNetwork
 
     import browserforge.headers.generator
     browserforge.headers.generator.DATA_DIR = download.DATA_DIRS['headers']
diff --git a/src/crawlee/_utils/context.py b/src/crawlee/_utils/context.py
index 6f3a65094b..9faa994376 100644
--- a/src/crawlee/_utils/context.py
+++ b/src/crawlee/_utils/context.py
@@ -3,7 +3,7 @@
 import inspect
 from collections.abc import Callable
 from functools import wraps
-from typing import Any, TypeVar
+from typing import Any, TypeVar, cast
 
 T = TypeVar('T', bound=Callable[..., Any])
 
@@ -44,4 +44,4 @@ async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
 
         return await method(self, *args, **kwargs)
 
-    return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper  # ty: ignore[invalid-return-type]
+    return cast('T', async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper)
diff --git a/src/crawlee/fingerprint_suite/_browserforge_adapter.py b/src/crawlee/fingerprint_suite/_browserforge_adapter.py
index 1b39da0006..98460abb00 100644
--- a/src/crawlee/fingerprint_suite/_browserforge_adapter.py
+++ b/src/crawlee/fingerprint_suite/_browserforge_adapter.py
@@ -154,7 +154,7 @@ def _get_single_browser_type(self, browser: Iterable[str | Browser] | None) -> str:
 class PatchedFingerprintGenerator(bf_FingerprintGenerator):
     """Browserforge `FingerprintGenerator` that contains patches not accepted in upstream repo."""
 
-    def __init__(  # type:ignore[no-untyped-def]  # Upstream repo types missing.
+    def __init__(
         self,
         *,
         screen: Screen | None = None,
diff --git a/src/crawlee/sessions/_cookies.py b/src/crawlee/sessions/_cookies.py
index f4878a7d78..4af98faf50 100644
--- a/src/crawlee/sessions/_cookies.py
+++ b/src/crawlee/sessions/_cookies.py
@@ -10,6 +10,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
+    from typing import TypeGuard
 
 
 @docs_group('Session management')
@@ -152,8 +153,8 @@ def _convert_cookie_to_dict(self, cookie: Cookie) -> CookieParam:
         if cookie.expires:
             cookie_dict['expires'] = cookie.expires
 
-        if (same_site := cookie.get_nonstandard_attr('SameSite')) and same_site in {'Lax', 'None', 'Strict'}:
-            cookie_dict['same_site'] = same_site  # ty: ignore[invalid-assignment]
+        if (same_site := cookie.get_nonstandard_attr('SameSite')) and self._is_valid_same_site(same_site):
+            cookie_dict['same_site'] = same_site
 
         return cookie_dict
 
@@ -274,3 +275,6 @@ def __hash__(self) -> int:
         """Return hash based on the cookies key attributes."""
         cookie_tuples = frozenset((cookie.name, cookie.value, cookie.domain, cookie.path) for cookie in self._jar)
         return hash(cookie_tuples)
+
+    def _is_valid_same_site(self, value: str | None) -> TypeGuard[Literal['Lax', 'None', 'Strict']]:
+        return value in {'Lax', 'None', 'Strict'}
diff --git a/tests/unit/crawlers/_playwright/test_playwright_crawler.py b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
index db60867b44..1ecdb8859b 100644
--- a/tests/unit/crawlers/_playwright/test_playwright_crawler.py
+++ b/tests/unit/crawlers/_playwright/test_playwright_crawler.py
@@ -425,7 +425,7 @@ async def test_save_cookies_after_handler_processing(server_url: URL) -> None:
     @crawler.router.default_handler
     async def request_handler(context: PlaywrightCrawlingContext) -> None:
         # Simulate cookies installed from an external source in the browser
-        await context.page.context.add_cookies([{'name': 'check', 'value': 'test', 'url': str(server_url)}])  # ty: ignore[invalid-argument-type]
+        await context.page.context.add_cookies([{'name': 'check', 'value': 'test', 'url': str(server_url)}])
 
         if context.session:
             session_ids.append(context.session.id)
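
A note on the `_utils/context.py` hunk: the decorator picks its wrapper at runtime via `inspect.iscoroutinefunction`, and neither wrapper can be proven by a type checker to have the decorated function's type `T`, so the branch result is cast back to `T` instead of being silenced with an ignore comment. Below is a minimal, self-contained sketch of the same pattern; the `logged` decorator is illustrative only, not crawlee code.

```python
import inspect
from collections.abc import Callable
from functools import wraps
from typing import Any, TypeVar, cast

T = TypeVar('T', bound=Callable[..., Any])


def logged(method: T) -> T:
    """Wrap a sync or async callable without changing its declared type."""

    @wraps(method)
    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
        print(f'calling {method.__name__}')
        return method(*args, **kwargs)

    @wraps(method)
    async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
        print(f'calling {method.__name__}')
        return await method(*args, **kwargs)

    # Neither wrapper is of type T as far as the checker is concerned;
    # the cast restores the original signature for callers, mirroring
    # the `cast('T', ...)` return in context.py.
    return cast('T', async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper)


@logged
def add(a: int, b: int) -> int:
    return a + b
```

The `cast` is safe here because `@wraps` preserves the runtime metadata and both wrappers forward arguments unchanged; the checker simply cannot express "sync in, sync out; async in, async out" without overloads.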
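Likewise for the `_cookies.py` hunk: `same_site in {'Lax', 'None', 'Strict'}` evaluates to a plain `bool`, so at the assignment the checker still sees `same_site` as `str | None` rather than the `Literal` type that `CookieParam['same_site']` expects, which is what the old ignore comment papered over. Moving the membership test into `_is_valid_same_site` with a `TypeGuard` return type lets the checker narrow the value on the `True` branch. A minimal sketch of the pattern, with an illustrative `CookieDict` standing in for crawlee's `CookieParam`:

```python
from typing import Literal, TypedDict, TypeGuard

SameSite = Literal['Lax', 'None', 'Strict']


class CookieDict(TypedDict, total=False):
    """Illustrative stand-in for crawlee's CookieParam."""

    name: str
    value: str
    same_site: SameSite


def is_valid_same_site(value: str | None) -> TypeGuard[SameSite]:
    # A True result tells the checker that `value` is one of the three literals.
    return value in {'Lax', 'None', 'Strict'}


def convert(name: str, value: str, same_site: str | None) -> CookieDict:
    cookie: CookieDict = {'name': name, 'value': value}
    if is_valid_same_site(same_site):
        cookie['same_site'] = same_site  # narrowed to SameSite; no ignore needed
    return cookie
```

`TypeGuard` (in `typing` since 3.10) only narrows on the `True` branch; `typing.TypeIs` (3.13) would also narrow the `False` branch, but that is not needed here.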