diff --git a/pyproject.toml b/pyproject.toml index 462a958e97..20c56a9cba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "protego>=0.5.0", "psutil>=6.0.0", "pydantic-settings>=2.2.0,!=2.7.0,!=2.7.1,!=2.8.0", - "pydantic>=2.11.0,<2.12.0", + "pydantic>=2.11.0", "pyee>=9.0.0", "tldextract>=5.1.0", "typing-extensions>=4.1.0", diff --git a/src/crawlee/_request.py b/src/crawlee/_request.py index 1520dc5c17..b74a850b01 100644 --- a/src/crawlee/_request.py +++ b/src/crawlee/_request.py @@ -185,9 +185,6 @@ class Request(BaseModel): method: HttpMethod = 'GET' """HTTP request method.""" - headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders() - """HTTP request headers.""" - payload: Annotated[ HttpPayload | None, BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v), @@ -195,23 +192,37 @@ class Request(BaseModel): ] = None """HTTP request payload.""" - user_data: Annotated[ - dict[str, JsonSerializable], # Internally, the model contains `UserData`, this is just for convenience - Field(alias='userData', default_factory=lambda: UserData()), - PlainValidator(user_data_adapter.validate_python), - PlainSerializer( - lambda instance: user_data_adapter.dump_python( - instance, - by_alias=True, - exclude_none=True, - exclude_unset=True, - exclude_defaults=True, - ) - ), - ] = {} - """Custom user data assigned to the request. Use this to save any request related data to the - request's scope, keeping them accessible on retries, failures etc. - """ + # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory + if TYPE_CHECKING: + headers: HttpHeaders = HttpHeaders() + """HTTP request headers.""" + + user_data: dict[str, JsonSerializable] = {} + """Custom user data assigned to the request. Use this to save any request related data to the + request's scope, keeping them accessible on retries, failures etc. + """ + + else: + headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] + """HTTP request headers.""" + + user_data: Annotated[ + dict[str, JsonSerializable], # Internally, the model contains `UserData`, this is just for convenience + Field(alias='userData', default_factory=lambda: UserData()), + PlainValidator(user_data_adapter.validate_python), + PlainSerializer( + lambda instance: user_data_adapter.dump_python( + instance, + by_alias=True, + exclude_none=True, + exclude_unset=True, + exclude_defaults=True, + ) + ), + ] + """Custom user data assigned to the request. Use this to save any request related data to the + request's scope, keeping them accessible on retries, failures etc. + """ retry_count: Annotated[int, Field(alias='retryCount')] = 0 """Number of times the request has been retried.""" diff --git a/src/crawlee/_types.py b/src/crawlee/_types.py index 73a28b1dd1..008a7fcf6a 100644 --- a/src/crawlee/_types.py +++ b/src/crawlee/_types.py @@ -3,17 +3,7 @@ import dataclasses from collections.abc import Callable, Iterator, Mapping from dataclasses import dataclass -from typing import ( - TYPE_CHECKING, - Annotated, - Any, - Literal, - Protocol, - TypedDict, - TypeVar, - cast, - overload, -) +from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypedDict, TypeVar, cast, overload from pydantic import ConfigDict, Field, PlainValidator, RootModel @@ -71,11 +61,15 @@ class HttpHeaders(RootModel, Mapping[str, str]): model_config = ConfigDict(validate_by_name=True, validate_by_alias=True) - root: Annotated[ - dict[str, str], - PlainValidator(lambda value: _normalize_headers(value)), - Field(default_factory=dict), - ] = {} + # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory + if TYPE_CHECKING: + root: dict[str, str] = {} + else: + root: Annotated[ + dict[str, str], + PlainValidator(lambda value: _normalize_headers(value)), + Field(default_factory=dict), + ] def __getitem__(self, key: str) -> str: return self.root[key.lower()] diff --git a/uv.lock b/uv.lock index dbc7199a70..a1d46629aa 100644 --- a/uv.lock +++ b/uv.lock @@ -832,7 +832,7 @@ requires-dist = [ { name = "playwright", marker = "extra == 'playwright'", specifier = ">=1.27.0" }, { name = "protego", specifier = ">=0.5.0" }, { name = "psutil", specifier = ">=6.0.0" }, - { name = "pydantic", specifier = ">=2.11.0,<2.12.0" }, + { name = "pydantic", specifier = ">=2.11.0" }, { name = "pydantic-settings", specifier = ">=2.2.0,!=2.7.0,!=2.7.1,!=2.8.0" }, { name = "pyee", specifier = ">=9.0.0" }, { name = "rich", marker = "extra == 'cli'", specifier = ">=13.9.0" },