diff --git a/.github/workflows/quality_check_pydanticv2.yml b/.github/workflows/quality_check_pydanticv2.yml new file mode 100644 index 00000000000..435ee5df868 --- /dev/null +++ b/.github/workflows/quality_check_pydanticv2.yml @@ -0,0 +1,76 @@ +name: Code quality - Pydanticv2 + +# PROCESS +# +# 1. Install all dependencies and spin off containers for all supported Python versions +# 2. Run code formatters and linters (various checks) for code standard +# 3. Run static typing checker for potential bugs +# 4. Run entire test suite for regressions except end-to-end (unit, functional, performance) +# 5. Run static analysis (in addition to CodeQL) for common insecure code practices +# 6. Run complexity baseline to avoid error-prone bugs and keep maintenance lower +# 7. Collect and report on test coverage + +# USAGE +# +# Always triggered on new PRs, PR changes and PR merge. + + +on: + pull_request: + paths: + - "aws_lambda_powertools/**" + - "tests/**" + - "pyproject.toml" + - "poetry.lock" + - "mypy.ini" + branches: + - develop + push: + paths: + - "aws_lambda_powertools/**" + - "tests/**" + - "pyproject.toml" + - "poetry.lock" + - "mypy.ini" + branches: + - develop + +permissions: + contents: read + +jobs: + quality_check: + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: ["3.7", "3.8", "3.9", "3.10"] + env: + PYTHON: "${{ matrix.python-version }}" + permissions: + contents: read # checkout code only + steps: + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 + - name: Install poetry + run: pipx install poetry + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@bd6b4b6205c4dbad673328db7b31b7fab9e241c0 # v4.6.1 + with: + python-version: ${{ matrix.python-version }} + cache: "poetry" + - name: Removing dev dependencies locked to Pydantic v1 + run: poetry remove cfn-lint + - name: Replacing Pydantic v1 with v2 >= 2.0.3 + run: poetry add "pydantic=^2.0.3" + - name: Install dependencies 
+ run: make dev + - name: Formatting and Linting + run: make lint + - name: Static type checking + run: make mypy + - name: Test with pytest + run: make test + - name: Security baseline + run: make security-baseline + - name: Complexity baseline + run: make complexity-baseline diff --git a/aws_lambda_powertools/utilities/batch/base.py b/aws_lambda_powertools/utilities/batch/base.py index 4ab2c1a2b0b..b00b31449f2 100644 --- a/aws_lambda_powertools/utilities/batch/base.py +++ b/aws_lambda_powertools/utilities/batch/base.py @@ -348,6 +348,11 @@ def _to_batch_type(self, record: dict, event_type: EventType) -> EventSourceData def _to_batch_type(self, record: dict, event_type: EventType, model: Optional["BatchTypeModels"] = None): if model is not None: + # If a model is provided, we assume Pydantic is installed and we need to disable v2 warnings + from aws_lambda_powertools.utilities.parser.compat import disable_pydantic_v2_warning + + disable_pydantic_v2_warning() + return model.parse_obj(record) return self._DATA_CLASS_MAPPING[event_type](record) @@ -500,8 +505,13 @@ def _process_record(self, record: dict) -> Union[SuccessResponse, FailureRespons # we need to handle that exception differently. # We check for a public attr in validation errors coming from Pydantic exceptions (subclass or not) # and we compare if it's coming from the same model that trigger the exception in the first place - model = getattr(exc, "model", None) - if model == self.model: + + # Pydantic v1 raises a ValidationError with ErrorWrappers and stores the model instance in a class variable. + # Pydantic v2 simplifies this by adding a title variable to store the model name directly. 
+ model = getattr(exc, "model", None) or getattr(exc, "title", None) + model_name = getattr(self.model, "__name__", None) + + if model == self.model or model == model_name: return self._register_model_validation_error_record(record) return self.failure_handler(record=data, exception=sys.exc_info()) @@ -644,8 +654,13 @@ async def _async_process_record(self, record: dict) -> Union[SuccessResponse, Fa # we need to handle that exception differently. # We check for a public attr in validation errors coming from Pydantic exceptions (subclass or not) # and we compare if it's coming from the same model that trigger the exception in the first place - model = getattr(exc, "model", None) - if model == self.model: + + # Pydantic v1 raises a ValidationError with ErrorWrappers and stores the model instance in a class variable. + # Pydantic v2 simplifies this by adding a title variable to store the model name directly. + model = getattr(exc, "model", None) or getattr(exc, "title", None) + model_name = getattr(self.model, "__name__", None) + + if model == self.model or model == model_name: return self._register_model_validation_error_record(record) return self.failure_handler(record=data, exception=sys.exc_info()) diff --git a/aws_lambda_powertools/utilities/parser/compat.py b/aws_lambda_powertools/utilities/parser/compat.py new file mode 100644 index 00000000000..c73098421b1 --- /dev/null +++ b/aws_lambda_powertools/utilities/parser/compat.py @@ -0,0 +1,34 @@ +import functools + + +@functools.lru_cache(maxsize=None) +def disable_pydantic_v2_warning(): + """ + Disables the Pydantic version 2 warning by filtering out the related warnings. + + This function checks the version of Pydantic currently installed and, if it is version 2, + it filters out the PydanticDeprecationWarning and PydanticDeprecatedSince20 warnings + to suppress them. + + Since we only need to run the code once, we are using lru_cache to improve performance. 
+ + Note: This function assumes that Pydantic is installed. + + Usage: + disable_pydantic_v2_warning() + """ + try: + from pydantic import __version__ + + version = __version__.split(".") + + if int(version[0]) == 2: + import warnings + + from pydantic import PydanticDeprecatedSince20, PydanticDeprecationWarning + + warnings.filterwarnings("ignore", category=PydanticDeprecationWarning) + warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20) + + except ImportError: + pass diff --git a/aws_lambda_powertools/utilities/parser/envelopes/base.py b/aws_lambda_powertools/utilities/parser/envelopes/base.py index 85486fdd876..101e157ef69 100644 --- a/aws_lambda_powertools/utilities/parser/envelopes/base.py +++ b/aws_lambda_powertools/utilities/parser/envelopes/base.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from typing import Any, Dict, Optional, Type, TypeVar, Union +from aws_lambda_powertools.utilities.parser.compat import disable_pydantic_v2_warning from aws_lambda_powertools.utilities.parser.types import Model logger = logging.getLogger(__name__) @@ -26,6 +27,8 @@ def _parse(data: Optional[Union[Dict[str, Any], Any]], model: Type[Model]) -> Un Any Parsed data """ + disable_pydantic_v2_warning() + if data is None: logger.debug("Skipping parsing as event is None") return data diff --git a/aws_lambda_powertools/utilities/parser/models/__init__.py b/aws_lambda_powertools/utilities/parser/models/__init__.py index ddc76dc7819..f1b2d30d9cf 100644 --- a/aws_lambda_powertools/utilities/parser/models/__init__.py +++ b/aws_lambda_powertools/utilities/parser/models/__init__.py @@ -1,3 +1,7 @@ +from aws_lambda_powertools.utilities.parser.compat import disable_pydantic_v2_warning + +disable_pydantic_v2_warning() + from .alb import AlbModel, AlbRequestContext, AlbRequestContextData from .apigw import ( APIGatewayEventAuthorizer, diff --git a/aws_lambda_powertools/utilities/parser/models/apigw.py b/aws_lambda_powertools/utilities/parser/models/apigw.py index 
82a3a6188d2..c17b094d0c0 100644 --- a/aws_lambda_powertools/utilities/parser/models/apigw.py +++ b/aws_lambda_powertools/utilities/parser/models/apigw.py @@ -21,74 +21,74 @@ class ApiGatewayUserCert(BaseModel): class APIGatewayEventIdentity(BaseModel): - accessKey: Optional[str] - accountId: Optional[str] - apiKey: Optional[str] - apiKeyId: Optional[str] - caller: Optional[str] - cognitoAuthenticationProvider: Optional[str] - cognitoAuthenticationType: Optional[str] - cognitoIdentityId: Optional[str] - cognitoIdentityPoolId: Optional[str] - principalOrgId: Optional[str] + accessKey: Optional[str] = None + accountId: Optional[str] = None + apiKey: Optional[str] = None + apiKeyId: Optional[str] = None + caller: Optional[str] = None + cognitoAuthenticationProvider: Optional[str] = None + cognitoAuthenticationType: Optional[str] = None + cognitoIdentityId: Optional[str] = None + cognitoIdentityPoolId: Optional[str] = None + principalOrgId: Optional[str] = None # see #1562, temp workaround until API Gateway fixes it the Test button payload # removing it will not be considered a regression in the future sourceIp: Union[IPvAnyNetwork, Literal["test-invoke-source-ip"]] - user: Optional[str] - userAgent: Optional[str] - userArn: Optional[str] - clientCert: Optional[ApiGatewayUserCert] + user: Optional[str] = None + userAgent: Optional[str] = None + userArn: Optional[str] = None + clientCert: Optional[ApiGatewayUserCert] = None class APIGatewayEventAuthorizer(BaseModel): - claims: Optional[Dict[str, Any]] - scopes: Optional[List[str]] + claims: Optional[Dict[str, Any]] = None + scopes: Optional[List[str]] = None class APIGatewayEventRequestContext(BaseModel): accountId: str apiId: str - authorizer: Optional[APIGatewayEventAuthorizer] + authorizer: Optional[APIGatewayEventAuthorizer] = None stage: str protocol: str identity: APIGatewayEventIdentity requestId: str requestTime: str requestTimeEpoch: datetime - resourceId: Optional[str] + resourceId: Optional[str] = None 
resourcePath: str - domainName: Optional[str] - domainPrefix: Optional[str] - extendedRequestId: Optional[str] + domainName: Optional[str] = None + domainPrefix: Optional[str] = None + extendedRequestId: Optional[str] = None httpMethod: Literal["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] path: str - connectedAt: Optional[datetime] - connectionId: Optional[str] - eventType: Optional[Literal["CONNECT", "MESSAGE", "DISCONNECT"]] - messageDirection: Optional[str] - messageId: Optional[str] - routeKey: Optional[str] - operationName: Optional[str] + connectedAt: Optional[datetime] = None + connectionId: Optional[str] = None + eventType: Optional[Literal["CONNECT", "MESSAGE", "DISCONNECT"]] = None + messageDirection: Optional[str] = None + messageId: Optional[str] = None + routeKey: Optional[str] = None + operationName: Optional[str] = None - @root_validator(allow_reuse=True) + @root_validator(allow_reuse=True, skip_on_failure=True) def check_message_id(cls, values): message_id, event_type = values.get("messageId"), values.get("eventType") if message_id is not None and event_type != "MESSAGE": - raise TypeError("messageId is available only when the `eventType` is `MESSAGE`") + raise ValueError("messageId is available only when the `eventType` is `MESSAGE`") return values class APIGatewayProxyEventModel(BaseModel): - version: Optional[str] + version: Optional[str] = None resource: str path: str httpMethod: Literal["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] headers: Dict[str, str] multiValueHeaders: Dict[str, List[str]] - queryStringParameters: Optional[Dict[str, str]] - multiValueQueryStringParameters: Optional[Dict[str, List[str]]] + queryStringParameters: Optional[Dict[str, str]] = None + multiValueQueryStringParameters: Optional[Dict[str, List[str]]] = None requestContext: APIGatewayEventRequestContext - pathParameters: Optional[Dict[str, str]] - stageVariables: Optional[Dict[str, str]] + pathParameters: Optional[Dict[str, str]] = None + 
stageVariables: Optional[Dict[str, str]] = None isBase64Encoded: bool - body: Optional[Union[str, Type[BaseModel]]] + body: Optional[Union[str, Type[BaseModel]]] = None diff --git a/aws_lambda_powertools/utilities/parser/models/apigwv2.py b/aws_lambda_powertools/utilities/parser/models/apigwv2.py index cb1f830bb47..3be793dd951 100644 --- a/aws_lambda_powertools/utilities/parser/models/apigwv2.py +++ b/aws_lambda_powertools/utilities/parser/models/apigwv2.py @@ -14,13 +14,13 @@ class RequestContextV2AuthorizerIamCognito(BaseModel): class RequestContextV2AuthorizerIam(BaseModel): - accessKey: Optional[str] - accountId: Optional[str] - callerId: Optional[str] - principalOrgId: Optional[str] - userArn: Optional[str] - userId: Optional[str] - cognitoIdentity: Optional[RequestContextV2AuthorizerIamCognito] + accessKey: Optional[str] = None + accountId: Optional[str] = None + callerId: Optional[str] = None + principalOrgId: Optional[str] = None + userArn: Optional[str] = None + userId: Optional[str] = None + cognitoIdentity: Optional[RequestContextV2AuthorizerIamCognito] = None class RequestContextV2AuthorizerJwt(BaseModel): @@ -29,8 +29,8 @@ class RequestContextV2AuthorizerJwt(BaseModel): class RequestContextV2Authorizer(BaseModel): - jwt: Optional[RequestContextV2AuthorizerJwt] - iam: Optional[RequestContextV2AuthorizerIam] + jwt: Optional[RequestContextV2AuthorizerJwt] = None + iam: Optional[RequestContextV2AuthorizerIam] = None lambda_value: Optional[Dict[str, Any]] = Field(None, alias="lambda") @@ -45,7 +45,7 @@ class RequestContextV2Http(BaseModel): class RequestContextV2(BaseModel): accountId: str apiId: str - authorizer: Optional[RequestContextV2Authorizer] + authorizer: Optional[RequestContextV2Authorizer] = None domainName: str domainPrefix: str requestId: str @@ -61,11 +61,11 @@ class APIGatewayProxyEventV2Model(BaseModel): routeKey: str rawPath: str rawQueryString: str - cookies: Optional[List[str]] + cookies: Optional[List[str]] = None headers: Dict[str, str] 
- queryStringParameters: Optional[Dict[str, str]] - pathParameters: Optional[Dict[str, str]] - stageVariables: Optional[Dict[str, str]] + queryStringParameters: Optional[Dict[str, str]] = None + pathParameters: Optional[Dict[str, str]] = None + stageVariables: Optional[Dict[str, str]] = None requestContext: RequestContextV2 - body: Optional[Union[str, Type[BaseModel]]] + body: Optional[Union[str, Type[BaseModel]]] = None isBase64Encoded: bool diff --git a/aws_lambda_powertools/utilities/parser/models/dynamodb.py b/aws_lambda_powertools/utilities/parser/models/dynamodb.py index 7a12bf195d3..679952a7181 100644 --- a/aws_lambda_powertools/utilities/parser/models/dynamodb.py +++ b/aws_lambda_powertools/utilities/parser/models/dynamodb.py @@ -7,10 +7,10 @@ class DynamoDBStreamChangedRecordModel(BaseModel): - ApproximateCreationDateTime: Optional[date] + ApproximateCreationDateTime: Optional[date] = None Keys: Dict[str, Dict[str, Any]] - NewImage: Optional[Union[Dict[str, Any], Type[BaseModel], BaseModel]] - OldImage: Optional[Union[Dict[str, Any], Type[BaseModel], BaseModel]] + NewImage: Optional[Union[Dict[str, Any], Type[BaseModel], BaseModel]] = None + OldImage: Optional[Union[Dict[str, Any], Type[BaseModel], BaseModel]] = None SequenceNumber: str SizeBytes: int StreamViewType: Literal["NEW_AND_OLD_IMAGES", "KEYS_ONLY", "NEW_IMAGE", "OLD_IMAGE"] @@ -40,7 +40,7 @@ class DynamoDBStreamRecordModel(BaseModel): awsRegion: str eventSourceARN: str dynamodb: DynamoDBStreamChangedRecordModel - userIdentity: Optional[UserIdentity] + userIdentity: Optional[UserIdentity] = None class DynamoDBStreamModel(BaseModel): diff --git a/aws_lambda_powertools/utilities/parser/models/kafka.py b/aws_lambda_powertools/utilities/parser/models/kafka.py index d4c36bf70f1..1d9d8114e65 100644 --- a/aws_lambda_powertools/utilities/parser/models/kafka.py +++ b/aws_lambda_powertools/utilities/parser/models/kafka.py @@ -19,8 +19,8 @@ class KafkaRecordModel(BaseModel): value: Union[str, 
Type[BaseModel]] headers: List[Dict[str, bytes]] - # validators - _decode_key = validator("key", allow_reuse=True)(base64_decode) + # Added type ignore to keep compatibility between Pydantic v1 and v2 + _decode_key = validator("key", allow_reuse=True)(base64_decode) # type: ignore[type-var, unused-ignore] @validator("value", pre=True, allow_reuse=True) def data_base64_decode(cls, value): diff --git a/aws_lambda_powertools/utilities/parser/models/kinesis_firehose.py b/aws_lambda_powertools/utilities/parser/models/kinesis_firehose.py index c59d8c680e5..7edc0ba4ebf 100644 --- a/aws_lambda_powertools/utilities/parser/models/kinesis_firehose.py +++ b/aws_lambda_powertools/utilities/parser/models/kinesis_firehose.py @@ -17,7 +17,7 @@ class KinesisFirehoseRecord(BaseModel): data: Union[bytes, Type[BaseModel]] # base64 encoded str is parsed into bytes recordId: str approximateArrivalTimestamp: PositiveInt - kinesisRecordMetadata: Optional[KinesisFirehoseRecordMetadata] + kinesisRecordMetadata: Optional[KinesisFirehoseRecordMetadata] = None @validator("data", pre=True, allow_reuse=True) def data_base64_decode(cls, value): @@ -28,5 +28,5 @@ class KinesisFirehoseModel(BaseModel): invocationId: str deliveryStreamArn: str region: str - sourceKinesisStreamArn: Optional[str] + sourceKinesisStreamArn: Optional[str] = None records: List[KinesisFirehoseRecord] diff --git a/aws_lambda_powertools/utilities/parser/models/kinesis_firehose_sqs.py b/aws_lambda_powertools/utilities/parser/models/kinesis_firehose_sqs.py index b649828853b..58a23e5006c 100644 --- a/aws_lambda_powertools/utilities/parser/models/kinesis_firehose_sqs.py +++ b/aws_lambda_powertools/utilities/parser/models/kinesis_firehose_sqs.py @@ -13,7 +13,7 @@ class KinesisFirehoseSqsRecord(BaseModel): data: SqsRecordModel recordId: str approximateArrivalTimestamp: PositiveInt - kinesisRecordMetadata: Optional[KinesisFirehoseRecordMetadata] + kinesisRecordMetadata: Optional[KinesisFirehoseRecordMetadata] = None 
@validator("data", pre=True, allow_reuse=True) def data_base64_decode(cls, value): @@ -25,5 +25,5 @@ class KinesisFirehoseSqsModel(BaseModel): invocationId: str deliveryStreamArn: str region: str - sourceKinesisStreamArn: Optional[str] + sourceKinesisStreamArn: Optional[str] = None records: List[KinesisFirehoseSqsRecord] diff --git a/aws_lambda_powertools/utilities/parser/models/s3.py b/aws_lambda_powertools/utilities/parser/models/s3.py index 01573b6d751..db6c41d30f3 100644 --- a/aws_lambda_powertools/utilities/parser/models/s3.py +++ b/aws_lambda_powertools/utilities/parser/models/s3.py @@ -45,10 +45,10 @@ class S3Bucket(BaseModel): class S3Object(BaseModel): key: str - size: Optional[NonNegativeFloat] - eTag: Optional[str] + size: Optional[NonNegativeFloat] = None + eTag: Optional[str] = None sequencer: str - versionId: Optional[str] + versionId: Optional[str] = None class S3Message(BaseModel): @@ -60,10 +60,10 @@ class S3Message(BaseModel): class S3EventNotificationObjectModel(BaseModel): key: str - size: Optional[NonNegativeFloat] + size: Optional[NonNegativeFloat] = None etag: str version_id: str = Field(None, alias="version-id") - sequencer: Optional[str] + sequencer: Optional[str] = None class S3EventNotificationEventBridgeBucketModel(BaseModel): @@ -77,7 +77,7 @@ class S3EventNotificationEventBridgeDetailModel(BaseModel): request_id: str = Field(None, alias="request-id") requester: str source_ip_address: str = Field(None, alias="source-ip-address") - reason: Optional[str] + reason: Optional[str] = None deletion_type: Optional[str] = Field(None, alias="deletion-type") restore_expiry_time: Optional[str] = Field(None, alias="restore-expiry-time") source_storage_class: Optional[str] = Field(None, alias="source-storage-class") @@ -99,9 +99,9 @@ class S3RecordModel(BaseModel): requestParameters: S3RequestParameters responseElements: S3ResponseElements s3: S3Message - glacierEventData: Optional[S3EventRecordGlacierEventData] + glacierEventData: 
Optional[S3EventRecordGlacierEventData] = None - @root_validator + @root_validator(allow_reuse=True, skip_on_failure=True) def validate_s3_object(cls, values): event_name = values.get("eventName") s3_object = values.get("s3").object diff --git a/aws_lambda_powertools/utilities/parser/models/s3_object_event.py b/aws_lambda_powertools/utilities/parser/models/s3_object_event.py index ef59e9c2f98..7ef98fe4bb2 100644 --- a/aws_lambda_powertools/utilities/parser/models/s3_object_event.py +++ b/aws_lambda_powertools/utilities/parser/models/s3_object_event.py @@ -22,7 +22,7 @@ class S3ObjectUserRequest(BaseModel): class S3ObjectSessionIssuer(BaseModel): type: str # noqa: A003, VNE003 - userName: Optional[str] + userName: Optional[str] = None principalId: str arn: str accountId: str @@ -42,10 +42,10 @@ class S3ObjectUserIdentity(BaseModel): type: str # noqa003 accountId: str accessKeyId: str - userName: Optional[str] + userName: Optional[str] = None principalId: str arn: str - sessionContext: Optional[S3ObjectSessionContext] + sessionContext: Optional[S3ObjectSessionContext] = None class S3ObjectLambdaEvent(BaseModel): diff --git a/aws_lambda_powertools/utilities/parser/models/ses.py b/aws_lambda_powertools/utilities/parser/models/ses.py index 77b23431099..2e9e93f368e 100644 --- a/aws_lambda_powertools/utilities/parser/models/ses.py +++ b/aws_lambda_powertools/utilities/parser/models/ses.py @@ -36,9 +36,9 @@ class SesMailHeaders(BaseModel): class SesMailCommonHeaders(BaseModel): header_from: List[str] = Field(None, alias="from") to: List[str] - cc: Optional[List[str]] - bcc: Optional[List[str]] - sender: Optional[List[str]] + cc: Optional[List[str]] = None + bcc: Optional[List[str]] = None + sender: Optional[List[str]] = None reply_to: Optional[List[str]] = Field(None, alias="reply-to") returnPath: str messageId: str diff --git a/aws_lambda_powertools/utilities/parser/models/sns.py b/aws_lambda_powertools/utilities/parser/models/sns.py index 6cd2fcec006..8f388f2974c 100644 
--- a/aws_lambda_powertools/utilities/parser/models/sns.py +++ b/aws_lambda_powertools/utilities/parser/models/sns.py @@ -14,17 +14,17 @@ class SnsMsgAttributeModel(BaseModel): class SnsNotificationModel(BaseModel): - Subject: Optional[str] + Subject: Optional[str] = None TopicArn: str UnsubscribeUrl: HttpUrl Type: Literal["Notification"] - MessageAttributes: Optional[Dict[str, SnsMsgAttributeModel]] + MessageAttributes: Optional[Dict[str, SnsMsgAttributeModel]] = None Message: Union[str, TypingType[BaseModel]] MessageId: str - SigningCertUrl: Optional[HttpUrl] # NOTE: FIFO opt-in removes attribute - Signature: Optional[str] # NOTE: FIFO opt-in removes attribute + SigningCertUrl: Optional[HttpUrl] = None # NOTE: FIFO opt-in removes attribute + Signature: Optional[str] = None # NOTE: FIFO opt-in removes attribute Timestamp: datetime - SignatureVersion: Optional[str] # NOTE: FIFO opt-in removes attribute + SignatureVersion: Optional[str] = None # NOTE: FIFO opt-in removes attribute @root_validator(pre=True, allow_reuse=True) def check_sqs_protocol(cls, values): diff --git a/aws_lambda_powertools/utilities/parser/models/sqs.py b/aws_lambda_powertools/utilities/parser/models/sqs.py index 168707530f3..63ea4b76e0e 100644 --- a/aws_lambda_powertools/utilities/parser/models/sqs.py +++ b/aws_lambda_powertools/utilities/parser/models/sqs.py @@ -9,17 +9,17 @@ class SqsAttributesModel(BaseModel): ApproximateReceiveCount: str ApproximateFirstReceiveTimestamp: datetime - MessageDeduplicationId: Optional[str] - MessageGroupId: Optional[str] + MessageDeduplicationId: Optional[str] = None + MessageGroupId: Optional[str] = None SenderId: str SentTimestamp: datetime - SequenceNumber: Optional[str] - AWSTraceHeader: Optional[str] + SequenceNumber: Optional[str] = None + AWSTraceHeader: Optional[str] = None class SqsMsgAttributeModel(BaseModel): - stringValue: Optional[str] - binaryValue: Optional[str] + stringValue: Optional[str] = None + binaryValue: Optional[str] = None 
stringListValues: List[str] = [] binaryListValues: List[str] = [] dataType: str @@ -56,7 +56,7 @@ class SqsRecordModel(BaseModel): attributes: SqsAttributesModel messageAttributes: Dict[str, SqsMsgAttributeModel] md5OfBody: str - md5OfMessageAttributes: Optional[str] + md5OfMessageAttributes: Optional[str] = None eventSource: Literal["aws:sqs"] eventSourceARN: str awsRegion: str diff --git a/aws_lambda_powertools/utilities/parser/parser.py b/aws_lambda_powertools/utilities/parser/parser.py index eeaa5612fff..7e2d69e429c 100644 --- a/aws_lambda_powertools/utilities/parser/parser.py +++ b/aws_lambda_powertools/utilities/parser/parser.py @@ -1,6 +1,7 @@ import logging from typing import Any, Callable, Dict, Optional, Type, overload +from aws_lambda_powertools.utilities.parser.compat import disable_pydantic_v2_warning from aws_lambda_powertools.utilities.parser.types import EventParserReturnType, Model from ...middleware_factory import lambda_handler_decorator @@ -156,6 +157,7 @@ def handler(event: Order, context: LambdaContext): raise InvalidEnvelopeError(f"Envelope must implement BaseEnvelope, envelope={envelope}") try: + disable_pydantic_v2_warning() logger.debug("Parsing and validating event model; no envelope used") if isinstance(event, str): return model.parse_raw(event) diff --git a/docs/utilities/parser.md b/docs/utilities/parser.md index f482dcb0410..d98835a8381 100644 --- a/docs/utilities/parser.md +++ b/docs/utilities/parser.md @@ -11,14 +11,19 @@ This utility provides data parsing and deep validation using [Pydantic](https:// * Defines data in pure Python classes, then parse, validate and extract only what you want * Built-in envelopes to unwrap, extend, and validate popular event sources payloads * Enforces type hints at runtime with user-friendly errors +* Support for Pydantic v1 and v2 ## Getting started ### Install +Powertools for AWS Lambda (Python) supports Pydantic v1 and v2. 
Each Pydantic version requires different dependencies before you can use Parser. + +#### Using Pydantic v1 + !!! info "This is not necessary if you're installing Powertools for AWS Lambda (Python) via [Lambda Layer/SAR](../index.md#lambda-layer){target="_blank"}" -Add `aws-lambda-powertools[parser]` as a dependency in your preferred tool: _e.g._, _requirements.txt_, _pyproject.toml_. This will ensure you have the required dependencies before using Parser. +Add `aws-lambda-powertools[parser]` as a dependency in your preferred tool: _e.g._, _requirements.txt_, _pyproject.toml_. ???+ warning This will increase the compressed package size by >10MB due to the Pydantic dependency. @@ -28,6 +33,12 @@ Add `aws-lambda-powertools[parser]` as a dependency in your preferred tool: _e.g Pip example: `SKIP_CYTHON=1 pip install --no-binary pydantic aws-lambda-powertools[parser]` +#### Using Pydantic v2 + +You need to bring Pydantic v2.0.3 or later as an external dependency. Note that [we suppress Pydantic v2 deprecation warnings](https://github.com/aws-powertools/powertools-lambda-python/issues/2672){target="_blank"} to reduce noise and optimize log costs. + +Add `aws-lambda-powertools` and `pydantic>=2.0.3` as a dependency in your preferred tool: _e.g._, _requirements.txt_, _pyproject.toml_. + ### Defining models You can define models to parse incoming events by inheriting from `BaseModel`. @@ -45,7 +56,7 @@ class Order(BaseModel): id: int description: str items: List[OrderItem] # nesting models are supported - optional_field: Optional[str] # this field may or may not be available when parsing + optional_field: Optional[str] = None # this field may or may not be available when parsing ``` These are simply Python classes that inherit from BaseModel. **Parser** enforces type hints declared in your model at runtime. 
@@ -79,7 +90,7 @@ class Order(BaseModel): id: int description: str items: List[OrderItem] # nesting models are supported - optional_field: Optional[str] # this field may or may not be available when parsing + optional_field: Optional[str] = None # this field may or may not be available when parsing @event_parser(model=Order) @@ -124,7 +135,7 @@ class Order(BaseModel): id: int description: str items: List[OrderItem] # nesting models are supported - optional_field: Optional[str] # this field may or may not be available when parsing + optional_field: Optional[str] = None # this field may or may not be available when parsing payload = { diff --git a/pyproject.toml b/pyproject.toml index 0356ff6167d..b6b89d3bae1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,6 +161,15 @@ markers = [ "perf: marks perf tests to be deselected (deselect with '-m \"not perf\"')", ] +# MAINTENANCE: Remove these lines when drop support to Pydantic v1 +filterwarnings=[ + "ignore:.*The `parse_obj` method is deprecated*:DeprecationWarning", + "ignore:.*The `parse_raw` method is deprecated*:DeprecationWarning", + "ignore:.*load_str_bytes is deprecated*:DeprecationWarning", + "ignore:.*The `dict` method is deprecated; use `model_dump` instead*:DeprecationWarning", + "ignore:.*Pydantic V1 style `@validator` validators are deprecated*:DeprecationWarning" +] + [build-system] requires = ["poetry-core>=1.3.2"] build-backend = "poetry.core.masonry.api" diff --git a/ruff.toml b/ruff.toml index f3a50abc720..424040ede1f 100644 --- a/ruff.toml +++ b/ruff.toml @@ -68,3 +68,4 @@ split-on-trailing-comma = true "tests/e2e/utils/data_builder/__init__.py" = ["F401"] "tests/e2e/utils/data_fetcher/__init__.py" = ["F401"] "aws_lambda_powertools/utilities/data_classes/s3_event.py" = ["A003"] +"aws_lambda_powertools/utilities/parser/models/__init__.py" = ["E402"] diff --git a/tests/functional/batch/sample_models.py b/tests/functional/batch/sample_models.py index 556ff0ebf8a..72029e154d5 100644 --- 
a/tests/functional/batch/sample_models.py +++ b/tests/functional/batch/sample_models.py @@ -35,12 +35,15 @@ class OrderDynamoDB(BaseModel): # so Pydantic can auto-initialize nested Order model @validator("Message", pre=True) def transform_message_to_dict(cls, value: Dict[Literal["S"], str]): - return json.loads(value["S"]) + try: + return json.loads(value["S"]) + except TypeError: + raise ValueError class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel): - NewImage: Optional[OrderDynamoDB] - OldImage: Optional[OrderDynamoDB] + NewImage: Optional[OrderDynamoDB] = None + OldImage: Optional[OrderDynamoDB] = None class OrderDynamoDBRecord(DynamoDBStreamRecordModel): diff --git a/tests/functional/parser/conftest.py b/tests/functional/parser/conftest.py index 34199a322b2..41347bc5fa9 100644 --- a/tests/functional/parser/conftest.py +++ b/tests/functional/parser/conftest.py @@ -6,6 +6,15 @@ from aws_lambda_powertools.utilities.parser import BaseEnvelope +@pytest.fixture +def pydanticv2_only(): + from pydantic import __version__ + + version = __version__.split(".") + if version[0] != "2": + pytest.skip("pydanticv2 test only") + + @pytest.fixture def dummy_event(): return {"payload": {"message": "hello world"}} diff --git a/tests/functional/parser/test_parser.py b/tests/functional/parser/test_parser.py index c439134071c..1f948655917 100644 --- a/tests/functional/parser/test_parser.py +++ b/tests/functional/parser/test_parser.py @@ -1,6 +1,7 @@ import json from typing import Dict, Union +import pydantic import pytest from aws_lambda_powertools.utilities.parser import ( @@ -53,6 +54,27 @@ def handle_no_envelope(event: Dict, _: LambdaContext): handle_no_envelope(dummy_event["payload"], LambdaContext()) +@pytest.mark.usefixtures("pydanticv2_only") +def test_pydanticv2_validation(): + class FakeModel(pydantic.BaseModel): + region: str + event_name: str + version: int + + # WHEN using the validator for v2 + @pydantic.field_validator("version", mode="before") + def 
validate_field(cls, value): + return int(value) + + event_raw = {"region": "us-east-1", "event_name": "aws-powertools", "version": "10"} + event_parsed = FakeModel(**event_raw) + + # THEN parse the event as expected + assert event_parsed.region == event_raw["region"] + assert event_parsed.event_name == event_raw["event_name"] + assert event_parsed.version == int(event_raw["version"]) + + @pytest.mark.parametrize("invalid_schema", [None, str, bool(), [], (), object]) def test_parser_with_invalid_schema_type(dummy_event, invalid_schema): @event_parser(model=invalid_schema) diff --git a/tests/functional/test_utilities_batch.py b/tests/functional/test_utilities_batch.py index 1831ef973d9..e146d65744f 100644 --- a/tests/functional/test_utilities_batch.py +++ b/tests/functional/test_utilities_batch.py @@ -501,8 +501,8 @@ def transform_message_to_dict(cls, value: Dict[Literal["S"], str]): return json.loads(value["S"]) class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel): - NewImage: Optional[OrderDynamoDB] - OldImage: Optional[OrderDynamoDB] + NewImage: Optional[OrderDynamoDB] = None + OldImage: Optional[OrderDynamoDB] = None class OrderDynamoDBRecord(DynamoDBStreamRecordModel): dynamodb: OrderDynamoDBChangeRecord @@ -545,8 +545,8 @@ def transform_message_to_dict(cls, value: Dict[Literal["S"], str]): return json.loads(value["S"]) class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel): - NewImage: Optional[OrderDynamoDB] - OldImage: Optional[OrderDynamoDB] + NewImage: Optional[OrderDynamoDB] = None + OldImage: Optional[OrderDynamoDB] = None class OrderDynamoDBRecord(DynamoDBStreamRecordModel): dynamodb: OrderDynamoDBChangeRecord diff --git a/tests/unit/parser/schemas.py b/tests/unit/parser/schemas.py index 1da0213ff45..fd2f29697dc 100644 --- a/tests/unit/parser/schemas.py +++ b/tests/unit/parser/schemas.py @@ -22,8 +22,8 @@ class MyDynamoBusiness(BaseModel): class MyDynamoScheme(DynamoDBStreamChangedRecordModel): - NewImage: 
Optional[MyDynamoBusiness] - OldImage: Optional[MyDynamoBusiness] + NewImage: Optional[MyDynamoBusiness] = None + OldImage: Optional[MyDynamoBusiness] = None class MyDynamoDBStreamRecordModel(DynamoDBStreamRecordModel): diff --git a/tests/unit/parser/test_apigw.py b/tests/unit/parser/test_apigw.py index a65d181cc54..b2ed294ff7a 100644 --- a/tests/unit/parser/test_apigw.py +++ b/tests/unit/parser/test_apigw.py @@ -138,7 +138,9 @@ def test_apigw_event_with_invalid_websocket_request(): errors = err.value.errors() assert len(errors) == 1 expected_msg = "messageId is available only when the `eventType` is `MESSAGE`" - assert errors[0]["msg"] == expected_msg + # Pydantic v2 adds "Value error," to the error string. + # So to maintain compatibility with v1 and v2, we've changed the way we test this. + assert expected_msg in errors[0]["msg"] assert expected_msg in str(err.value) diff --git a/tests/unit/parser/test_cloudwatch.py b/tests/unit/parser/test_cloudwatch.py index bc8bf0776f9..48d296c40ef 100644 --- a/tests/unit/parser/test_cloudwatch.py +++ b/tests/unit/parser/test_cloudwatch.py @@ -86,7 +86,9 @@ def test_handle_invalid_cloudwatch_trigger_event_no_envelope(): with pytest.raises(ValidationError) as context: CloudWatchLogsModel(**raw_event) - assert context.value.errors()[0]["msg"] == "unable to decompress data" + # Pydantic v2 adds "Value error," to the error string. + # So to maintain compatibility with v1 and v2, we've changed the way we test this. + assert "unable to decompress data" in context.value.errors()[0]["msg"] def test_handle_invalid_event_with_envelope():