diff --git a/aws_lambda_powertools/utilities/validation/base.py b/aws_lambda_powertools/utilities/validation/base.py index eab7f89064d..bacd25a4efa 100644 --- a/aws_lambda_powertools/utilities/validation/base.py +++ b/aws_lambda_powertools/utilities/validation/base.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Dict +from typing import Any, Dict, Optional import fastjsonschema import jmespath @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) -def validate_data_against_schema(data: Dict, schema: Dict): +def validate_data_against_schema(data: Dict, schema: Dict, formats: Optional[Dict] = None): """Validate dict data against given JSON Schema Parameters @@ -20,6 +20,8 @@ def validate_data_against_schema(data: Dict, schema: Dict): Data set to be validated schema : Dict JSON Schema to validate against + formats: Dict + Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool Raises ------ @@ -29,12 +31,12 @@ def validate_data_against_schema(data: Dict, schema: Dict): When JSON schema provided is invalid """ try: - fastjsonschema.validate(definition=schema, data=data) + fastjsonschema.validate(definition=schema, data=data, formats=formats) + except (TypeError, AttributeError, fastjsonschema.JsonSchemaDefinitionException) as e: + raise InvalidSchemaFormatError(f"Schema received: {schema}, Formats: {formats}. Error: {e}") except fastjsonschema.JsonSchemaException as e: message = f"Failed schema validation. Error: {e.message}, Path: {e.path}, Data: {e.value}" # noqa: B306, E501 raise SchemaValidationError(message) - except (TypeError, AttributeError) as e: - raise InvalidSchemaFormatError(f"Schema received: {schema}. 
Error: {e}") def unwrap_event_from_envelope(data: Dict, envelope: str, jmespath_options: Dict) -> Any: diff --git a/aws_lambda_powertools/utilities/validation/validator.py b/aws_lambda_powertools/utilities/validation/validator.py index c404e90f55a..23a7241fd32 100644 --- a/aws_lambda_powertools/utilities/validation/validator.py +++ b/aws_lambda_powertools/utilities/validation/validator.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Callable, Dict, Union +from typing import Any, Callable, Dict, Optional, Union from ...middleware_factory import lambda_handler_decorator from .base import unwrap_event_from_envelope, validate_data_against_schema @@ -13,12 +13,35 @@ def validator( event: Union[Dict, str], context: Any, inbound_schema: Dict = None, + inbound_formats: Optional[Dict] = None, outbound_schema: Dict = None, + outbound_formats: Optional[Dict] = None, envelope: str = None, jmespath_options: Dict = None, ) -> Any: """Lambda handler decorator to validate incoming/outbound data using a JSON Schema + Parameters + ---------- + handler : Callable + Method to annotate on + event : Dict + Lambda event to be validated + context : Any + Lambda context object + inbound_schema : Dict + JSON Schema to validate incoming event + outbound_schema : Dict + JSON Schema to validate outbound event + envelope : Dict + JMESPath expression to filter data against + jmespath_options : Dict + Alternative JMESPath options to be included when filtering expr + inbound_formats: Dict + Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool + outbound_formats: Dict + Custom formats containing a key (e.g. 
int64) and a value expressed as regex or callback returning bool + Example ------- @@ -78,23 +101,6 @@ def handler(event, context): def handler(event, context): return event - Parameters - ---------- - handler : Callable - Method to annotate on - event : Dict - Lambda event to be validated - context : Any - Lambda context object - inbound_schema : Dict - JSON Schema to validate incoming event - outbound_schema : Dict - JSON Schema to validate outbound event - envelope : Dict - JMESPath expression to filter data against - jmespath_options : Dict - Alternative JMESPath options to be included when filtering expr - Returns ------- Any @@ -114,22 +120,44 @@ def handler(event, context): if inbound_schema: logger.debug("Validating inbound event") - validate_data_against_schema(data=event, schema=inbound_schema) + validate_data_against_schema(data=event, schema=inbound_schema, formats=inbound_formats) response = handler(event, context) if outbound_schema: logger.debug("Validating outbound event") - validate_data_against_schema(data=response, schema=outbound_schema) + validate_data_against_schema(data=response, schema=outbound_schema, formats=outbound_formats) return response -def validate(event: Dict, schema: Dict = None, envelope: str = None, jmespath_options: Dict = None): +def validate( + event: Dict, + schema: Dict = None, + formats: Optional[Dict] = None, + envelope: str = None, + jmespath_options: Dict = None, +): """Standalone function to validate event data using a JSON Schema Typically used when you need more control over the validation process. + Parameters + ---------- + event : Dict + Lambda event to be validated + schema : Dict + JSON Schema to validate incoming event + envelope : Dict + JMESPath expression to filter data against + jmespath_options : Dict + Alternative JMESPath options to be included when filtering expr + formats: Dict + Custom formats containing a key (e.g. 
int64) and a value expressed as regex or callback returning bool + + Example + ------- + **Validate event** from aws_lambda_powertools.utilities.validation import validate @@ -178,17 +206,6 @@ def handler(event, context): validate(event=event, schema=json_schema_dict, envelope="awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]") return event - Parameters - ---------- - event : Dict - Lambda event to be validated - schema : Dict - JSON Schema to validate incoming event - envelope : Dict - JMESPath expression to filter data against - jmespath_options : Dict - Alternative JMESPath options to be included when filtering expr - Raises ------ SchemaValidationError @@ -201,4 +218,4 @@ def handler(event, context): if envelope: event = unwrap_event_from_envelope(data=event, envelope=envelope, jmespath_options=jmespath_options) - validate_data_against_schema(data=event, schema=schema) + validate_data_against_schema(data=event, schema=schema, formats=formats) diff --git a/docs/content/utilities/validation.mdx b/docs/content/utilities/validation.mdx index 74b762a096e..2a30dd74998 100644 --- a/docs/content/utilities/validation.mdx +++ b/docs/content/utilities/validation.mdx @@ -69,6 +69,40 @@ def handler(event, context): return event ``` +### Validating custom formats + +> New in 1.10.0 +> **NOTE**: JSON Schema DRAFT 7 [has many new built-in formats](https://json-schema.org/understanding-json-schema/reference/string.html#format) such as date, time, and specifically a regex format which might be a better replacement for a custom format, if you do have control over the schema. 
+ +If you have JSON Schemas with custom formats, for example having an `int64` for high-precision integers, you can pass an optional validation for each format using the `formats` parameter; otherwise, validation will fail: + +**Example of custom integer format** + +```json +{ + "lastModifiedTime": { + "format": "int64", + "type": "integer" + } +} +``` + +For each format defined in a dictionary key, you must use a regex, or a function that returns a boolean to instruct the validator on how to proceed when encountering that type. + +```python +from aws_lambda_powertools.utilities.validation import validate + +event = {} # some event +schema_with_custom_format = {} # some JSON schema that defines a custom format + +custom_format = { + "int64": True, # simply ignore it, + "positive": lambda x: False if x < 0 else True +} + +validate(event=event, schema=schema_with_custom_format, formats=custom_format) +``` + ## Unwrapping events prior to validation You might want to validate only a portion of your event - This is where the `envelope` parameter is for. 
diff --git a/tests/functional/validator/conftest.py b/tests/functional/validator/conftest.py index 740355db70b..ab7a26012ba 100644 --- a/tests/functional/validator/conftest.py +++ b/tests/functional/validator/conftest.py @@ -245,6 +245,80 @@ def eventbridge_event(): } +@pytest.fixture +def eventbridge_cloudtrail_s3_head_object_event(): + return { + "account": "123456789012", + "detail": { + "additionalEventData": { + "AuthenticationMethod": "AuthHeader", + "CipherSuite": "ECDHE-RSA-AES128-GCM-SHA256", + "SignatureVersion": "SigV4", + "bytesTransferredIn": 0, + "bytesTransferredOut": 0, + "x-amz-id-2": "ejUr9Nd/4IO1juF/a6GOcu+PKrVX6dOH6jDjQOeCJvtARUqzxrhHGrhEt04cqYtAZVqcSEXYqo0=", + }, + "awsRegion": "us-west-1", + "eventCategory": "Data", + "eventID": "be4fdb30-9508-4984-b071-7692221899ae", + "eventName": "HeadObject", + "eventSource": "s3.amazonaws.com", + "eventTime": "2020-12-22T10:05:29Z", + "eventType": "AwsApiCall", + "eventVersion": "1.07", + "managementEvent": False, + "readOnly": True, + "recipientAccountId": "123456789012", + "requestID": "A123B1C123D1E123", + "requestParameters": { + "Host": "lambda-artifacts-deafc19498e3f2df.s3.us-west-1.amazonaws.com", + "bucketName": "lambda-artifacts-deafc19498e3f2df", + "key": "path1/path2/path3/file.zip", + }, + "resources": [ + { + "ARN": "arn:aws:s3:::lambda-artifacts-deafc19498e3f2df/path1/path2/path3/file.zip", + "type": "AWS::S3::Object", + }, + { + "ARN": "arn:aws:s3:::lambda-artifacts-deafc19498e3f2df", + "accountId": "123456789012", + "type": "AWS::S3::Bucket", + }, + ], + "responseElements": None, + "sourceIPAddress": "AWS Internal", + "userAgent": "AWS Internal", + "userIdentity": { + "accessKeyId": "ABCDEFGHIJKLMNOPQR12", + "accountId": "123456789012", + "arn": "arn:aws:sts::123456789012:assumed-role/role-name1/1234567890123", + "invokedBy": "AWS Internal", + "principalId": "ABCDEFGHIJKLMN1OPQRST:1234567890123", + "sessionContext": { + "attributes": {"creationDate": "2020-12-09T09:58:24Z", 
"mfaAuthenticated": "false"}, + "sessionIssuer": { + "accountId": "123456789012", + "arn": "arn:aws:iam::123456789012:role/role-name1", + "principalId": "ABCDEFGHIJKLMN1OPQRST", + "type": "Role", + "userName": "role-name1", + }, + }, + "type": "AssumedRole", + }, + "vpcEndpointId": "vpce-a123cdef", + }, + "detail-type": "AWS API Call via CloudTrail", + "id": "e0bad426-0a70-4424-b53a-eb902ebf5786", + "region": "us-west-1", + "resources": [], + "source": "aws.s3", + "time": "2020-12-22T10:05:29Z", + "version": "0", + } + + @pytest.fixture def sqs_event(): return { @@ -356,3 +430,138 @@ def cloudwatch_logs_schema(): ], }, } + + +@pytest.fixture +def eventbridge_schema_registry_cloudtrail_v2_s3(): + return { + "$schema": "http://json-schema.org/draft-04/schema#", + "definitions": { + "AWSAPICallViaCloudTrail": { + "properties": { + "additionalEventData": {"$ref": "#/definitions/AdditionalEventData"}, + "awsRegion": {"type": "string"}, + "errorCode": {"type": "string"}, + "errorMessage": {"type": "string"}, + "eventID": {"type": "string"}, + "eventName": {"type": "string"}, + "eventSource": {"type": "string"}, + "eventTime": {"format": "date-time", "type": "string"}, + "eventType": {"type": "string"}, + "eventVersion": {"type": "string"}, + "recipientAccountId": {"type": "string"}, + "requestID": {"type": "string"}, + "requestParameters": {"$ref": "#/definitions/RequestParameters"}, + "resources": {"items": {"type": "object"}, "type": "array"}, + "responseElements": {"type": ["object", "null"]}, + "sourceIPAddress": {"type": "string"}, + "userAgent": {"type": "string"}, + "userIdentity": {"$ref": "#/definitions/UserIdentity"}, + "vpcEndpointId": {"type": "string"}, + "x-amazon-open-api-schema-readOnly": {"type": "boolean"}, + }, + "required": [ + "eventID", + "awsRegion", + "eventVersion", + "responseElements", + "sourceIPAddress", + "eventSource", + "requestParameters", + "resources", + "userAgent", + "readOnly", + "userIdentity", + "eventType", + 
"additionalEventData", + "vpcEndpointId", + "requestID", + "eventTime", + "eventName", + "recipientAccountId", + ], + "type": "object", + }, + "AdditionalEventData": { + "properties": { + "objectRetentionInfo": {"$ref": "#/definitions/ObjectRetentionInfo"}, + "x-amz-id-2": {"type": "string"}, + }, + "required": ["x-amz-id-2"], + "type": "object", + }, + "Attributes": { + "properties": { + "creationDate": {"format": "date-time", "type": "string"}, + "mfaAuthenticated": {"type": "string"}, + }, + "required": ["mfaAuthenticated", "creationDate"], + "type": "object", + }, + "LegalHoldInfo": { + "properties": { + "isUnderLegalHold": {"type": "boolean"}, + "lastModifiedTime": {"format": "int64", "type": "integer"}, + }, + "type": "object", + }, + "ObjectRetentionInfo": { + "properties": { + "legalHoldInfo": {"$ref": "#/definitions/LegalHoldInfo"}, + "retentionInfo": {"$ref": "#/definitions/RetentionInfo"}, + }, + "type": "object", + }, + "RequestParameters": { + "properties": { + "bucketName": {"type": "string"}, + "key": {"type": "string"}, + "legal-hold": {"type": "string"}, + "retention": {"type": "string"}, + }, + "required": ["bucketName", "key"], + "type": "object", + }, + "RetentionInfo": { + "properties": { + "lastModifiedTime": {"format": "int64", "type": "integer"}, + "retainUntilMode": {"type": "string"}, + "retainUntilTime": {"format": "int64", "type": "integer"}, + }, + "type": "object", + }, + "SessionContext": { + "properties": {"attributes": {"$ref": "#/definitions/Attributes"}}, + "required": ["attributes"], + "type": "object", + }, + "UserIdentity": { + "properties": { + "accessKeyId": {"type": "string"}, + "accountId": {"type": "string"}, + "arn": {"type": "string"}, + "principalId": {"type": "string"}, + "sessionContext": {"$ref": "#/definitions/SessionContext"}, + "type": {"type": "string"}, + }, + "required": ["accessKeyId", "sessionContext", "accountId", "principalId", "type", "arn"], + "type": "object", + }, + }, + "properties": { + "account": 
{"type": "string"}, + "detail": {"$ref": "#/definitions/AWSAPICallViaCloudTrail"}, + "detail-type": {"type": "string"}, + "id": {"type": "string"}, + "region": {"type": "string"}, + "resources": {"items": {"type": "string"}, "type": "array"}, + "source": {"type": "string"}, + "time": {"format": "date-time", "type": "string"}, + "version": {"type": "string"}, + }, + "required": ["detail-type", "resources", "id", "source", "time", "detail", "region", "version", "account"], + "title": "AWSAPICallViaCloudTrail", + "type": "object", + "x-amazon-events-detail-type": "AWS API Call via CloudTrail", + "x-amazon-events-source": "aws.s3", + } diff --git a/tests/functional/validator/test_validator.py b/tests/functional/validator/test_validator.py index c0e12792e73..4a773571ddc 100644 --- a/tests/functional/validator/test_validator.py +++ b/tests/functional/validator/test_validator.py @@ -37,6 +37,28 @@ def test_validate_invalid_schema_format(raw_event): validate(event=raw_event, schema="schema.json") +def test_validate_accept_schema_custom_format( + eventbridge_schema_registry_cloudtrail_v2_s3, eventbridge_cloudtrail_s3_head_object_event +): + validate( + event=eventbridge_cloudtrail_s3_head_object_event, + schema=eventbridge_schema_registry_cloudtrail_v2_s3, + formats={"int64": lambda v: True}, + ) + + +@pytest.mark.parametrize("invalid_format", [None, bool(), {}, [], object]) +def test_validate_invalid_custom_format( + eventbridge_schema_registry_cloudtrail_v2_s3, eventbridge_cloudtrail_s3_head_object_event, invalid_format +): + with pytest.raises(exceptions.InvalidSchemaFormatError): + validate( + event=eventbridge_cloudtrail_s3_head_object_event, + schema=eventbridge_schema_registry_cloudtrail_v2_s3, + formats=invalid_format, + ) + + def test_validate_invalid_envelope_expression(schema, wrapped_event): with pytest.raises(exceptions.InvalidEnvelopeExpressionError): validate(event=wrapped_event, schema=schema, envelope=True)