Skip to content

Commit 0b29cef

Browse files
feat(parser): Adds DDB deserialization to DynamoDBStreamChangedRecordModel (aws-powertools#4401)
* adds DDB deserialiser to model * minor refactor to move the deserializer to a shared place * fix docstring * add tests for deserializer * fix tests to match implementation * fix functional tests for batch --------- Co-authored-by: Leandro Damascena <lcdama@amazon.pt>
1 parent be673f6 commit 0b29cef

File tree

8 files changed

+171
-112
lines changed

8 files changed

+171
-112
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from decimal import Clamped, Context, Decimal, Inexact, Overflow, Rounded, Underflow
2+
from typing import Any, Callable, Dict, Optional, Sequence, Set
3+
4+
# NOTE: DynamoDB supports up to 38 digits precision
# Therefore, this ensures our Decimal follows what's stored in the table.
# Inexact and Rounded are trapped, so a number that does not fit in 38 digits
# raises instead of being silently rounded.
DYNAMODB_CONTEXT = Context(
    Emin=-128,
    Emax=126,
    prec=38,
    traps=[Clamped, Overflow, Inexact, Rounded, Underflow],
)


class TypeDeserializer:
    """
    Deserializes DynamoDB types to Python types.

    It's based on boto3's [DynamoDB TypeDeserializer](https://boto3.amazonaws.com/v1/documentation/api/latest/_modules/boto3/dynamodb/types.html).

    The only notable difference is that for Binary (`B`, `BS`) values we return Python Bytes directly,
    since we don't support Python 2.
    """

    def deserialize(self, value: Dict) -> Any:
        """Deserialize DynamoDB data types into Python types.

        The first key of ``value`` is taken as the DynamoDB type descriptor and is
        matched case-insensitively against the ``_deserialize_<type>`` methods.

        Here are the various conversions:

        DynamoDB                                Python
        --------                                ------
        {'NULL': True}                          None
        {'BOOL': True/False}                    True/False
        {'N': str(value)}                       Decimal(str(value))
        {'S': string}                           string
        {'B': bytes}                            bytes
        {'NS': [str(value)]}                    set([Decimal(str(value))])
        {'SS': [string]}                        set([string])
        {'BS': [bytes]}                         set([bytes])
        {'L': list}                             list
        {'M': dict}                             dict

        Parameters
        ----------
        value: Dict
            DynamoDB value to be deserialized to a python type,
            in the form ``{<type descriptor>: <payload>}``

        Returns
        --------
        any
            Python native type converted from DynamoDB type

        Raises
        ------
        TypeError
            If ``value`` is empty or its type descriptor is not supported
        """
        # An empty mapping has no type descriptor; report it the same way as any
        # other invalid input instead of leaking an IndexError/StopIteration.
        if not value:
            raise TypeError("Value must be a nonempty dictionary whose key is a valid dynamodb type.")

        dynamodb_type = next(iter(value.keys()))
        deserializer: Optional[Callable] = getattr(self, f"_deserialize_{dynamodb_type}".lower(), None)
        if deserializer is None:
            raise TypeError(f"Dynamodb type {dynamodb_type} is not supported")

        return deserializer(value[dynamodb_type])

    def _deserialize_null(self, value: bool) -> None:
        # The payload of a NULL attribute carries no information
        return None

    def _deserialize_bool(self, value: bool) -> bool:
        return value

    def _deserialize_n(self, value: str) -> Decimal:
        # Built through DYNAMODB_CONTEXT so precision/exponent limits are enforced
        return DYNAMODB_CONTEXT.create_decimal(value)

    def _deserialize_s(self, value: str) -> str:
        return value

    def _deserialize_b(self, value: bytes) -> bytes:
        return value

    def _deserialize_ns(self, value: Sequence[str]) -> Set[Decimal]:
        return set(map(self._deserialize_n, value))

    def _deserialize_ss(self, value: Sequence[str]) -> Set[str]:
        return set(map(self._deserialize_s, value))

    def _deserialize_bs(self, value: Sequence[bytes]) -> Set[bytes]:
        return set(map(self._deserialize_b, value))

    def _deserialize_l(self, value: Sequence[Dict]) -> Sequence[Any]:
        # Each list element is itself a typed DynamoDB value
        return [self.deserialize(v) for v in value]

    def _deserialize_m(self, value: Dict) -> Dict:
        # Map keys are plain attribute names; only the values are typed
        return {k: self.deserialize(v) for k, v in value.items()}

aws_lambda_powertools/utilities/data_classes/dynamo_db_stream_event.py

+2-94
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,9 @@
1-
from decimal import Clamped, Context, Decimal, Inexact, Overflow, Rounded, Underflow
21
from enum import Enum
3-
from typing import Any, Callable, Dict, Iterator, Optional, Sequence, Set
2+
from typing import Any, Dict, Iterator, Optional
43

4+
from aws_lambda_powertools.shared.dynamodb_deserializer import TypeDeserializer
55
from aws_lambda_powertools.utilities.data_classes.common import DictWrapper
66

7-
# NOTE: DynamoDB supports up to 38 digits precision
8-
# Therefore, this ensures our Decimal follows what's stored in the table
9-
DYNAMODB_CONTEXT = Context(
10-
Emin=-128,
11-
Emax=126,
12-
prec=38,
13-
traps=[Clamped, Overflow, Inexact, Rounded, Underflow],
14-
)
15-
16-
17-
class TypeDeserializer:
18-
"""
19-
Deserializes DynamoDB types to Python types.
20-
21-
It's based on boto3's [DynamoDB TypeDeserializer](https://boto3.amazonaws.com/v1/documentation/api/latest/_modules/boto3/dynamodb/types.html).
22-
23-
The only notable difference is that for Binary (`B`, `BS`) values we return Python Bytes directly,
24-
since we don't support Python 2.
25-
"""
26-
27-
def deserialize(self, value: Dict) -> Any:
28-
"""Deserialize DynamoDB data types into Python types.
29-
30-
Parameters
31-
----------
32-
value: Any
33-
DynamoDB value to be deserialized to a python type
34-
35-
36-
Here are the various conversions:
37-
38-
DynamoDB Python
39-
-------- ------
40-
{'NULL': True} None
41-
{'BOOL': True/False} True/False
42-
{'N': Decimal(value)} Decimal(value)
43-
{'S': string} string
44-
{'B': bytes} bytes
45-
{'NS': [str(value)]} set([str(value)])
46-
{'SS': [string]} set([string])
47-
{'BS': [bytes]} set([bytes])
48-
{'L': list} list
49-
{'M': dict} dict
50-
51-
Parameters
52-
----------
53-
value: Any
54-
DynamoDB value to be deserialized to a python type
55-
56-
Returns
57-
--------
58-
any
59-
Python native type converted from DynamoDB type
60-
"""
61-
62-
dynamodb_type = list(value.keys())[0]
63-
deserializer: Optional[Callable] = getattr(self, f"_deserialize_{dynamodb_type}".lower(), None)
64-
if deserializer is None:
65-
raise TypeError(f"Dynamodb type {dynamodb_type} is not supported")
66-
67-
return deserializer(value[dynamodb_type])
68-
69-
def _deserialize_null(self, value: bool) -> None:
70-
return None
71-
72-
def _deserialize_bool(self, value: bool) -> bool:
73-
return value
74-
75-
def _deserialize_n(self, value: str) -> Decimal:
76-
return DYNAMODB_CONTEXT.create_decimal(value)
77-
78-
def _deserialize_s(self, value: str) -> str:
79-
return value
80-
81-
def _deserialize_b(self, value: bytes) -> bytes:
82-
return value
83-
84-
def _deserialize_ns(self, value: Sequence[str]) -> Set[Decimal]:
85-
return set(map(self._deserialize_n, value))
86-
87-
def _deserialize_ss(self, value: Sequence[str]) -> Set[str]:
88-
return set(map(self._deserialize_s, value))
89-
90-
def _deserialize_bs(self, value: Sequence[bytes]) -> Set[bytes]:
91-
return set(map(self._deserialize_b, value))
92-
93-
def _deserialize_l(self, value: Sequence[Dict]) -> Sequence[Any]:
94-
return [self.deserialize(v) for v in value]
95-
96-
def _deserialize_m(self, value: Dict) -> Dict:
97-
return {k: self.deserialize(v) for k, v in value.items()}
98-
997

1008
class StreamViewType(Enum):
1019
"""The type of data from the modified DynamoDB item that was captured in this stream record"""

aws_lambda_powertools/utilities/parser/models/dynamodb.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
from datetime import datetime
22
from typing import Any, Dict, List, Optional, Type, Union
33

4-
from pydantic import BaseModel
4+
from pydantic import BaseModel, field_validator
55

6+
from aws_lambda_powertools.shared.dynamodb_deserializer import TypeDeserializer
67
from aws_lambda_powertools.utilities.parser.types import Literal
78

9+
_DESERIALIZER = TypeDeserializer()
10+
811

912
class DynamoDBStreamChangedRecordModel(BaseModel):
1013
ApproximateCreationDateTime: Optional[datetime] = None
11-
Keys: Dict[str, Dict[str, Any]]
14+
Keys: Dict[str, Any]
1215
NewImage: Optional[Union[Dict[str, Any], Type[BaseModel], BaseModel]] = None
1316
OldImage: Optional[Union[Dict[str, Any], Type[BaseModel], BaseModel]] = None
1417
SequenceNumber: str
@@ -26,6 +29,10 @@ class DynamoDBStreamChangedRecordModel(BaseModel):
2629
# raise TypeError("DynamoDB streams model failed validation, missing both new & old stream images") # noqa: ERA001,E501
2730
# return values # noqa: ERA001
2831

32+
@field_validator("Keys", "NewImage", "OldImage", mode="before")
def deserialize_field(cls, value):
    """Convert a DynamoDB-typed attribute map into plain Python values.

    Runs before field validation, so ``value`` is the raw input for the field.
    A dict is treated as ``{attribute name: {type descriptor: payload}}`` and
    every attribute is deserialized. Anything else is returned untouched so
    that regular field validation decides whether it is acceptable.
    """
    # NewImage/OldImage are Optional and may also be given as model instances;
    # neither has `.items()`, so only raw dicts are deserialized here.
    if not isinstance(value, dict):
        return value

    return {k: _DESERIALIZER.deserialize(v) for k, v in value.items()}
35+
2936

3037
class UserIdentity(BaseModel):
3138
type: Literal["Service"] # noqa: VNE003, A003

tests/functional/batch/sample_models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class OrderDynamoDB(BaseModel):
3838
@field_validator("Message", mode="before")
3939
def transform_message_to_dict(cls, value: Dict[Literal["S"], str]):
4040
try:
41-
return json.loads(value["S"])
41+
return json.loads(value)
4242
except TypeError:
4343
raise ValueError
4444

tests/functional/test_utilities_batch.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ class OrderDynamoDB(BaseModel):
526526
# so Pydantic can auto-initialize nested Order model
527527
@field_validator("Message", mode="before")
528528
def transform_message_to_dict(cls, value: Dict[Literal["S"], str]):
529-
return json.loads(value["S"])
529+
return json.loads(value)
530530

531531
class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel):
532532
NewImage: Optional[OrderDynamoDB] = None
@@ -570,7 +570,7 @@ class OrderDynamoDB(BaseModel):
570570
# so Pydantic can auto-initialize nested Order model
571571
@field_validator("Message", mode="before")
572572
def transform_message_to_dict(cls, value: Dict[Literal["S"], str]):
573-
return json.loads(value["S"])
573+
return json.loads(value)
574574

575575
class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel):
576576
NewImage: Optional[OrderDynamoDB] = None

tests/unit/parser/schemas.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Dict, List, Optional
1+
from typing import List, Optional
22

33
from pydantic import BaseModel
44

@@ -13,12 +13,11 @@
1313
SqsModel,
1414
SqsRecordModel,
1515
)
16-
from aws_lambda_powertools.utilities.parser.types import Literal
1716

1817

1918
class MyDynamoBusiness(BaseModel):
20-
Message: Dict[Literal["S"], str]
21-
Id: Dict[Literal["N"], int]
19+
Message: str
20+
Id: int
2221

2322

2423
class MyDynamoScheme(DynamoDBStreamChangedRecordModel):

tests/unit/parser/test_dynamodb.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,19 @@ def test_dynamo_db_stream_trigger_event():
2121

2222
new_image = parserd_event[0]["NewImage"]
2323
new_image_raw = raw_event["Records"][0]["dynamodb"]["NewImage"]
24-
assert new_image.Message["S"] == new_image_raw["Message"]["S"]
25-
assert new_image.Id["N"] == float(new_image_raw["Id"]["N"])
24+
assert new_image.Message == new_image_raw["Message"]["S"]
25+
assert new_image.Id == float(new_image_raw["Id"]["N"])
2626

2727
# record index 1
2828
old_image = parserd_event[1]["OldImage"]
2929
old_image_raw = raw_event["Records"][1]["dynamodb"]["OldImage"]
30-
assert old_image.Message["S"] == old_image_raw["Message"]["S"]
31-
assert old_image.Id["N"] == float(old_image_raw["Id"]["N"])
30+
assert old_image.Message == old_image_raw["Message"]["S"]
31+
assert old_image.Id == float(old_image_raw["Id"]["N"])
3232

3333
new_image = parserd_event[1]["NewImage"]
3434
new_image_raw = raw_event["Records"][1]["dynamodb"]["NewImage"]
35-
assert new_image.Message["S"] == new_image_raw["Message"]["S"]
36-
assert new_image.Id["N"] == float(new_image_raw["Id"]["N"])
35+
assert new_image.Message == new_image_raw["Message"]["S"]
36+
assert new_image.Id == float(new_image_raw["Id"]["N"])
3737

3838

3939
def test_dynamo_db_stream_trigger_event_no_envelope():
@@ -65,12 +65,12 @@ def test_dynamo_db_stream_trigger_event_no_envelope():
6565
keys = dynamodb.Keys
6666
raw_keys = raw_dynamodb["Keys"]
6767
assert keys is not None
68-
id_key = keys["Id"]
69-
assert id_key["N"] == raw_keys["Id"]["N"]
68+
id_key = keys.get("Id")
69+
assert id_key == int(raw_keys["Id"]["N"])
7070

7171
message_key = dynamodb.NewImage.Message
7272
assert message_key is not None
73-
assert message_key["S"] == "New item!"
73+
assert message_key == "New item!"
7474

7575

7676
def test_validate_event_does_not_conform_with_model_no_envelope():
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from typing import Any, Dict, Optional
2+
3+
import pytest
4+
5+
from aws_lambda_powertools.shared.dynamodb_deserializer import TypeDeserializer
6+
7+
8+
class DeserialiserModel:
    """Small test helper that exposes a DynamoDB-typed dict as plain Python values."""

    def __init__(self, data: dict):
        self._deserializer = TypeDeserializer()
        self._data = data

    def _deserialize_dynamodb_dict(self) -> Optional[Dict[str, Any]]:
        """Deserialize every attribute of the wrapped dict, or return None when there is none."""
        raw = self._data
        if raw is not None:
            return {name: self._deserializer.deserialize(attr) for name, attr in raw.items()}

        return None

    @property
    def data(self) -> Optional[Dict[str, Any]]:
        """The wrapped attributes deserialized to Python types (recomputed on each access)."""
        return self._deserialize_dynamodb_dict()
23+
24+
25+
def test_deserializer():
    """Scalar, list and map attributes are converted to their Python equivalents."""
    # DynamoDB encodes every number ("N") as a string, so the fixture does too;
    # passing ints would not exercise the format the deserializer actually receives.
    model = DeserialiserModel(
        {
            "Id": {"S": "Id-123"},
            "Name": {"S": "John Doe"},
            "ZipCode": {"N": "12345"},
            "Things": {"L": [{"N": "0"}, {"N": "1"}, {"N": "2"}, {"N": "3"}]},
            "MoreThings": {"M": {"a": {"S": "foo"}, "b": {"S": "bar"}}},
        },
    )

    data = model.data

    assert data.get("Id") == "Id-123"
    assert data.get("Name") == "John Doe"
    # Numbers come back as Decimal, which compares equal to the matching int
    assert data.get("ZipCode") == 12345
    assert data.get("Things") == [0, 1, 2, 3]
    assert data.get("MoreThings") == {"a": "foo", "b": "bar"}
41+
42+
43+
def test_deserializer_error():
    """An attribute whose type descriptor is unknown makes deserialization raise TypeError."""
    # "X" has no matching deserializer method
    unsupported_item = {
        "Id": {"X": None},
    }
    model = DeserialiserModel(unsupported_item)

    # Deserialization happens on property access, so the access itself must raise
    with pytest.raises(TypeError):
        model.data.get("Id")

0 commit comments

Comments
 (0)