From 2b47b9c1716907d5768324e48a1f02763c1bb62a Mon Sep 17 00:00:00 2001 From: Ivan Mincik Date: Sun, 8 Nov 2020 22:05:09 +0100 Subject: [PATCH] feat(infra): Dataset Space endpoint implementation (#38) * feat(infra): Dataset Space endpoint implementation Implemented features: * POST request - create dataset * PATCH request - update dataset * GET request - retrieve single or multiple datasets * DELETE request - delete dataset --- .github/workflows/ci.yml | 6 + .pylintrc | 3 +- backend/endpoints/__init__.py | 0 backend/endpoints/datasets/__init__.py | 0 backend/endpoints/datasets/bundle.sh | 8 + backend/endpoints/datasets/create.py | 56 ++++ backend/endpoints/datasets/delete.py | 47 +++ backend/endpoints/datasets/entrypoint.py | 55 ++++ backend/endpoints/datasets/get.py | 99 ++++++ backend/endpoints/datasets/list.py | 24 ++ backend/endpoints/datasets/model.py | 80 +++++ backend/endpoints/datasets/requirements.txt | 2 + backend/endpoints/datasets/update.py | 74 +++++ backend/endpoints/utils.py | 15 + backend/tests/__init__.py | 0 backend/tests/conftest.py | 54 ++++ backend/tests/test_datasets_endpoint.py | 331 ++++++++++++++++++++ infra/data_stores/data_lake_stack.py | 42 ++- infra/lambda/dataset-handler/function.py | 13 - infra/requirements.txt | 1 + infra/tests/test_lambda_functions.py | 32 ++ 21 files changed, 921 insertions(+), 21 deletions(-) create mode 100644 backend/endpoints/__init__.py create mode 100644 backend/endpoints/datasets/__init__.py create mode 100644 backend/endpoints/datasets/bundle.sh create mode 100644 backend/endpoints/datasets/create.py create mode 100644 backend/endpoints/datasets/delete.py create mode 100644 backend/endpoints/datasets/entrypoint.py create mode 100644 backend/endpoints/datasets/get.py create mode 100644 backend/endpoints/datasets/list.py create mode 100644 backend/endpoints/datasets/model.py create mode 100644 backend/endpoints/datasets/requirements.txt create mode 100644 backend/endpoints/datasets/update.py create mode 100644 backend/endpoints/utils.py create mode 100644 backend/tests/__init__.py create mode 100644 backend/tests/conftest.py create mode 100644 backend/tests/test_datasets_endpoint.py delete mode 100644 infra/lambda/dataset-handler/function.py create mode 100644 infra/tests/test_lambda_functions.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0fcfa7844..0f09157d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ jobs: python -m pip install --upgrade pip pip install -r requirements-dev.txt pip install -r infra/requirements.txt + pip install -r backend/endpoints/datasets/requirements.txt - name: Check last commit message if: github.event_name == 'push' @@ -97,6 +98,7 @@ jobs: python -m pip install --upgrade pip pip install -r requirements-dev.txt pip install -r infra/requirements.txt + pip install -r backend/endpoints/datasets/requirements.txt - name: Use Node.js 12.x for CDK deployment uses: actions/setup-node@v2.1.2 @@ -134,6 +136,10 @@ jobs: run: | pytest infra/tests/ + - name: Run AWS backend tests + run: | + pytest backend/tests/ + - name: Destroy AWS stack used for testing run: | cdk destroy --force geospatial-data-lake diff --git a/.pylintrc b/.pylintrc index f5039e751..b11220aa6 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,7 +1,8 @@ [MASTER] disable = bad-continuation, - invalid-name + invalid-name, + duplicate-code [FORMAT] max-line-length=100 [MISCELLANEOUS] diff --git a/backend/endpoints/__init__.py b/backend/endpoints/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/backend/endpoints/datasets/__init__.py b/backend/endpoints/datasets/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/endpoints/datasets/bundle.sh b/backend/endpoints/datasets/bundle.sh new file mode 100644 index 000000000..9b2a11188 --- /dev/null +++ b/backend/endpoints/datasets/bundle.sh @@ -0,0 +1,8 @@ +set -o errexit + +pip install --requirement=datasets/requirements.txt --target=/asset-output +mkdir --parents /asset-output/endpoints/datasets +touch /asset-output/endpoints/__init__.py +touch /asset-output/endpoints/datasets/__init__.py +cp --archive --update --verbose datasets/*.py /asset-output/endpoints/datasets/ +cp --archive --update --verbose utils.py /asset-output/endpoints/ diff --git a/backend/endpoints/datasets/create.py b/backend/endpoints/datasets/create.py new file mode 100644 index 000000000..354206def --- /dev/null +++ b/backend/endpoints/datasets/create.py @@ -0,0 +1,56 @@ +"""Create dataset function.""" + +from endpoints.datasets.model import DatasetModel +from endpoints.utils import error_response, success_response +from jsonschema import ValidationError, validate + + +def create_dataset(payload): + """POST: Create Dataset.""" + + BODY_SCHEMA = { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["IMAGE", "RASTER"], + }, + "title": {"type": "string"}, + "owning_group": {"type": "string"}, + }, + "required": ["type", "title", "owning_group"], + } + + # request body validation + req_body = payload["body"] + try: + validate(req_body, BODY_SCHEMA) + except ValidationError as err: + return error_response(400, err.message) + + # check for duplicate type/title + if DatasetModel.datasets_tile_idx.count( + hash_key=f"TYPE#{req_body['type']}", + range_key_condition=(DatasetModel.title == f"{req_body['title']}"), + ): + return error_response( + 409, f"dataset '{req_body['title']}' of type '{req_body['type']}' already exists" + ) + + # create dataset + dataset = DatasetModel( + type=f"TYPE#{req_body['type']}", + title=req_body["title"], + owning_group=req_body["owning_group"], + ) + dataset.save() + dataset.refresh(consistent_read=True) + + # return response + resp_body = {} + resp_body = dict(dataset) + + resp_body["id"] = dataset.dataset_id + resp_body["type"] = dataset.dataset_type + + return success_response(201, resp_body) diff --git a/backend/endpoints/datasets/delete.py b/backend/endpoints/datasets/delete.py new file mode 100644 index 000000000..3e0341ec7 --- /dev/null +++ b/backend/endpoints/datasets/delete.py @@ -0,0 +1,47 @@ +"""Delete dataset function.""" + +from endpoints.datasets.model import DatasetModel +from endpoints.utils import error_response, success_response +from jsonschema import ValidationError, validate +from pynamodb.exceptions import DoesNotExist + + +def delete_dataset(payload): + """DELETE: Delete Dataset.""" + + BODY_SCHEMA = { + "type": "object", + "properties": { + "id": {"type": "string"}, + "type": { + "type": "string", + "enum": ["IMAGE", "RASTER"], + }, + }, + "required": ["id", "type"], + } + + # request body validation + req_body = payload["body"] + try: + validate(req_body, BODY_SCHEMA) + except ValidationError as err: + return error_response(400, err.message) + + # get dataset to delete + try: + dataset = DatasetModel.get( + hash_key=f"DATASET#{req_body['id']}", + range_key=f"TYPE#{req_body['type']}", + consistent_read=True, + ) + except DoesNotExist: + return error_response( + 404, f"dataset '{req_body['id']}' of type '{req_body['type']}' does not 
exist" + ) + + # delete dataset + dataset.delete() + + resp_body = {} + return success_response(204, resp_body) diff --git a/backend/endpoints/datasets/entrypoint.py b/backend/endpoints/datasets/entrypoint.py new file mode 100644 index 000000000..41b14ac65 --- /dev/null +++ b/backend/endpoints/datasets/entrypoint.py @@ -0,0 +1,55 @@ +""" +Dataset endpoint Lambda function. +""" + +from endpoints.datasets.create import create_dataset +from endpoints.datasets.delete import delete_dataset +from endpoints.datasets.get import get_dataset_filter, get_dataset_single +from endpoints.datasets.list import list_datasets +from endpoints.datasets.update import update_dataset +from endpoints.utils import error_response +from jsonschema import ValidationError, validate + +REQUEST_SCHEMA = { + "type": "object", + "properties": { + "httpMethod": {"type": "string", "enum": ["GET", "POST", "PATCH", "DELETE"]}, + "body": {"type": "object"}, + }, + "required": ["httpMethod", "body"], +} + +# TODO: implement GET response paging +# TODO: allow Dataset delete only if no Dataset Version exists + + +def lambda_handler( # pylint:disable=inconsistent-return-statements,too-many-return-statements + event, _context +): + """Main Lambda entry point.""" + + # request validation + try: + validate(event, REQUEST_SCHEMA) + method = event["httpMethod"] + except ValidationError as err: + return error_response(400, err.message) + + if method == "POST": + return create_dataset(event) + + if method == "GET": + if "id" in event["body"] and "type" in event["body"]: + return get_dataset_single(event) + + if "title" in event["body"] or "owning_group" in event["body"]: + return get_dataset_filter(event) + + if event["body"] == {}: + return list_datasets() + + if method == "PATCH": + return update_dataset(event) + + if method == "DELETE": + return delete_dataset(event) diff --git a/backend/endpoints/datasets/get.py b/backend/endpoints/datasets/get.py new file mode 100644 index 000000000..5a141456a --- /dev/null +++ b/backend/endpoints/datasets/get.py @@ -0,0 +1,99 @@ +"""Get datasets functions.""" + +from endpoints.datasets.model import DatasetModel +from endpoints.utils import error_response, success_response +from jsonschema import ValidationError, validate +from pynamodb.exceptions import DoesNotExist + + +def get_dataset_single(payload): + """GET: Get single Dataset.""" + + BODY_SCHEMA = { + "type": "object", + "properties": { + "id": {"type": "string"}, + "type": { + "type": "string", + "enum": ["IMAGE", "RASTER"], + }, + }, + "required": ["id", "type"], + } + + # request body validation + req_body = payload["body"] + try: + validate(req_body, BODY_SCHEMA) + except ValidationError as err: + return error_response(400, err.message) + + # get dataset + try: + dataset = DatasetModel.get( + hash_key=f"DATASET#{req_body['id']}", + range_key=f"TYPE#{req_body['type']}", + consistent_read=True, + ) + except DoesNotExist: + return error_response( + 404, f"dataset '{req_body['id']}' of type '{req_body['type']}' does not exist" + ) + + # return response + resp_body = {} + resp_body = dict(dataset) + + resp_body["id"] = dataset.dataset_id + resp_body["type"] = dataset.dataset_type + + return success_response(200, resp_body) + + +def get_dataset_filter(payload): + """GET: Get Datasets by filter.""" + + BODY_SCHEMA = { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["IMAGE", "RASTER"], + }, + "title": {"type": "string"}, + "owning_group": {"type": "string"}, + }, + "required": ["type"], + "minProperties": 2, + 
"maxProperties": 2, + } + + # request body validation + req_body = payload["body"] + try: + validate(req_body, BODY_SCHEMA) + except ValidationError as err: + return error_response(400, err.message) + + # dataset query by filter + if "title" in req_body: + datasets = DatasetModel.datasets_tile_idx.query( + hash_key=f"TYPE#{req_body['type']}", + range_key_condition=DatasetModel.title == f"{req_body['title']}", + ) + + if "owning_group" in req_body: + datasets = DatasetModel.datasets_owning_group_idx.query( + hash_key=f"TYPE#{req_body['type']}", + range_key_condition=DatasetModel.owning_group == f"{req_body['owning_group']}", + ) + + # return response + resp_body = [] + for dataset in datasets: + resp_item = dict(dataset) + resp_item["id"] = dataset.dataset_id + resp_item["type"] = dataset.dataset_type + resp_body.append(resp_item) + + return success_response(200, resp_body) diff --git a/backend/endpoints/datasets/list.py b/backend/endpoints/datasets/list.py new file mode 100644 index 000000000..25aa46189 --- /dev/null +++ b/backend/endpoints/datasets/list.py @@ -0,0 +1,24 @@ +"""List all datasets function.""" + +from endpoints.datasets.model import DatasetModel +from endpoints.utils import success_response + + +def list_datasets(): + """GET: List all Datasets.""" + + # list all datasets + datasets = DatasetModel.scan( + filter_condition=DatasetModel.id.startswith("DATASET#") + & DatasetModel.type.startswith("TYPE#") + ) + + # return response + resp_body = [] + for dataset in datasets: + resp_item = dict(dataset) + resp_item["id"] = dataset.dataset_id + resp_item["type"] = dataset.dataset_type + resp_body.append(resp_item) + + return success_response(200, resp_body) diff --git a/backend/endpoints/datasets/model.py b/backend/endpoints/datasets/model.py new file mode 100644 index 000000000..f243221d7 --- /dev/null +++ b/backend/endpoints/datasets/model.py @@ -0,0 +1,80 @@ +"""Dataset object DynamoDB model.""" + +import uuid +from datetime import datetime, timezone + +from pynamodb.attributes import UTCDateTimeAttribute, UnicodeAttribute +from pynamodb.indexes import AllProjection, GlobalSecondaryIndex +from pynamodb.models import Model + + +class DatasetsTitleIdx(GlobalSecondaryIndex): + """Dataset type/title global index.""" + + class Meta: # pylint:disable=too-few-public-methods + """Meta class.""" + + index_name = "datasets_title" + read_capacity_units = 1 + write_capacity_units = 1 + projection = AllProjection() + + type = UnicodeAttribute(hash_key=True, attr_name="sk") + title = UnicodeAttribute(range_key=True) + + +class DatasetsOwningGroupIdx(GlobalSecondaryIndex): + """Dataset type/owning_group global index.""" + + class Meta: # pylint:disable=too-few-public-methods + """Meta class.""" + + index_name = "datasets_owning_group" + read_capacity_units = 1 + write_capacity_units = 1 + projection = AllProjection() + + type = UnicodeAttribute(hash_key=True, attr_name="sk") + owning_group = UnicodeAttribute(range_key=True) + + +class DatasetModel(Model): + """Dataset model.""" + + class Meta: # pylint:disable=too-few-public-methods + """Meta class.""" + + table_name = "datasets" + region = "ap-southeast-2" # TODO: don't hardcode region + + id = UnicodeAttribute( + hash_key=True, attr_name="pk", default=f"DATASET#{uuid.uuid1().hex}", null=False + ) + type = UnicodeAttribute(range_key=True, attr_name="sk", null=False) + title = UnicodeAttribute(null=False) + owning_group = UnicodeAttribute(null=False) + created_at = UTCDateTimeAttribute(null=False, default=datetime.now(timezone.utc)) + 
updated_at = UTCDateTimeAttribute() + + datasets_tile_idx = DatasetsTitleIdx() + datasets_owning_group_idx = DatasetsOwningGroupIdx() + + def save( + self, conditional_operator=None, **expected_values + ): # pylint:disable=unused-argument,arguments-differ + self.updated_at = datetime.now(timezone.utc) + super().save() + + @property + def dataset_id(self): + """Dataset ID value.""" + return self.id.split("#")[1] + + @property + def dataset_type(self): + """Dataset type value.""" + return self.type.split("#")[1] + + def __iter__(self): + for name, attr in self.get_attributes().items(): + yield name, attr.serialize(getattr(self, name)) diff --git a/backend/endpoints/datasets/requirements.txt b/backend/endpoints/datasets/requirements.txt new file mode 100644 index 000000000..8f3bac02b --- /dev/null +++ b/backend/endpoints/datasets/requirements.txt @@ -0,0 +1,2 @@ +jsonschema +pynamodb diff --git a/backend/endpoints/datasets/update.py b/backend/endpoints/datasets/update.py new file mode 100644 index 000000000..d7bbd2713 --- /dev/null +++ b/backend/endpoints/datasets/update.py @@ -0,0 +1,74 @@ +"""Update dataset function.""" + +from endpoints.datasets.model import DatasetModel +from endpoints.utils import error_response, success_response +from jsonschema import ValidationError, validate +from pynamodb.exceptions import DoesNotExist + + +def update_dataset(payload): + """PATCH: Update Dataset.""" + + BODY_SCHEMA = { + "type": "object", + "properties": { + "id": {"type": "string"}, + "type": { + "type": "string", + "enum": ["IMAGE", "RASTER"], + }, + "title": {"type": "string"}, + "owning_group": {"type": "string"}, + }, + "required": [ + "id", + "type", + ], + "minProperties": 3, + } + + # request body validation + req_body = payload["body"] + try: + validate(req_body, BODY_SCHEMA) + except ValidationError as err: + return error_response(400, err.message) + + # check for duplicate type/title + if DatasetModel.datasets_tile_idx.count( + hash_key=f"TYPE#{req_body['type']}", + range_key_condition=(DatasetModel.title == f"{req_body['title']}"), + ): + return error_response( + 409, f"dataset '{req_body['title']}' of type '{req_body['type']}' already exists" + ) + + # get dataset to update + try: + dataset = DatasetModel.get( + hash_key=f"DATASET#{req_body['id']}", + range_key=f"TYPE#{req_body['type']}", + consistent_read=True, + ) + except DoesNotExist: + return error_response( + 404, f"dataset '{req_body['id']}' of type '{req_body['type']}' does not exist" + ) + + # update dataset + for attr in DatasetModel.get_attributes(): + if attr not in ("id", "type"): + if attr in req_body: + setattr(dataset, attr, req_body[attr]) + + dataset.save() + dataset.refresh(consistent_read=True) + + # return response + resp_body = {} + resp_body = dict(dataset) + + resp_body["id"] = dataset.dataset_id + resp_body["type"] = dataset.dataset_type + + return success_response(200, resp_body) diff --git a/backend/endpoints/utils.py b/backend/endpoints/utils.py new file mode 100644 index 000000000..2536efddc --- /dev/null +++ b/backend/endpoints/utils.py @@ -0,0 +1,15 @@ +"""Utility functions.""" + +from http.client import responses as http_responses + + +def error_response(code, message): + """Return error response content as string.""" + + return {"statusCode": code, "body": {"message": f"{http_responses[code]}: {message}."}} + + +def success_response(code, body): + """Return success response content as string.""" + + return {"statusCode": code, "body": body} diff --git a/backend/tests/__init__.py 
b/backend/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 000000000..5621dce40 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,54 @@ +""" +Pytest configuration file. +""" + +import logging +from datetime import datetime, timedelta, timezone + +import pytest +from endpoints.datasets.model import DatasetModel + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@pytest.fixture() +def db_prepare(): + """ + Prepare DB with some dataset records and clean it up after test is + finished. + """ + + items = [ + DatasetModel( + id="DATASET#111abc", + type="TYPE#RASTER", + title="Dataset ABC", + owning_group="A_ABC_XYZ", + created_at=datetime.now(timezone.utc) - timedelta(days=10), + updated_at=datetime.now(timezone.utc) - timedelta(days=1), + ), + DatasetModel( + id="DATASET#222xyz", + type="TYPE#RASTER", + title="Dataset XYZ", + owning_group="A_ABC_XYZ", + created_at=datetime.now(timezone.utc) - timedelta(days=100), + updated_at=datetime.now(timezone.utc) - timedelta(days=10), + ), + ] + + logger.debug("Running DB Setup") + + with DatasetModel.batch_write() as batch: + for item in items: + batch.save(item) + + yield # teardown + + logger.debug("Running DB Teardown") + + for item in DatasetModel.scan(): + item.delete() + + return True diff --git a/backend/tests/test_datasets_endpoint.py b/backend/tests/test_datasets_endpoint.py new file mode 100644 index 000000000..be6bd4ceb --- /dev/null +++ b/backend/tests/test_datasets_endpoint.py @@ -0,0 +1,331 @@ +""" +Dataset endpoint Lambda function tests. Working Data Lake AWS environment is +required (run '$ cdk deploy' before running tests). +""" + +import logging +import re + +from endpoints.datasets import entrypoint + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def test_should_fail_if_request_not_containing_method(): + """Test if request fails correctly if not containing method attribute.""" + + body = {} + + resp = entrypoint.lambda_handler({"body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 400 + assert resp["body"]["message"] == "Bad Request: 'httpMethod' is a required property." + + +def test_should_fail_if_request_not_containing_body(): + """Test if request fails correctly if not containing body.""" + + method = "POST" + + resp = entrypoint.lambda_handler({"httpMethod": method}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 400 + assert resp["body"]["message"] == "Bad Request: 'body' is a required property." 
+ + +def test_should_create_dataset(db_prepare): # pylint:disable=unused-argument + """Test Dataset creation using POST method.""" + + method = "POST" + body = {} + body["type"] = "RASTER" + body["title"] = "Dataset 123" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 201 + assert len(resp["body"]["id"]) == 32 # 32 characters long UUID + assert resp["body"]["type"] == body["type"] + assert resp["body"]["title"] == body["title"] + assert resp["body"]["owning_group"] == body["owning_group"] + + +def test_should_fail_if_post_request_not_containing_mandatory_attribute(): + """Test if POST request fails correctly if not containing mandatory attribute.""" + + method = "POST" + body = {} + # body["type"] = "RASTER" # type attribute is missing + body["title"] = "Dataset 123" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 400 + assert resp["body"]["message"] == "Bad Request: 'type' is a required property." + + +def test_should_fail_if_post_request_containing_incorrect_dataset_type(): + """Test if POST request fails correctly if containing incorrect dataset type.""" + + method = "POST" + body = {} + body["type"] = "INCORRECT_TYPE" + body["title"] = "Dataset 123" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 400 + assert re.search("^Bad Request: 'INCORRECT_TYPE' is not one of .*", resp["body"]["message"]) + + +def test_shoud_fail_if_post_request_containing_duplicate_dataset_title( + db_prepare, +): # pylint:disable=unused-argument + """ + Test if POST request fails correctly if containing duplicate dataset + title. + """ + + method = "POST" + body = {} + body["type"] = "RASTER" + body["title"] = "Dataset ABC" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 409 + assert ( + resp["body"]["message"] + == "Conflict: dataset 'Dataset ABC' of type 'RASTER' already exists." 
+ ) + + +def test_should_return_single_dataset(db_prepare): # pylint:disable=unused-argument + """Test retrieving single Dataset using GET method.""" + + method = "GET" + body = {} + body["id"] = "111abc" + body["type"] = "RASTER" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 200 + assert resp["body"]["id"] == "111abc" + assert resp["body"]["type"] == "RASTER" + assert resp["body"]["title"] == "Dataset ABC" + + +def test_should_return_all_datasets(db_prepare): # pylint:disable=unused-argument + """Test retrieving all Datasets using GET method.""" + + method = "GET" + body = {} + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 200 + assert len(resp["body"]) == 2 + assert resp["body"][0]["id"] in ("111abc", "222xyz") + assert resp["body"][0]["type"] == "RASTER" + assert resp["body"][0]["title"] in ("Dataset ABC", "Dataset XYZ") + + +def test_should_return_single_dataset_filtered_by_type_and_title( + db_prepare, +): # pylint:disable=unused-argument + """ + Test filtering Datasets by type and title. Must return single dataset, + because type/title combination must be unique. + """ + + method = "GET" + body = {} + body["type"] = "RASTER" + body["title"] = "Dataset ABC" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 200 + assert len(resp["body"]) == 1 + assert resp["body"][0]["id"] == "111abc" + assert resp["body"][0]["type"] == "RASTER" + assert resp["body"][0]["title"] == "Dataset ABC" + + +def test_should_return_multiple_datasets_filtered_by_type_and_owning_group( + db_prepare, +): # pylint:disable=unused-argument + """ + Test filtering Datasets by type and title. + """ + + method = "GET" + body = {} + body["type"] = "RASTER" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 200 + assert len(resp["body"]) == 2 + assert resp["body"][0]["id"] == "111abc" + assert resp["body"][0]["type"] == "RASTER" + assert resp["body"][0]["owning_group"] == "A_ABC_XYZ" + + +def test_should_fail_if_get_request_containing_tile_and_owning_group_filter( + db_prepare, +): # pylint:disable=unused-argument + """ + Test if GET request fails correctly if filter contains both tile and + owning_group attributes. + """ + + method = "GET" + body = {} + body["type"] = "RASTER" + body["title"] = "Dataset ABC" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 400 + assert re.search("^Bad Request: .* has too many properties", resp["body"]["message"]) + + +def test_should_fail_if_get_request_requests_not_existing_dataset(): + """ + Test if GET request fails correctly if not existing dataset ID is specified. + """ + method = "GET" + body = {} + body["id"] = "NOT_EXISTING_ID" + body["type"] = "RASTER" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 404 + assert ( + resp["body"]["message"] + == "Not Found: dataset 'NOT_EXISTING_ID' of type 'RASTER' does not exist." 
+ ) + + +def test_should_update_dataset(db_prepare): # pylint:disable=unused-argument + """Test Dataset update using PATCH method.""" + + method = "PATCH" + body = {} + body["id"] = "111abc" + body["type"] = "RASTER" + body["title"] = "New Dataset ABC" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 200 + assert resp["body"]["title"] == "New Dataset ABC" + + +def test_should_fail_if_updating_with_already_existing_dataset_title( + db_prepare, +): # pylint:disable=unused-argument + """ + Test if PATCH request fails correctly if trying to update dataset with + already existing dataset title. + """ + + method = "PATCH" + body = {} + body["id"] = "111abc" + body["type"] = "RASTER" + body["title"] = "Dataset XYZ" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 409 + assert ( + resp["body"]["message"] + == "Conflict: dataset 'Dataset XYZ' of type 'RASTER' already exists." + ) + + +def test_should_fail_if_updating_not_existing_dataset(db_prepare): # pylint:disable=unused-argument + """ + Test if PATCH request fails correctly if trying to update not existing + dataset. + """ + method = "PATCH" + body = {} + body["id"] = "NOT_EXISTING_ID" + body["type"] = "RASTER" + body["title"] = "New Dataset ABC" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 404 + assert ( + resp["body"]["message"] + == "Not Found: dataset 'NOT_EXISTING_ID' of type 'RASTER' does not exist." + ) + + +def test_should_delete_dataset(db_prepare): # pylint:disable=unused-argument + """Test Dataset deletion using DELETE method.""" + + method = "DELETE" + body = {} + body["id"] = "111abc" + body["type"] = "RASTER" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 204 + assert resp["body"] == {} + + +def test_should_fail_if_deleting_not_existing_dataset(db_prepare): # pylint:disable=unused-argument + """ + Test if DELETE request fails correctly if trying to update not existing + dataset. + """ + + method = "DELETE" + body = {} + body["id"] = "NOT_EXISTING_ID" + body["type"] = "RASTER" + body["title"] = "Dataset ABC" + body["owning_group"] = "A_ABC_XYZ" + + resp = entrypoint.lambda_handler({"httpMethod": method, "body": body}, "context") + logger.info("Response: %s", resp) + + assert resp["statusCode"] == 404 + assert ( + resp["body"]["message"] + == "Not Found: dataset 'NOT_EXISTING_ID' of type 'RASTER' does not exist." + ) diff --git a/infra/data_stores/data_lake_stack.py b/infra/data_stores/data_lake_stack.py index b50d58d01..8831cb6a8 100644 --- a/infra/data_stores/data_lake_stack.py +++ b/infra/data_stores/data_lake_stack.py @@ -2,6 +2,8 @@ Data Lake AWS resources definitions. 
""" +import os + from aws_cdk import aws_dynamodb, aws_lambda, aws_s3, core from aws_cdk.core import Tags @@ -40,22 +42,48 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: self, "data-lake-application-db", table_name="datasets", - partition_key=aws_dynamodb.Attribute( - name="pk", type=aws_dynamodb.AttributeType("STRING") - ), - sort_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType("STRING")), + partition_key=aws_dynamodb.Attribute(name="pk", type=aws_dynamodb.AttributeType.STRING), + sort_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING), point_in_time_recovery=True, removal_policy=REMOVAL_POLICY, ) Tags.of(db_datasets_table).add("ApplicationLayer", "application-db") # Lambda Handler Functions + lambda_path = "../backend/endpoints/datasets" dataset_handler_function = aws_lambda.Function( self, - "dataset-handler-function", - handler="function.lambda_handler", + "datasets-endpoint-function", + function_name="datasets-endpoint", + handler="endpoints.datasets.entrypoint.lambda_handler", runtime=aws_lambda.Runtime.PYTHON_3_6, - code=aws_lambda.Code.from_asset(path="lambda/dataset-handler"), + code=aws_lambda.Code.from_asset( + path=os.path.dirname(lambda_path), + bundling=core.BundlingOptions( + image=aws_lambda.Runtime.PYTHON_3_6.bundling_docker_image, # pylint:disable=no-member + command=[ + "bash", + "-c", + open(f"{lambda_path}/bundle.sh", "r").read(), + ], + ), + ), + ) + db_datasets_table.add_global_secondary_index( + index_name="datasets_title", + partition_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING), + sort_key=aws_dynamodb.Attribute(name="title", type=aws_dynamodb.AttributeType.STRING), + ) + db_datasets_table.add_global_secondary_index( + index_name="datasets_owning_group", + partition_key=aws_dynamodb.Attribute(name="sk", type=aws_dynamodb.AttributeType.STRING), + sort_key=aws_dynamodb.Attribute( + name="owning_group", type=aws_dynamodb.AttributeType.STRING + ), ) db_datasets_table.grant_read_write_data(dataset_handler_function) + db_datasets_table.grant( + dataset_handler_function, "dynamodb:DescribeTable" + ) # required by pynamodb + Tags.of(dataset_handler_function).add("ApplicationLayer", "api") diff --git a/infra/lambda/dataset-handler/function.py b/infra/lambda/dataset-handler/function.py deleted file mode 100644 index af77bdbf7..000000000 --- a/infra/lambda/dataset-handler/function.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -Dataset Space Lambda handler function. -""" - - -def lambda_handler(event, context): # pylint:disable=unused-argument - """Main Lambda entry point.""" - - return True - - -if __name__ == "__main__": - lambda_handler({"x": "x", "y": "y"}, "context") diff --git a/infra/requirements.txt b/infra/requirements.txt index b6fd21cf8..42a8a0a5e 100644 --- a/infra/requirements.txt +++ b/infra/requirements.txt @@ -3,3 +3,4 @@ aws-cdk.aws-iam aws-cdk.aws-dynamodb aws-cdk.aws-lambda awscli +jsonschema diff --git a/infra/tests/test_lambda_functions.py b/infra/tests/test_lambda_functions.py new file mode 100644 index 000000000..960a52a9e --- /dev/null +++ b/infra/tests/test_lambda_functions.py @@ -0,0 +1,32 @@ +""" +Lambda functions integration tests. +""" + +import json +import uuid + +import boto3 + +LAMBDA = boto3.client("lambda") + + +def test_should_launch_datasets_endpoint_lambda_function(): + """ + Test if datasets endpoint lambda can be successfully launched and has required permission to + create dataset in DB. 
+ """ + + method = "POST" + body = {} + body["type"] = "RASTER" + body["title"] = f"Dataset {uuid.uuid1}.hex" + body["owning_group"] = "A_ABC_XYZ" + + resp = LAMBDA.invoke( + FunctionName="datasets-endpoint", + Payload=json.dumps({"httpMethod": method, "body": body}), + InvocationType="RequestResponse", + ) + json_resp = json.loads(resp["Payload"].read().decode("utf-8")) + + assert json_resp["statusCode"] == 201