feat(infra): Dataset Space endpoint implementation (#38)
* feat(infra): Dataset Space endpoint implementation

Implemented features:
* POST request - create dataset
* PATCH request - update dataset
* GET request - retrieve single or multiple datasets
* DELETE request - delete dataset
imincik authored Nov 8, 2020
1 parent c9dbbb0 commit 2b47b9c
Showing 21 changed files with 921 additions and 21 deletions.
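For orientation before the diffs: every handler added here receives an API-Gateway-style event whose body must satisfy a JSON schema defined in that handler. A minimal sketch of the request shapes, with all field values illustrative rather than taken from this commit:

# Hypothetical request events for the dataset endpoint; values are illustrative.
create_event = {
    "httpMethod": "POST",
    "body": {"type": "RASTER", "title": "Auckland 2020", "owning_group": "A_XYZ"},
}
get_single_event = {
    "httpMethod": "GET",
    "body": {"id": "some-dataset-id", "type": "RASTER"},
}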
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
@@ -24,6 +24,7 @@ jobs:
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt
          pip install -r infra/requirements.txt
          pip install -r backend/endpoints/datasets/requirements.txt
      - name: Check last commit message
        if: github.event_name == 'push'
@@ -97,6 +98,7 @@ jobs:
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt
          pip install -r infra/requirements.txt
          pip install -r backend/endpoints/datasets/requirements.txt
      - name: Use Node.js 12.x for CDK deployment
        uses: actions/setup-node@v2.1.2
@@ -134,6 +136,10 @@ jobs:
        run: |
          pytest infra/tests/
      - name: Run AWS backend tests
        run: |
          pytest backend/tests/
      - name: Destroy AWS stack used for testing
        run: |
          cdk destroy --force geospatial-data-lake
3 changes: 2 additions & 1 deletion .pylintrc
@@ -1,7 +1,8 @@
[MASTER]
disable =
    bad-continuation,
    invalid-name,
    duplicate-code
[FORMAT]
max-line-length=100
[MISCELLANEOUS]
Empty file added backend/endpoints/__init__.py
Empty file.
Empty file added backend/endpoints/datasets/__init__.py
Empty file.
8 changes: 8 additions & 0 deletions backend/endpoints/datasets/bundle.sh
@@ -0,0 +1,8 @@
set -o errexit

pip install --requirement=datasets/requirements.txt --target=/asset-output
mkdir --parents /asset-output/endpoints/datasets
touch /asset-output/endpoints/__init__.py
touch /asset-output/endpoints/datasets/__init__.py
cp --archive --update --verbose datasets/*.py /asset-output/endpoints/datasets/
cp --archive --update --verbose utils.py /asset-output/endpoints/
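This script is meant to run inside CDK's asset-bundling container and writes the packaged Lambda code to /asset-output. A minimal sketch of how it might be wired into the CDK app, assuming the CDK v1 Python API; the stack class, construct ID, and asset path are hypothetical:

# Hypothetical CDK wiring for bundle.sh (CDK v1 Python API; names are illustrative).
from aws_cdk import aws_lambda, core


class DatasetsEndpointStack(core.Stack):
    def __init__(self, scope: core.Construct, stack_id: str) -> None:
        super().__init__(scope, stack_id)

        aws_lambda.Function(
            self,
            "datasets-endpoint",
            handler="endpoints.datasets.entrypoint.lambda_handler",
            runtime=aws_lambda.Runtime.PYTHON_3_8,
            code=aws_lambda.Code.from_asset(
                "backend/endpoints",
                bundling=core.BundlingOptions(
                    # bundle.sh runs inside this image and writes to /asset-output
                    image=aws_lambda.Runtime.PYTHON_3_8.bundling_docker_image,
                    command=["bash", "datasets/bundle.sh"],
                ),
            ),
        )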
56 changes: 56 additions & 0 deletions backend/endpoints/datasets/create.py
@@ -0,0 +1,56 @@
"""Create dataset function."""

from endpoints.datasets.model import DatasetModel
from endpoints.utils import error_response, success_response
from jsonschema import ValidationError, validate


def create_dataset(payload):
"""POST: Create Dataset."""

BODY_SCHEMA = {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["IMAGE", "RASTER"],
},
"title": {"type": "string"},
"owning_group": {"type": "string"},
},
"required": ["type", "title", "owning_group"],
}

# request body validation
req_body = payload["body"]
try:
validate(req_body, BODY_SCHEMA)
except ValidationError as err:
return error_response(400, err.message)

# check for duplicate type/title
if DatasetModel.datasets_tile_idx.count(
hash_key=f"TYPE#{req_body['type']}",
range_key_condition=(DatasetModel.title == f"{req_body['title']}"),
):
return error_response(
409, f"dataset '{req_body['title']}' of type '{req_body['type']}' already exists"
)

# create dataset
dataset = DatasetModel(
type=f"TYPE#{req_body['type']}",
title=req_body["title"],
owning_group=req_body["owning_group"],
)
dataset.save()
dataset.refresh(consistent_read=True)

# return response
resp_body = {}
resp_body = dict(dataset)

resp_body["id"] = dataset.dataset_id
resp_body["type"] = dataset.dataset_type

return success_response(201, resp_body)
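A quick sketch of exercising create_dataset directly, assuming success_response and error_response (from endpoints/utils.py, whose diff is not shown above) wrap a status code and body; all values are illustrative:

# Hypothetical direct call; field values are illustrative.
payload = {
    "httpMethod": "POST",
    "body": {"type": "RASTER", "title": "Auckland 2020", "owning_group": "A_XYZ"},
}
response = create_dataset(payload)
# expect a 201 with the stored item on success; repeating the same
# type/title pair should produce the 409 duplicate error above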
47 changes: 47 additions & 0 deletions backend/endpoints/datasets/delete.py
@@ -0,0 +1,47 @@
"""Delete dataset function."""

from endpoints.datasets.model import DatasetModel
from endpoints.utils import error_response, success_response
from jsonschema import ValidationError, validate
from pynamodb.exceptions import DoesNotExist


def delete_dataset(payload):
"""DELETE: Delete Dataset."""

BODY_SCHEMA = {
"type": "object",
"properties": {
"id": {"type": "string"},
"type": {
"type": "string",
"enum": ["IMAGE", "RASTER"],
},
},
"required": ["id", "type"],
}

# request body validation
req_body = payload["body"]
try:
validate(req_body, BODY_SCHEMA)
except ValidationError as err:
return error_response(400, err.message)

# get dataset to delete
try:
dataset = DatasetModel.get(
hash_key=f"DATASET#{req_body['id']}",
range_key=f"TYPE#{req_body['type']}",
consistent_read=True,
)
except DoesNotExist:
return error_response(
404, f"dataset '{req_body['id']}' of type '{req_body['type']}' does not exist"
)

# delete dataset
dataset.delete()

resp_body = {}
return success_response(204, resp_body)
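And the matching sketch for deletion; the id value is hypothetical:

# Hypothetical delete request; the id is illustrative.
delete_payload = {
    "httpMethod": "DELETE",
    "body": {"id": "some-dataset-id", "type": "RASTER"},
}
response = delete_dataset(delete_payload)
# expect a 204 on success, or the 404 error above for an unknown id/type pair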
55 changes: 55 additions & 0 deletions backend/endpoints/datasets/entrypoint.py
@@ -0,0 +1,55 @@
"""
Dataset endpoint Lambda function.
"""

from endpoints.datasets.create import create_dataset
from endpoints.datasets.delete import delete_dataset
from endpoints.datasets.get import get_dataset_filter, get_dataset_single
from endpoints.datasets.list import list_datasets
from endpoints.datasets.update import update_dataset
from endpoints.utils import error_response
from jsonschema import ValidationError, validate

REQUEST_SCHEMA = {
"type": "object",
"properties": {
"httpMethod": {"type": "string", "enum": ["GET", "POST", "PATCH", "DELETE"]},
"body": {"type": "object"},
},
"required": ["httpMethod", "body"],
}

# TODO: implement GET response paging
# TODO: allow Dataset delete only if no Dataset Version exists


def lambda_handler( # pylint:disable=inconsistent-return-statements,too-many-return-statements
event, _context
):
"""Main Lambda entry point."""

# request validation
try:
validate(event, REQUEST_SCHEMA)
method = event["httpMethod"]
except ValidationError as err:
return error_response(400, err.message)

if method == "POST":
return create_dataset(event)

if method == "GET":
if "id" in event["body"] and "type" in event["body"]:
return get_dataset_single(event)

if "title" in event["body"] or "owning_group" in event["body"]:
return get_dataset_filter(event)

if event["body"] == {}:
return list_datasets()

if method == "PATCH":
return update_dataset(event)

if method == "DELETE":
return delete_dataset(event)
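Because GET routing keys off the body contents rather than the URL path, the same method reaches three different handlers. A sketch of the dispatch, with hypothetical events:

# Hypothetical GET events and the handler each is routed to.
lambda_handler({"httpMethod": "GET", "body": {}}, None)  # list_datasets()
lambda_handler(
    {"httpMethod": "GET", "body": {"id": "some-dataset-id", "type": "RASTER"}}, None
)  # get_dataset_single()
lambda_handler(
    {"httpMethod": "GET", "body": {"type": "RASTER", "title": "Auckland 2020"}}, None
)  # get_dataset_filter()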
99 changes: 99 additions & 0 deletions backend/endpoints/datasets/get.py
@@ -0,0 +1,99 @@
"""Get datasets functions."""

from endpoints.datasets.model import DatasetModel
from endpoints.utils import error_response, success_response
from jsonschema import ValidationError, validate
from pynamodb.exceptions import DoesNotExist


def get_dataset_single(payload):
"""GET: Get single Dataset."""

BODY_SCHEMA = {
"type": "object",
"properties": {
"id": {"type": "string"},
"type": {
"type": "string",
"enum": ["IMAGE", "RASTER"],
},
},
"required": ["id", "type"],
}

# request body validation
req_body = payload["body"]
try:
validate(req_body, BODY_SCHEMA)
except ValidationError as err:
return error_response(400, err.message)

# get dataset
try:
dataset = DatasetModel.get(
hash_key=f"DATASET#{req_body['id']}",
range_key=f"TYPE#{req_body['type']}",
consistent_read=True,
)
except DoesNotExist:
return error_response(
404, f"dataset '{req_body['id']}' of type '{req_body['type']}' does not exist"
)

# return response
resp_body = {}
resp_body = dict(dataset)

resp_body["id"] = dataset.dataset_id
resp_body["type"] = dataset.dataset_type

return success_response(200, resp_body)


def get_dataset_filter(payload):
"""GET: Get Datasets by filter."""

BODY_SCHEMA = {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["IMAGE", "RASTER"],
},
"title": {"type": "string"},
"owning_group": {"type": "string"},
},
"required": ["type"],
"minProperties": 2,
"maxProperties": 2,
}

# request body validation
req_body = payload["body"]
try:
validate(req_body, BODY_SCHEMA)
except ValidationError as err:
return error_response(400, err.message)

# dataset query by filter
if "title" in req_body:
datasets = DatasetModel.datasets_tile_idx.query(
hash_key=f"TYPE#{req_body['type']}",
range_key_condition=DatasetModel.title == f"{req_body['title']}",
)

if "owning_group" in req_body:
datasets = DatasetModel.datasets_owning_group_idx.query(
hash_key=f"TYPE#{req_body['type']}",
range_key_condition=DatasetModel.owning_group == f"{req_body['owning_group']}",
)

# return response
resp_body = []
for dataset in datasets:
resp_item = dict(dataset)
resp_item["id"] = dataset.dataset_id
resp_item["type"] = dataset.dataset_type
resp_body.append(resp_item)

return success_response(200, resp_body)
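Each filter targets a different global secondary index: title queries go through datasets_tile_idx and owning_group queries through datasets_owning_group_idx, both hash-keyed on the TYPE# value. Example filter bodies (values illustrative); the schema requires exactly two properties, so only one filter can be supplied per request:

# Hypothetical filter bodies; exactly two properties each, per the schema above.
title_filter = {"type": "RASTER", "title": "Auckland 2020"}        # datasets_tile_idx
owning_group_filter = {"type": "RASTER", "owning_group": "A_XYZ"}  # datasets_owning_group_idx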
24 changes: 24 additions & 0 deletions backend/endpoints/datasets/list.py
@@ -0,0 +1,24 @@
"""List all datasets function."""

from endpoints.datasets.model import DatasetModel
from endpoints.utils import success_response


def list_datasets():
"""GET: List all Datasets."""

# list all datasets
datasets = DatasetModel.scan(
filter_condition=DatasetModel.id.startswith("DATASET#")
& DatasetModel.type.startswith("TYPE#")
)

# return response
resp_body = []
for dataset in datasets:
resp_item = dict(dataset)
resp_item["id"] = dataset.dataset_id
resp_item["type"] = dataset.dataset_type
resp_body.append(resp_item)

return success_response(200, resp_body)
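The scan returns every dataset item in one pass; per the TODO in entrypoint.py, response paging is not yet implemented. The body it produces is a list shaped roughly like this (values hypothetical):

# Roughly the list_datasets response body; values are hypothetical.
example_body = [
    {"id": "some-dataset-id", "type": "RASTER", "title": "Auckland 2020", "owning_group": "A_XYZ"},
]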