[MAINTENANCE] run lambda as docker in integration tests #117

Merged · 5 commits · Sep 11, 2023
2 changes: 1 addition & 1 deletion .github/workflows/run-integration-tests.yml
@@ -35,4 +35,4 @@ jobs:
         run: |
           curl http://localhost:4566/_localstack/health -i
       - name: Run tests
-        run: make run-integration-tests QA_BUCKET=dqg-settings-local HOST=172.17.0.1 test=data_test
+        run: make run-integration-tests HOST=172.17.0.1
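Note: the test step now only overrides HOST. On the GitHub-hosted Linux runner, 172.17.0.1 is the default Docker bridge gateway, so containers started by docker-compose can reach the LocalStack instance published on the runner's localhost:4566. Below is a minimal Python sketch of the same reachability check the previous step performs with curl, assuming that host and port; the script itself is not part of this PR.

    import requests

    # Assumption: LocalStack is reachable from a container via the default
    # Docker bridge gateway, i.e. the HOST value passed to make above.
    health_url = "http://172.17.0.1:4566/_localstack/health"
    response = requests.get(health_url, timeout=5)
    response.raise_for_status()
    print(response.json())  # LocalStack reports per-service status as JSON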
17 changes: 10 additions & 7 deletions Makefile
@@ -1,6 +1,6 @@
 HOST := host.docker.internal
 PORT := 4566
-QA_BUCKET := integration-test-bucket
+QA_BUCKET := dqg-settings-local
 IMAGE_VERSION := latest
 
 run-localstack:
@@ -17,13 +17,16 @@ build-lambda-img:
 
 build-integration-tests-img: build-lambda-img
     cd ./tests/integration_tests/$(test) && \
-    docker build --build-arg="IMAGE_NAME=$(test)" \
-    --build-arg="VERSION=$(IMAGE_VERSION)" \
-    -t "$(test)_integration_tests" .
+    docker build -t "$(test)_integration_tests" .
 
+run-data-test-local: build-lambda-img
+    docker_id=$$(docker run -d -p 9000:8080 --env BUCKET=$(QA_BUCKET) \
+    --env S3_HOST=$(HOST) --env S3_PORT=$(PORT) --env ENVIRONMENT=local --env REPORTS_WEB=test \
+    --env AWS_ACCESS_KEY_ID=test --env AWS_SECRET_ACCESS_KEY=test --env AWS_DEFAULT_REGION=us-east-1 $(test))
+
+run-integration-tests:
+    S3_HOST=$(HOST) docker-compose up --abort-on-container-exit --build
+
-run-integration-tests: build-integration-tests-img
-    docker run --env BUCKET=$(QA_BUCKET) \
-    --env S3_HOST=$(HOST) --env S3_PORT=$(PORT) $(test)_integration_tests
 
 build-unit-tests-img:
     cd ./functions/$(test) && \
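Note: the new run-data-test-local target starts the Lambda image detached and maps the Lambda Runtime Interface Emulator onto host port 9000 (-p 9000:8080). Below is a minimal sketch, not part of this PR, of invoking that container directly once the target has been run and LocalStack is up; the URL path is the standard RIE invocation endpoint also used by the integration test further down, and the event mirrors the one the test sends.

    import json
    import requests

    # Assumptions: `make run-data-test-local test=data_test` is running, and a
    # "dataqa" bucket with titanic.csv already exists in LocalStack.
    url = "http://localhost:9000/2015-03-31/functions/function/invocations"
    event = {
        "run_name": "local_test",
        "source_root": "dataqa",
        "source_data": "dataqa/titanic.csv",
        "engine": "s3",
    }
    response = requests.post(url, data=json.dumps(event), timeout=300)
    print(response.status_code, response.text)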
24 changes: 24 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,24 @@
+version: '3'
+services:
+  data-test:
+    build:
+      context: ./functions/data_test
+    environment:
+      - BUCKET=dqg-settings-local
+      - S3_HOST=$S3_HOST
+      - S3_PORT=4566
+      - ENVIRONMENT=local
+      - REPORTS_WEB=test
+      - AWS_ACCESS_KEY_ID=test
+      - AWS_SECRET_ACCESS_KEY=test
+      - AWS_DEFAULT_REGION=us-east-1
+  data-integration-test:
+    build:
+      context: ./tests/integration_tests/data_test
+    environment:
+      - S3_HOST=$S3_HOST
+      - LAMBDA_HOST=data-test
+      - LAMBDA_PORT=8080
+    depends_on:
+      - data-test
+
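Note: inside the compose network the test container reaches the Lambda container by service name (LAMBDA_HOST=data-test) on the emulator port 8080, while both containers talk to LocalStack through the S3_HOST value passed in from the Makefile. If the Lambda container is still starting when pytest begins, the first request can fail; below is a small optional wait loop, not part of this PR, that the test image could run before invoking, using the same environment variables.

    import os
    import time
    import requests

    # Hypothetical readiness check: poll the Lambda Runtime Interface Emulator
    # until it accepts requests before the tests send real events.
    host = os.environ["LAMBDA_HOST"]   # "data-test" inside the compose network
    port = os.environ["LAMBDA_PORT"]   # "8080"
    url = f"http://{host}:{port}/2015-03-31/functions/function/invocations"

    for _ in range(30):
        try:
            requests.post(url, json={}, timeout=5)  # any response means the emulator is up
            break
        except requests.ConnectionError:
            time.sleep(1)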

2 changes: 2 additions & 0 deletions functions/data_test/data_test/data_test.py
@@ -7,12 +7,14 @@
 from datasource import prepare_final_ds, get_source_name, get_file_extension
 from loguru import logger
 
+
 def handler(event, context):
     logger.info("Starting data test")
     if os.environ['ENVIRONMENT'] == 'local':
         endpoint_url = (f"http://{os.environ['S3_HOST']}:"
                         f"{os.environ['S3_PORT']}")
         s3 = boto3.resource("s3", endpoint_url=endpoint_url)
+        wr.config.s3_endpoint_url = endpoint_url
         logger.debug("ENVIRONMENT is local")
     else:
         s3 = boto3.resource("s3")
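Note: the added wr.config.s3_endpoint_url line points awswrangler at the same LocalStack endpoint as boto3; without it, wr.* calls inside the handler would still target real AWS when ENVIRONMENT=local. A tiny sketch of the effect, assuming LocalStack is running and S3_HOST/S3_PORT point at it (bucket and object names are placeholders, not taken from this PR):

    import os
    import boto3
    import awswrangler as wr

    # Assumption: LocalStack is reachable at S3_HOST:S3_PORT.
    endpoint_url = f"http://{os.environ['S3_HOST']}:{os.environ['S3_PORT']}"

    s3 = boto3.resource("s3", endpoint_url=endpoint_url)  # boto3 -> LocalStack
    wr.config.s3_endpoint_url = endpoint_url              # awswrangler -> LocalStack

    s3.create_bucket(Bucket="example-bucket")             # placeholder bucket
    s3.Object("example-bucket", "example.csv").put(Body=b"a,b\n1,2\n")
    df = wr.s3.read_csv("s3://example-bucket/example.csv")  # read back via awswrangler
    print(df)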
great_expectations/great_expectations.yml
@@ -6,7 +6,7 @@ datasources:
     batch_kwargs_generators:
       pandas_s3_generator:
         class_name: S3GlobReaderBatchKwargsGenerator
-        bucket: dataplatform-data-qa-env-fast-data-qa
+        bucket: dqg-settings-local
         assets:
           your_first_data_asset_name:
             prefix: data/ # trailing slash is important
@@ -27,27 +27,27 @@ validation_operators:
       - name: store_validation_result
         action:
           class_name: StoreValidationResultAction
-      # - name: store_evaluation_params
-      #   action:
-      #     class_name: StoreEvaluationParametersAction
-      # - name: update_data_docs
-      #   action:
-      #     class_name: UpdateDataDocsAction
+      - name: store_evaluation_params
+        action:
+          class_name: StoreEvaluationParametersAction
+      - name: update_data_docs
+        action:
+          class_name: UpdateDataDocsAction
 
 stores:
   expectations_S3_store:
     class_name: ExpectationsStore
     store_backend:
       class_name: TupleS3StoreBackend
-      bucket: 'dataplatform-data-qa-env-fast-data-qa'
-      prefix: 'dataplatform-data-qa-env-fast-data-qa/great_expectations/expectations/'
+      bucket: 'dqg-settings-local'
+      prefix: 'dqg-settings-local/great_expectations/expectations/'
 
   validations_S3_store:
     class_name: ValidationsStore
     store_backend:
       class_name: TupleS3StoreBackend
-      bucket: 'dataplatform-data-qa-env-fast-data-qa'
-      prefix: 'dataplatform-data-qa-env-fast-data-qa/great_expectations/uncommitted/validations/'
+      bucket: 'dqg-settings-local'
+      prefix: 'dqg-settings-local/great_expectations/uncommitted/validations/'
 
   evaluation_parameter_store:
     class_name: EvaluationParameterStore
@@ -62,9 +62,11 @@ data_docs_sites:
     show_how_to_buttons: false
     store_backend:
       class_name: TupleS3StoreBackend
-      bucket: dataplatform-data-qa-env-fast-data-qa
+      bucket: dqg-settings-local
       prefix: "data_docs/"
+    site_index_builder:
+      class_name: DefaultSiteIndexBuilder
 
 anonymous_usage_statistics:
   data_context_id: f6a15c13-c249-416b-8beb-2e540a245773
   enabled: false
13 changes: 3 additions & 10 deletions tests/integration_tests/data_test/Dockerfile
@@ -1,12 +1,5 @@
-ARG IMAGE_NAME=data-test
-ARG VERSION=latest
-FROM ${IMAGE_NAME}:${VERSION}
-RUN pip install pytest==7.3.1
-ENV ENVIRONMENT local
-ENV REPORTS_WEB test
-ENV AWS_ACCESS_KEY_ID test
-ENV AWS_SECRET_ACCESS_KEY test
-ENV AWS_DEFAULT_REGION us-east-1
+FROM python:3.9
+RUN pip install pytest==7.3.1 jsonschema==4.0.1 boto3==1.26.66 requests==2.31.0
 COPY test_data ./test_data
 COPY ./*.py ./
-ENTRYPOINT ["pytest", "-qvs", "test_data_test.py"]
+CMD ["pytest", "-qvs", "test_data_test.py"]
133 changes: 94 additions & 39 deletions tests/integration_tests/data_test/test_data_test.py
@@ -1,61 +1,116 @@
-import data_test as ds
-import awswrangler as wr
-import os
 from jsonschema import validate
 import boto3
 import pytest
+import requests
+import json
+import os
 
 schema = {
     "$schema": "http://json-schema.org/draft-04/schema#",
     "type": "object",
     "properties": {
-        "path": {"type": "array", "items": [{"type": "string"}]},
-        "file": {"type": "string"},
-        "profiling": {"type": "string"},
-        "test_suite": {"type": "string"},
-        "suite_name": {"type": "string"},
-        "folder_key": {"type": "string"},
-        "run_name": {"type": "string"},
-        "validate_id": {"type": "string"}
+        "path": {
+            "type": "array",
+            "items": [
+                {
+                    "type": "string"
+                }
+            ]
+        },
+        "file": {
+            "type": "string"
+        },
+        "profiling": {
+            "type": "string"
+        },
+        "test_suite": {
+            "type": "string"
+        },
+        "suite_name": {
+            "type": "string"
+        },
+        "folder_key": {
+            "type": "string"
+        },
+        "run_name": {
+            "type": "string"
+        },
+        "validate_id": {
+            "type": "string"
+        }
     },
     "required": [
-        "path", "file", "profiling", "test_suite", "suite_name",
-        "folder_key", "run_name", "validate_id"
+        "path",
+        "file",
+        "profiling",
+        "test_suite",
+        "suite_name",
+        "folder_key",
+        "run_name",
+        "validate_id"
     ]
 }
 
 
 @pytest.fixture(scope="function")
 def s3_test_data(request):
-    url = f"http://{os.environ['S3_HOST']}:4566"
-    wr.config.s3_endpoint_url = url
-    b_name = "dataqa"
+    bucket_name = "dataqa"
     file_name = request.param
-    file_path = f"{b_name}/{file_name}"
-    local_path = f"./test_data/{file_name}"
-    event = {
-        "run_name": "local_test",
-        "source_root": b_name,
-        "source_data": file_path,
-        "engine": "s3"
-    }
-    s3 = boto3.resource("s3", endpoint_url=url)
-    qa_bucket_name = os.environ['BUCKET']
-    gx_config_local_path = "great_expectations/great_expectations.yml"
-    config_path = f"{qa_bucket_name}/great_expectations/great_expectations.yml"
-    s3.Bucket(qa_bucket_name).download_file(config_path, gx_config_local_path)
-    s3.create_bucket(Bucket=b_name)
-    s3.Object(b_name, file_path).put(Body=open(local_path, 'rb'))
-    result = ds.handler(event, {})
-    validate(instance=result, schema=schema)
+    file_path = f"{bucket_name}/{file_name}"
+    s3 = _create_boto_s3_resource()
+    _upload_file_to_s3(s3, bucket_name, file_path, file_name)
+    response = _invoke_lambda(file_path)
+    json_response = json.loads(response.text)
+    validate(instance=json_response, schema=schema)
     yield file_path
-    s3.Object(b_name, file_path).delete()
+    _delete_s3_file(s3, bucket_name, file_path)
 
 
-@pytest.mark.parametrize("s3_test_data", ["titanic.csv",
-                                          "titanic.parquet",
-                                          "titanic.json",
-                                          "titanic_nested.json"],
+@pytest.mark.parametrize("s3_test_data",
+                         ["titanic.csv",
+                          "titanic.parquet",
+                          "titanic.json",
+                          "titanic_nested.json"],
                          indirect=True)
-def test_data_test(s3_test_data):
+def test_data_test(s3_test_data: str):
     pass
+
+
+def _delete_s3_file(s3, bucket_name: str, file_path: str):
+    s3.Object(bucket_name, file_path).delete()
+
+
+def _upload_file_to_s3(s3, bucket_name: str, file_path: str, file_name: str):
+    local_path = f"./test_data/{file_name}"
+    s3.create_bucket(Bucket=bucket_name)
+    s3.Object(bucket_name, file_path).put(Body=open(local_path, 'rb'))
+
+
+def _create_boto_s3_resource():
+    host = os.environ["S3_HOST"]
+    url = f"http://{host}:4566"
+    s3 = boto3.resource("s3", endpoint_url=url,
+                        aws_access_key_id="test",
+                        aws_secret_access_key="test")
+    return s3
+
+
+def _invoke_lambda(file_path: str):
+    lambda_host = os.environ["LAMBDA_HOST"]
+    lambda_port = os.environ["LAMBDA_PORT"]
+    lambda_url = f"http://{lambda_host}:{lambda_port}/2015-03-31/functions/function/invocations"
+
+    payload = json.dumps({
+        "run_name": "local_test",
+        "source_root": "dataqa",
+        "source_data": f"{file_path}",
+        "engine": "s3"
+    })
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    response = requests.request("POST",
+                                lambda_url,
+                                headers=headers,
+                                data=payload)
+    return response
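Note: the schema above requires eight fields in the Lambda's JSON response, all strings except path, which is a list of strings. A self-contained sketch of a payload that satisfies it; only the keys and types come from this module, every value is an invented placeholder.

    import json
    from jsonschema import validate

    # Minimal stand-in for the module-level schema: required keys only.
    required_keys = [
        "path", "file", "profiling", "test_suite", "suite_name",
        "folder_key", "run_name", "validate_id",
    ]
    minimal_schema = {"type": "object", "required": required_keys}

    # Placeholder payload: keys follow the schema, values are invented.
    sample_response = {key: "placeholder" for key in required_keys}
    sample_response["path"] = ["s3://dqg-settings-local/data_docs/placeholder.html"]

    validate(instance=sample_response, schema=minimal_schema)  # raises ValidationError if keys are missing
    print(json.dumps(sample_response, indent=2))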