[MAINTENANCE] run lambda as docker in integration tests (#117)
* draft: update integration tests for running against lambda

* fix: add new line

* feat: run tests as compose

* fix: added a new line
a-chumagin authored Sep 11, 2023
1 parent ee474b6 commit c33ab86
Showing 7 changed files with 148 additions and 69 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-integration-tests.yml
@@ -35,4 +35,4 @@ jobs:
         run: |
           curl http://localhost:4566/_localstack/health -i
       - name: Run tests
-        run: make run-integration-tests QA_BUCKET=dqg-settings-local HOST=172.17.0.1 test=data_test
+        run: make run-integration-tests HOST=172.17.0.1
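
The curl step above can also be reproduced from inside a test container; a minimal Python sketch (not part of the commit; it assumes S3_HOST is exported the way the Makefile and compose file below do, and LocalStack's default port 4566):

import os

import requests

# Probe LocalStack's health endpoint, mirroring the workflow's curl check.
host = os.environ.get("S3_HOST", "localhost")
response = requests.get(f"http://{host}:4566/_localstack/health", timeout=5)
print(response.status_code, response.json())
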
17 changes: 10 additions & 7 deletions Makefile
@@ -1,6 +1,6 @@
 HOST := host.docker.internal
 PORT := 4566
-QA_BUCKET := integration-test-bucket
+QA_BUCKET := dqg-settings-local
 IMAGE_VERSION := latest

 run-localstack:
@@ -17,13 +17,16 @@ build-lambda-img:

 build-integration-tests-img: build-lambda-img
 	cd ./tests/integration_tests/$(test) && \
-	docker build --build-arg="IMAGE_NAME=$(test)" \
-	--build-arg="VERSION=$(IMAGE_VERSION)" \
-	-t "$(test)_integration_tests" .
+	docker build -t "$(test)_integration_tests" .

+run-data-test-local: build-lambda-img
+	docker_id=$$(docker run -d -p 9000:8080 --env BUCKET=$(QA_BUCKET) \
+	--env S3_HOST=$(HOST) --env S3_PORT=$(PORT) --env ENVIRONMENT=local --env REPORTS_WEB=test \
+	--env AWS_ACCESS_KEY_ID=test --env AWS_SECRET_ACCESS_KEY=test --env AWS_DEFAULT_REGION=us-east-1 $(test))
+
+run-integration-tests:
+	S3_HOST=$(HOST) docker-compose up --abort-on-container-exit --build
+
-run-integration-tests: build-integration-tests-img
-	docker run --env BUCKET=$(QA_BUCKET) \
-	--env S3_HOST=$(HOST) --env S3_PORT=$(PORT) $(test)_integration_tests

 build-unit-tests-img:
 	cd ./functions/$(test) && \
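
The new run-data-test-local target publishes the lambda container's Runtime Interface Emulator port 8080 on host port 9000, so the function can also be exercised by hand. A minimal sketch (not part of the commit; the payload mirrors the event built in test_data_test.py below, and the file is assumed to already exist in the dataqa bucket):

import json

import requests

# Invoke the detached lambda container started by `make run-data-test-local`.
url = "http://localhost:9000/2015-03-31/functions/function/invocations"
event = {
    "run_name": "local_test",
    "source_root": "dataqa",
    "source_data": "dataqa/titanic.csv",
    "engine": "s3"
}
response = requests.post(url, data=json.dumps(event))
print(response.json())
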
24 changes: 24 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,24 @@
+version: '3'
+services:
+  data-test:
+    build:
+      context: ./functions/data_test
+    environment:
+      - BUCKET=dqg-settings-local
+      - S3_HOST=$S3_HOST
+      - S3_PORT=4566
+      - ENVIRONMENT=local
+      - REPORTS_WEB=test
+      - AWS_ACCESS_KEY_ID=test
+      - AWS_SECRET_ACCESS_KEY=test
+      - AWS_DEFAULT_REGION=us-east-1
+  data-integration-test:
+    build:
+      context: ./tests/integration_tests/data_test
+    environment:
+      - S3_HOST=$S3_HOST
+      - LAMBDA_HOST=data-test
+      - LAMBDA_PORT=8080
+    depends_on:
+      - data-test
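
Note that depends_on only orders container start-up; it does not wait for the lambda runtime inside data-test to accept requests. If start-up races ever show up, a small probe along these lines (hypothetical, not part of the commit) could run before the first test:

import os
import socket
import time

def wait_for_lambda(timeout=30.0):
    # Poll the lambda container's port until it accepts TCP connections.
    host = os.environ["LAMBDA_HOST"]
    port = int(os.environ["LAMBDA_PORT"])
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=2):
                return
        except OSError:
            time.sleep(1)
    raise TimeoutError(f"{host}:{port} not reachable within {timeout}s")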

2 changes: 2 additions & 0 deletions functions/data_test/data_test/data_test.py
@@ -7,12 +7,14 @@
 from datasource import prepare_final_ds, get_source_name, get_file_extension
 from loguru import logger

+
 def handler(event, context):
     logger.info("Starting data test")
     if os.environ['ENVIRONMENT'] == 'local':
         endpoint_url = (f"http://{os.environ['S3_HOST']}:"
                         f"{os.environ['S3_PORT']}")
         s3 = boto3.resource("s3", endpoint_url=endpoint_url)
+        wr.config.s3_endpoint_url = endpoint_url
         logger.debug("ENVIRONMENT is local")
     else:
         s3 = boto3.resource("s3")
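
The added wr.config.s3_endpoint_url line matters because awswrangler manages its own boto3 sessions, so overriding the endpoint on the boto3.resource call alone would leave awswrangler's S3 calls pointed at real AWS. A standalone sketch of the same pattern (the endpoint value is assumed here, matching the LocalStack port used throughout this commit):

import awswrangler as wr
import boto3

endpoint_url = "http://localhost:4566"  # LocalStack, as in the handler's local branch
s3 = boto3.resource("s3", endpoint_url=endpoint_url)  # redirects plain boto3 calls
wr.config.s3_endpoint_url = endpoint_url  # redirects awswrangler's own sessions
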
26 changes: 14 additions & 12 deletions great_expectations.yml
@@ -6,7 +6,7 @@ datasources:
     batch_kwargs_generators:
       pandas_s3_generator:
         class_name: S3GlobReaderBatchKwargsGenerator
-        bucket: dataplatform-data-qa-env-fast-data-qa
+        bucket: dqg-settings-local
         assets:
           your_first_data_asset_name:
             prefix: data/ # trailing slash is important
@@ -27,27 +27,27 @@ validation_operators:
       - name: store_validation_result
         action:
           class_name: StoreValidationResultAction
-      # - name: store_evaluation_params
-      #   action:
-      #     class_name: StoreEvaluationParametersAction
-      # - name: update_data_docs
-      #   action:
-      #     class_name: UpdateDataDocsAction
+      - name: store_evaluation_params
+        action:
+          class_name: StoreEvaluationParametersAction
+      - name: update_data_docs
+        action:
+          class_name: UpdateDataDocsAction

 stores:
   expectations_S3_store:
     class_name: ExpectationsStore
     store_backend:
       class_name: TupleS3StoreBackend
-      bucket: 'dataplatform-data-qa-env-fast-data-qa'
-      prefix: 'dataplatform-data-qa-env-fast-data-qa/great_expectations/expectations/'
+      bucket: 'dqg-settings-local'
+      prefix: 'dqg-settings-local/great_expectations/expectations/'

   validations_S3_store:
     class_name: ValidationsStore
     store_backend:
       class_name: TupleS3StoreBackend
-      bucket: 'dataplatform-data-qa-env-fast-data-qa'
-      prefix: 'dataplatform-data-qa-env-fast-data-qa/great_expectations/uncommitted/validations/'
+      bucket: 'dqg-settings-local'
+      prefix: 'dqg-settings-local/great_expectations/uncommitted/validations/'

   evaluation_parameter_store:
     class_name: EvaluationParameterStore
@@ -62,9 +62,11 @@ data_docs_sites:
     show_how_to_buttons: false
     store_backend:
       class_name: TupleS3StoreBackend
-      bucket: dataplatform-data-qa-env-fast-data-qa
+      bucket: dqg-settings-local
       prefix: "data_docs/"
+    site_index_builder:
+      class_name: DefaultSiteIndexBuilder

 anonymous_usage_statistics:
   data_context_id: f6a15c13-c249-416b-8beb-2e540a245773
   enabled: false
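
With store_evaluation_params and update_data_docs enabled and all stores pointed at the local settings bucket, a run's output can be spot-checked directly against LocalStack. A hypothetical check (not part of the commit; endpoint and dummy credentials follow the values used elsewhere in this commit):

import boto3

s3 = boto3.resource("s3", endpoint_url="http://localhost:4566",
                    aws_access_key_id="test",
                    aws_secret_access_key="test")
# List any generated data docs pages in the local settings bucket.
for obj in s3.Bucket("dqg-settings-local").objects.filter(Prefix="data_docs/"):
    print(obj.key)
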
13 changes: 3 additions & 10 deletions tests/integration_tests/data_test/Dockerfile
@@ -1,12 +1,5 @@
-ARG IMAGE_NAME=data-test
-ARG VERSION=latest
-FROM ${IMAGE_NAME}:${VERSION}
-RUN pip install pytest==7.3.1
-ENV ENVIRONMENT local
-ENV REPORTS_WEB test
-ENV AWS_ACCESS_KEY_ID test
-ENV AWS_SECRET_ACCESS_KEY test
-ENV AWS_DEFAULT_REGION us-east-1
+FROM python:3.9
+RUN pip install pytest==7.3.1 jsonschema==4.0.1 boto3==1.26.66 requests==2.31.0
 COPY test_data ./test_data
 COPY ./*.py ./
-ENTRYPOINT ["pytest", "-qvs", "test_data_test.py"]
+CMD ["pytest", "-qvs", "test_data_test.py"]
133 changes: 94 additions & 39 deletions tests/integration_tests/data_test/test_data_test.py
@@ -1,61 +1,116 @@
-import data_test as ds
-import awswrangler as wr
-import os
 from jsonschema import validate
 import boto3
 import pytest
+import requests
+import json
+import os

 schema = {
     "$schema": "http://json-schema.org/draft-04/schema#",
     "type": "object",
     "properties": {
-        "path": {"type": "array", "items": [{"type": "string"}]},
-        "file": {"type": "string"},
-        "profiling": {"type": "string"},
-        "test_suite": {"type": "string"},
-        "suite_name": {"type": "string"},
-        "folder_key": {"type": "string"},
-        "run_name": {"type": "string"},
-        "validate_id": {"type": "string"}
+        "path": {
+            "type": "array",
+            "items": [
+                {
+                    "type": "string"
+                }
+            ]
+        },
+        "file": {
+            "type": "string"
+        },
+        "profiling": {
+            "type": "string"
+        },
+        "test_suite": {
+            "type": "string"
+        },
+        "suite_name": {
+            "type": "string"
+        },
+        "folder_key": {
+            "type": "string"
+        },
+        "run_name": {
+            "type": "string"
+        },
+        "validate_id": {
+            "type": "string"
+        }
     },
     "required": [
-        "path", "file", "profiling", "test_suite", "suite_name",
-        "folder_key", "run_name", "validate_id"
+        "path",
+        "file",
+        "profiling",
+        "test_suite",
+        "suite_name",
+        "folder_key",
+        "run_name",
+        "validate_id"
     ]
 }


 @pytest.fixture(scope="function")
 def s3_test_data(request):
-    url = f"http://{os.environ['S3_HOST']}:4566"
-    wr.config.s3_endpoint_url = url
-    b_name = "dataqa"
+    bucket_name = "dataqa"
     file_name = request.param
-    file_path = f"{b_name}/{file_name}"
-    local_path = f"./test_data/{file_name}"
-    event = {
-        "run_name": "local_test",
-        "source_root": b_name,
-        "source_data": file_path,
-        "engine": "s3"
-    }
-    s3 = boto3.resource("s3", endpoint_url=url)
-    qa_bucket_name = os.environ['BUCKET']
-    gx_config_local_path = "great_expectations/great_expectations.yml"
-    config_path = f"{qa_bucket_name}/great_expectations/great_expectations.yml"
-    s3.Bucket(qa_bucket_name).download_file(config_path, gx_config_local_path)
-    s3.create_bucket(Bucket=b_name)
-    s3.Object(b_name, file_path).put(Body=open(local_path, 'rb'))
-    result = ds.handler(event, {})
-    validate(instance=result, schema=schema)
+    file_path = f"{bucket_name}/{file_name}"
+    s3 = _create_boto_s3_resource()
+    _upload_file_to_s3(s3, bucket_name, file_path, file_name)
+    response = _invoke_lambda(file_path)
+    json_response = json.loads(response.text)
+    validate(instance=json_response, schema=schema)
     yield file_path
-    s3.Object(b_name, file_path).delete()
+    _delete_s3_file(s3, bucket_name, file_path)


-@pytest.mark.parametrize("s3_test_data", ["titanic.csv",
-                                          "titanic.parquet",
-                                          "titanic.json",
-                                          "titanic_nested.json"],
+@pytest.mark.parametrize("s3_test_data",
+                         ["titanic.csv",
+                          "titanic.parquet",
+                          "titanic.json",
+                          "titanic_nested.json"],
                          indirect=True)
-def test_data_test(s3_test_data):
+def test_data_test(s3_test_data: str):
     pass


+def _delete_s3_file(s3, bucket_name: str, file_path: str):
+    s3.Object(bucket_name, file_path).delete()
+
+
+def _upload_file_to_s3(s3, bucket_name: str, file_path: str, file_name: str):
+    local_path = f"./test_data/{file_name}"
+    s3.create_bucket(Bucket=bucket_name)
+    s3.Object(bucket_name, file_path).put(Body=open(local_path, 'rb'))
+
+
+def _create_boto_s3_resource():
+    host = os.environ["S3_HOST"]
+    url = f"http://{host}:4566"
+    s3 = boto3.resource("s3", endpoint_url=url,
+                        aws_access_key_id="test",
+                        aws_secret_access_key="test")
+    return s3
+
+
+def _invoke_lambda(file_path: str):
+    lambda_host = os.environ["LAMBDA_HOST"]
+    lambda_port = os.environ["LAMBDA_PORT"]
+    lambda_url = f"http://{lambda_host}:{lambda_port}/2015-03-31/functions/function/invocations"
+
+    payload = json.dumps({
+        "run_name": "local_test",
+        "source_root": "dataqa",
+        "source_data": f"{file_path}",
+        "engine": "s3"
+    })
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    response = requests.request("POST",
+                                lambda_url,
+                                headers=headers,
+                                data=payload)
+    return response
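
For reference, a minimal payload that satisfies the schema above (dummy values, for illustration only; the lambda's real output is whatever the handler returns):

from jsonschema import validate

example = {
    "path": ["s3://example-bucket/some/key"],
    "file": "titanic.csv",
    "profiling": "profiling_suite",
    "test_suite": "test_suite",
    "suite_name": "suite",
    "folder_key": "local_test",
    "run_name": "local_test",
    "validate_id": "00000000-0000-0000-0000-000000000000"
}
validate(instance=example, schema=schema)  # raises ValidationError on mismatch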
