diff --git a/functions/data_test/profiling.py b/functions/data_test/profiling.py index 2922cca..9eeac1e 100755 --- a/functions/data_test/profiling.py +++ b/functions/data_test/profiling.py @@ -8,12 +8,11 @@ from Expectation_report_new import ExpectationsReportNew from pandas_profiling.expectations_report import ExpectationsReport from datetime import datetime -from great_expectations import DataContext from great_expectations.data_context import BaseDataContext from great_expectations.data_context.types.base import (DataContextConfig, S3StoreBackendDefaults) import yaml - +DEFAULT_CONFIG_FILE_PATH = "great_expectations/great_expectations.yml" if os.environ['ENVIRONMENT'] == 'local': endpoint_url = f"http://{os.environ['S3_HOST']}:{os.environ['S3_PORT']}" @@ -66,11 +65,7 @@ def __init__(self, typeset, *args, **kwargs): def change_ge_config(datasource_root): - context_ge = DataContext() - - configfile_raw = context_ge.get_config().to_yaml_str() - configfile = yaml.safe_load(configfile_raw) - + configfile = read_gx_config_file() datasources = { "pandas_s3": { "class_name": "PandasDatasource", @@ -151,7 +146,15 @@ def remove_suffix(input_string, suffix): return input_string -def profile_data(df, suite_name, cloudfront, datasource_root, source_covered, +def read_gx_config_file(path=None) -> dict: + if path is None: + path = DEFAULT_CONFIG_FILE_PATH + with open(path, "r") as config_file: + configfile = yaml.safe_load(config_file) + return configfile + + +def profile_data(df, suite_name, cloudfront, datasource_root, source_covered, mapping_config, run_name): qa_bucket = s3.Bucket(qa_bucket_name) config = change_ge_config(datasource_root) diff --git a/tests/unit_tests/data_test/test_profiling.py b/tests/unit_tests/data_test/test_profiling.py index 2f90e59..d40444e 100644 --- a/tests/unit_tests/data_test/test_profiling.py +++ b/tests/unit_tests/data_test/test_profiling.py @@ -1,12 +1,28 @@ import pytest -from functions.data_test.profiling import (add_local_s3_to_stores) +from functions.data_test.profiling import (add_local_s3_to_stores, + read_gx_config_file) + +ENDPOINT_URL = "http://localhost:4566" @pytest.mark.parametrize("stores, expected_output", [ ({"store1": {"store_backend": {"type": "s3", "bucket": "my-bucket"}}}, - {"store1": {"store_backend": {"type": "s3", "bucket": "my-bucket", "boto3_options": {"endpoint_url": "http://localhost:4566"}}}}), + {"store1": {"store_backend": {"type": "s3", "bucket": "my-bucket", + "boto3_options": + {"endpoint_url": ENDPOINT_URL}}}}), ({}, {}) ]) def test_add_local_s3_to_stores(stores, expected_output): - endpoint_url = "http://localhost:4566" - assert add_local_s3_to_stores(stores, endpoint_url) == expected_output + assert add_local_s3_to_stores(stores, ENDPOINT_URL) == expected_output + + +def test_gx_config_file(): + config_file = read_gx_config_file() + assert config_file["config_version"] == 2.0 + + +def test_gx_config_file_path_is_not_none(tmpdir): + p = tmpdir.mkdir("config").join("great_expectations.yml") + p.write("config_version: 10.0") + config_file = read_gx_config_file(path=p) + assert config_file["config_version"] == 10.0