Skip to content

Commit

Permalink
[MAINTENANCE] read config file instead of using datacontext (#74)
Browse files Browse the repository at this point in the history
* feat: read config file
  • Loading branch information
a-chumagin authored May 16, 2023
1 parent 3d10d13 commit a9bbd95
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 12 deletions.
19 changes: 11 additions & 8 deletions functions/data_test/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
from Expectation_report_new import ExpectationsReportNew
from pandas_profiling.expectations_report import ExpectationsReport
from datetime import datetime
from great_expectations import DataContext
from great_expectations.data_context import BaseDataContext
from great_expectations.data_context.types.base import (DataContextConfig,
S3StoreBackendDefaults)
import yaml

# Config file that read_gx_config_file() loads when no path is given.
# The path is relative, so it is resolved against the current working
# directory of the process.
DEFAULT_CONFIG_FILE_PATH = "great_expectations/great_expectations.yml"

if os.environ['ENVIRONMENT'] == 'local':
endpoint_url = f"http://{os.environ['S3_HOST']}:{os.environ['S3_PORT']}"
Expand Down Expand Up @@ -66,11 +65,7 @@ def __init__(self, typeset, *args, **kwargs):


def change_ge_config(datasource_root):
context_ge = DataContext()

configfile_raw = context_ge.get_config().to_yaml_str()
configfile = yaml.safe_load(configfile_raw)

configfile = read_gx_config_file()
datasources = {
"pandas_s3": {
"class_name": "PandasDatasource",
Expand Down Expand Up @@ -151,7 +146,15 @@ def remove_suffix(input_string, suffix):
return input_string


def profile_data(df, suite_name, cloudfront, datasource_root, source_covered,
def read_gx_config_file(path=None) -> dict:
    """Load the Great Expectations YAML config file into a dict.

    Args:
        path: Location of the YAML file (string or path-like object).
            When ``None``, ``DEFAULT_CONFIG_FILE_PATH`` is used; that
            default is a relative path, so it resolves against the
            current working directory.

    Returns:
        The parsed YAML document. An empty file yields ``{}``.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    if path is None:
        path = DEFAULT_CONFIG_FILE_PATH
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as config_file:
        configfile = yaml.safe_load(config_file)
    # safe_load returns None for an empty document; hand back an empty
    # dict instead so the declared return type holds.
    return {} if configfile is None else configfile


def profile_data(df, suite_name, cloudfront, datasource_root, source_covered,
mapping_config, run_name):
qa_bucket = s3.Bucket(qa_bucket_name)
config = change_ge_config(datasource_root)
Expand Down
24 changes: 20 additions & 4 deletions tests/unit_tests/data_test/test_profiling.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
import pytest
from functions.data_test.profiling import (add_local_s3_to_stores)
from functions.data_test.profiling import (add_local_s3_to_stores,
read_gx_config_file)

# Endpoint passed to add_local_s3_to_stores() in the test below and expected
# back under "boto3_options" in the resulting store backend.
ENDPOINT_URL = "http://localhost:4566"


@pytest.mark.parametrize("stores, expected_output", [
({"store1": {"store_backend": {"type": "s3", "bucket": "my-bucket"}}},
{"store1": {"store_backend": {"type": "s3", "bucket": "my-bucket", "boto3_options": {"endpoint_url": "http://localhost:4566"}}}}),
{"store1": {"store_backend": {"type": "s3", "bucket": "my-bucket",
"boto3_options":
{"endpoint_url": ENDPOINT_URL}}}}),
({}, {})
])
def test_add_local_s3_to_stores(stores, expected_output):
endpoint_url = "http://localhost:4566"
assert add_local_s3_to_stores(stores, endpoint_url) == expected_output
assert add_local_s3_to_stores(stores, ENDPOINT_URL) == expected_output


def test_gx_config_file():
    """The default config file loads and declares config version 2.0."""
    loaded = read_gx_config_file()
    version = loaded["config_version"]
    assert version == 2.0


def test_gx_config_file_path_is_not_none(tmpdir):
    """An explicitly supplied path is read instead of the default one."""
    # Write a throwaway config with a version that differs from the
    # default file, so a fallback to the default path would be detected.
    config_dir = tmpdir.mkdir("config")
    custom_config = config_dir.join("great_expectations.yml")
    custom_config.write("config_version: 10.0")

    parsed = read_gx_config_file(path=custom_config)
    assert parsed["config_version"] == 10.0

0 comments on commit a9bbd95

Please sign in to comment.