-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ETL-374] Fix failing test for setup_external_storage (#46)
* add command line arg for pytest for integration test, update readme --------- Co-authored-by: Rixing Xu <rxu@w197.local>
- Loading branch information
Showing
4 changed files
with
47 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
FROM amazon/aws-glue-libs:glue_libs_4.0.0_image_01 | ||
|
||
RUN pip3 install synapseclient~=2.7 pyarrow~=11.0 pytest-datadir | ||
RUN pip3 install moto~=4.1 synapseclient~=2.7 pyarrow~=11.0 datacompy~=0.8 pytest-datadir | ||
ENTRYPOINT ["bash", "-l"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,52 +1,29 @@ | ||
import boto3 | ||
import pytest | ||
import synapseclient | ||
from pyarrow import fs, parquet | ||
from pyarrow import fs | ||
|
||
from src.scripts.setup_external_storage import setup_external_storage | ||
|
||
@pytest.fixture | ||
def test_parquet_folder(): | ||
"""TODO: Replace with production parquet folder synapse ID | ||
or save as external environment variable""" | ||
parquet_folder = "syn51079888" | ||
return parquet_folder | ||
|
||
@pytest.fixture() | ||
def test_synapse_folder_id(pytestconfig): | ||
yield pytestconfig.getoption("test_synapse_folder_id") | ||
|
||
@pytest.fixture | ||
def test_synapse_client(): | ||
"""Returns a synapse client from credentials stored in SSM""" | ||
aws_session = boto3.session.Session(profile_name="default", region_name="us-east-1") | ||
ssm_parameter = "synapse-recover-auth" | ||
if ssm_parameter is not None: | ||
ssm_client = aws_session.client("ssm") | ||
token = ssm_client.get_parameter(Name=ssm_parameter, WithDecryption=True) | ||
test_synapse_client = synapseclient.Synapse() | ||
test_synapse_client.login(authToken=token["Parameter"]["Value"]) | ||
else: # try cached credentials | ||
test_synapse_client = synapseclient.login() | ||
return test_synapse_client | ||
|
||
@pytest.fixture() | ||
def test_ssm_parameter(pytestconfig): | ||
yield pytestconfig.getoption("test_ssm_parameter") | ||
|
||
def test_setup_external_storage_success(test_parquet_folder, test_synapse_client): | ||
|
||
@pytest.mark.integration() | ||
def test_setup_external_storage_success(test_synapse_folder_id, test_ssm_parameter): | ||
"""This test tests that it can get the STS token credentials and view and list the | ||
parquet files in the S3 bucket location to verify that it has access""" | ||
files in the S3 bucket location to verify that it has access""" | ||
test_synapse_client = setup_external_storage.get_synapse_client( | ||
ssm_parameter=test_ssm_parameter, aws_session=boto3 | ||
) | ||
# Get STS credentials | ||
token = test_synapse_client.get_sts_storage_token( | ||
entity=test_parquet_folder, permission="read_only", output_format="json" | ||
) | ||
|
||
# Pass STS credentials to Arrow filesystem interface | ||
s3 = fs.S3FileSystem( | ||
access_key=token["accessKeyId"], | ||
secret_key=token["secretAccessKey"], | ||
session_token=token["sessionToken"], | ||
region="us-east-1", | ||
entity=test_synapse_folder_id, permission="read_only", output_format="json" | ||
) | ||
|
||
# get file info | ||
base_s3_uri = "{}/{}".format(token["bucket"], token["baseKey"]) | ||
parquet_datasets = s3.get_file_info(fs.FileSelector(base_s3_uri, recursive=False)) | ||
|
||
# list objects in bucket, if permissions exist, would work | ||
conn = boto3.client("s3") # again assumes boto.cfg setup, assume AWS S3 | ||
conn.list_objects(Bucket=token["bucket"])["Contents"] |