Skip to content

Commit

Permalink
[ETL-374] Fix failing test for setup_external_storage (#46)
Browse files Browse the repository at this point in the history
* add command line arg for pytest for integration test, update readme

---------

Co-authored-by: Rixing Xu <rxu@w197.local>
  • Loading branch information
rxu17 and Rixing Xu authored May 5, 2023
1 parent 927f5f5 commit 0ec4eec
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 42 deletions.
2 changes: 1 addition & 1 deletion tests/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM amazon/aws-glue-libs:glue_libs_4.0.0_image_01

RUN pip3 install synapseclient~=2.7 pyarrow~=11.0 pytest-datadir
RUN pip3 install moto~=4.1 synapseclient~=2.7 pyarrow~=11.0 datacompy~=0.8 pytest-datadir
ENTRYPOINT ["bash", "-l"]
17 changes: 15 additions & 2 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,23 @@ pytest with other tests because they have to be run in a Dockerfile:
- test_s3_to_glue_lambda.py
- test_setup_external_storage.py

Example)

#### Running tests for lambda
Run the following command from the repo root to run tests for the lambda function (in develop).
You can run this locally or inside the docker image.

```shell script
python3 -m pytest tests/test_s3_to_glue_lambda.py -v
```

#### Running tests for setup external storage
Run the following command from the repo root to run the integration test for the setup external storage script. The test checks that STS
access has been set up for a given Synapse folder (in develop).

This test takes in two command line arguments:

- test-synapse-folder-id - synapse id of the folder to check STS access for
- test-ssm-parameter - SSM parameter to get the AWS credentials from; otherwise leave it blank and the credentials will be pulled from the environment

```shell script
python3 -m pytest tests/test_setup_external_storage.py --test-synapse-folder-id <put_synapse_folder_id_here> --test-ssm-parameter <put_ssm_parameter_here_or_leave_blank>
```
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,3 +256,18 @@ def staging_dataset_with_empty_columns():
@pytest.fixture()
def staging_dataset_empty():
    """Return the result of ``pd.DataFrame()`` called with no arguments.

    NOTE(review): ``pd`` is presumably pandas, which would make this an empty
    DataFrame with no rows or columns; the import is not visible here.
    """
    return pd.DataFrame()


def pytest_addoption(parser):
    """Register the command line options used by the STS access test.

    Two string options are added, both stored as-is and defaulting to
    ``None`` when omitted:

    - ``--test-synapse-folder-id``
    - ``--test-ssm-parameter``

    Args:
        parser: The object pytest passes to this hook; only its
            ``addoption`` method is used.
    """
    # (flag, help text) pairs, registered in this order.
    option_specs = (
        (
            "--test-synapse-folder-id",
            "ID of the synapse folder to check STS access. Required.",
        ),
        (
            "--test-ssm-parameter",
            "The SSM parameter to use to check STS access. Optional",
        ),
    )
    for flag, help_text in option_specs:
        parser.addoption(flag, action="store", default=None, help=help_text)
55 changes: 16 additions & 39 deletions tests/test_setup_external_storage.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,29 @@
import boto3
import pytest
import synapseclient
from pyarrow import fs, parquet
from pyarrow import fs

from src.scripts.setup_external_storage import setup_external_storage

@pytest.fixture
def test_parquet_folder():
    """Return the hard-coded Synapse ID of the parquet folder under test.

    TODO: Replace with production parquet folder synapse ID
    or save as external environment variable
    """
    return "syn51079888"

@pytest.fixture()
def test_synapse_folder_id(pytestconfig):
    """Yield the Synapse folder ID passed via ``--test-synapse-folder-id``.

    The option's help text marks it as required, but it is registered with
    ``default=None`` and nothing else enforces it. Without this check a
    missing (or empty) value would be handed straight to the STS token
    request as the entity, producing an unrelated-looking error.

    Args:
        pytestconfig: pytest's built-in config fixture, used to read the
            command line option.

    Yields:
        The folder ID string supplied on the command line.
    """
    folder_id = pytestconfig.getoption("test_synapse_folder_id")
    if not folder_id:
        # pytrace=False: the traceback into this fixture adds nothing useful.
        pytest.fail(
            "--test-synapse-folder-id is required to run this integration test",
            pytrace=False,
        )
    yield folder_id

@pytest.fixture
def test_synapse_client():
    """Build a logged-in Synapse client using an auth token read from SSM.

    An AWS session is created with the ``default`` profile in ``us-east-1``.
    The auth token is read (decrypted) from the ``synapse-recover-auth`` SSM
    parameter and used to log in. If no parameter name were set, the client
    would fall back to ``synapseclient.login()`` with cached credentials.
    """
    session = boto3.session.Session(profile_name="default", region_name="us-east-1")
    ssm_parameter = "synapse-recover-auth"

    if ssm_parameter is None:
        # try cached credentials
        return synapseclient.login()

    ssm = session.client("ssm")
    response = ssm.get_parameter(Name=ssm_parameter, WithDecryption=True)
    client = synapseclient.Synapse()
    client.login(authToken=response["Parameter"]["Value"])
    return client

@pytest.fixture()
def test_ssm_parameter(pytestconfig):
    """Yield the value given for ``--test-ssm-parameter``.

    Args:
        pytestconfig: pytest's built-in config fixture, used to read the
            command line option.

    Yields:
        Whatever ``getoption`` returns for ``test_ssm_parameter``.
    """
    ssm_parameter = pytestconfig.getoption("test_ssm_parameter")
    yield ssm_parameter

def test_setup_external_storage_success(test_parquet_folder, test_synapse_client):

@pytest.mark.integration()
def test_setup_external_storage_success(test_synapse_folder_id, test_ssm_parameter):
"""This test tests that it can get the STS token credentials and view and list the
parquet files in the S3 bucket location to verify that it has access"""
files in the S3 bucket location to verify that it has access"""
test_synapse_client = setup_external_storage.get_synapse_client(
ssm_parameter=test_ssm_parameter, aws_session=boto3
)
# Get STS credentials
token = test_synapse_client.get_sts_storage_token(
entity=test_parquet_folder, permission="read_only", output_format="json"
)

# Pass STS credentials to Arrow filesystem interface
s3 = fs.S3FileSystem(
access_key=token["accessKeyId"],
secret_key=token["secretAccessKey"],
session_token=token["sessionToken"],
region="us-east-1",
entity=test_synapse_folder_id, permission="read_only", output_format="json"
)

# get file info
base_s3_uri = "{}/{}".format(token["bucket"], token["baseKey"])
parquet_datasets = s3.get_file_info(fs.FileSelector(base_s3_uri, recursive=False))

# list objects in bucket, if permissions exist, would work
conn = boto3.client("s3") # again assumes boto.cfg setup, assume AWS S3
conn.list_objects(Bucket=token["bucket"])["Contents"]

0 comments on commit 0ec4eec

Please sign in to comment.