Add Dataset integration tests - Tables, Folders #1391

Merged
merged 49 commits on Sep 13, 2024
Changes from 20 commits
Commits
49 commits
b188538
Add integration tests for datasets - basic queries and conftest
dlpzx Jul 1, 2024
45d1407
add list + get queries, add persistent datasets, begin create/update/…
noah-paige Jul 1, 2024
cd27097
Add integration test role in Environment stack + session in conftest …
dlpzx Jul 2, 2024
d04b525
simplified conftests for datasets
dlpzx Jul 2, 2024
5e5507e
create integration role with region in name
noah-paige Jul 2, 2024
fa69dde
New environment type: IntegrationTests + ssm param with tooling accou…
dlpzx Jul 3, 2024
3e19596
Error on cdk add_to_policy
dlpzx Jul 3, 2024
c05de67
Add filter term include tags datasets
noah-paige Jul 4, 2024
8f2a918
Add sample data and tests for dataset role access
noah-paige Jul 4, 2024
9b2c711
Add sample data and tests for dataset role access
noah-paige Jul 4, 2024
2dcd60f
Add assume role permissions to codebuild role
dlpzx Jul 8, 2024
c261da7
Add naming checks in clients + create table
dlpzx Jul 8, 2024
1e9732b
Add permissions, confidentiality and commented tests
dlpzx Jul 8, 2024
5ea8b6b
revert persistent environment
dlpzx Jul 8, 2024
520a34e
Fix check_stack_ready in dataset creation
dlpzx Jul 8, 2024
972c883
Revert session environment and add tests
dlpzx Jul 8, 2024
7b1c942
fix integration role datasets
noah-paige Jul 8, 2024
d9042dc
Fix presigned URL upload test
noah-paige Jul 9, 2024
928c3aa
Merge remote-tracking branch 'refs/remotes/origin/main' into feat/int…
dlpzx Jul 9, 2024
b633938
Uncomment drafted table/folder tests
dlpzx Jul 9, 2024
2330021
Merge branch 'refs/heads/main' into feat/integration-tests-datasets-pt2
dlpzx Sep 5, 2024
a857d0a
Ruff and readme
dlpzx Sep 5, 2024
5968fd3
Split dataset tests and added signature of each test for all APIs. Fi…
dlpzx Sep 5, 2024
052bc7e
Added all dataset query definitions and placeholders for tests
dlpzx Sep 6, 2024
9ad774e
Started parametrization of tests
dlpzx Sep 6, 2024
146c45e
Started parametrization of tests
dlpzx Sep 6, 2024
0907ea3
Started parametrization of tests
dlpzx Sep 6, 2024
7f68d3b
Started parametrization of tests
dlpzx Sep 6, 2024
98c7667
Added persistent tables and folders
dlpzx Sep 9, 2024
92f23e3
Remove unnecessary tests in folders
dlpzx Sep 9, 2024
3907001
Fix issues with KMS datasets
dlpzx Sep 9, 2024
3553d12
Temporary changes for persistent datasets
dlpzx Sep 9, 2024
9be25bb
Add paramtrization in profiling, confidentiality, fix issue in glue t…
dlpzx Sep 9, 2024
1709bb5
Fix s3_table tests, parametrized dataset tests
dlpzx Sep 9, 2024
6889c47
Retouch preview_table tests
dlpzx Sep 9, 2024
3d67e2e
Fixed profiling tables tests
dlpzx Sep 9, 2024
4fd56af
Fixed profiling tables tests
dlpzx Sep 9, 2024
56b12c5
Fix everything except for persistent-sse-s3 tests
dlpzx Sep 9, 2024
ade89e8
Fix API query to filter by tags + add README detail
dlpzx Sep 10, 2024
72259c4
Wrong SSM parameter in README
dlpzx Sep 10, 2024
3b74b03
Merge remote-tracking branch 'refs/remotes/origin/main' into feat/int…
dlpzx Sep 10, 2024
196fb6e
Moving fixture parameters to conftest
dlpzx Sep 10, 2024
d3bb8be
Update requisite in README
dlpzx Sep 10, 2024
ec38ec0
PR review comments - functions to create AWS imported resources, names
dlpzx Sep 11, 2024
ccb6887
PR review comments - 2
dlpzx Sep 11, 2024
01c65c8
Merge branch 'refs/heads/main' into feat/integration-tests-datasets-pt2
dlpzx Sep 11, 2024
5358677
Issue persistent buckets
dlpzx Sep 11, 2024
590909b
Rewrite if-clause existing infra and resource for imported dataset
dlpzx Sep 12, 2024
0674531
Small return issue
dlpzx Sep 12, 2024
30 changes: 30 additions & 0 deletions tests_new/integration_tests/modules/s3_datasets/global_conftest.py
@@ -117,6 +117,36 @@ def session_s3_dataset1(client1, group1, org1, session_env1, session_id, testdat
            delete_s3_dataset(client1, session_env1['environmentUri'], ds)


@pytest.fixture(scope='session')
def session_s3_dataset2_with_table(client1, group1, org1, session_env1, session_id, testdata):
    ds = None
    try:
        ds = create_s3_dataset(
            client1,
            owner='someone',
            group=group1,
            org_uri=org1['organizationUri'],
            env_uri=session_env1['environmentUri'],
            tags=[session_id],
        )
        creds = generate_dataset_access_token(client1, ds.datasetUri)
        dataset_session = boto3.Session(
            aws_access_key_id=creds['AccessKey'],
            aws_secret_access_key=creds['SessionKey'],
            aws_session_token=creds['sessionToken'],
        )
        GlueClient(dataset_session, ds.region).create_table(
            database_name=ds.GlueDatabaseName, table_name='integrationtest', bucket=ds.S3Bucket
        )
        response = sync_tables(client1, datasetUri=ds.datasetUri)

        yield ds, response.get('nodes', [])[0]
    finally:
        if ds:
            delete_s3_dataset(client1, session_env1['environmentUri'], ds)


@pytest.fixture(scope='session')
def session_imported_sse_s3_dataset1(
    client1, group1, org1, session_env1, session_id, testdata, session_env1_aws_client, resources_prefix
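For orientation, a hedged sketch of roughly what a helper like the GlueClient.create_table call in this fixture might translate to against the raw boto3 Glue API. The function name, placeholder column schema and file formats below are assumptions for illustration; they are not the suite's actual helper implementation.

import boto3


def create_glue_table_sketch(session: boto3.Session, region: str, database_name: str, table_name: str, bucket: str) -> None:
    # Hypothetical stand-in for the test suite's GlueClient.create_table helper.
    glue = session.client('glue', region_name=region)
    glue.create_table(
        DatabaseName=database_name,
        TableInput={
            'Name': table_name,
            'TableType': 'EXTERNAL_TABLE',
            'StorageDescriptor': {
                'Columns': [{'Name': 'value', 'Type': 'string'}],  # placeholder schema, not the suite's
                'Location': f's3://{bucket}/{table_name}/',
                'InputFormat': 'org.apache.hadoop.mapred.TextInputFormat',
                'OutputFormat': 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
                'SerdeInfo': {'SerializationLibrary': 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'},
            },
        },
    )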
113 changes: 113 additions & 0 deletions tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py
@@ -214,3 +214,116 @@ def test_get_dataset_presigned_url_upload_data_unauthorized(client2, session_s3_
    assert_that(get_dataset_presigned_role_url).raises(GqlError).when_called_with(
        client2, dataset_uri, input={'prefix': 'sample_data', 'fileName': 'name'}
    ).contains('UnauthorizedOperation', 'CREDENTIALS_DATASET', dataset_uri)




def test_start_crawler(client1, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    response = start_glue_crawler(client1, datasetUri=dataset_uri, input=None)
    assert_that(response.get('Name')).is_equal_to(session_s3_dataset1.GlueCrawlerName)
    assert_that(response.get('status')).is_in(['Pending', 'Running'])
    # TODO: check it can run successfully + check sending prefix


def test_start_crawler_unauthorized(client2, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    assert_that(start_glue_crawler).raises(GqlError).when_called_with(client2, dataset_uri).contains(
        'UnauthorizedOperation', 'CRAWL_DATASET', dataset_uri
    )


def test_sync_tables(client1, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    response = sync_tables(client1, datasetUri=dataset_uri)
    assert_that(response.count).is_equal_to(2)


def test_sync_tables_unauthorized(client2, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    assert_that(sync_tables).raises(GqlError).when_called_with(client2, dataset_uri).contains(
        'UnauthorizedOperation', 'SYNC_DATASET', dataset_uri
    )


def test_start_table_profiling(client1, session_s3_dataset2_with_table):
    dataset, table = session_s3_dataset2_with_table
    table_uri = table.tableUri
    dataset_uri = dataset.datasetUri
    response = start_dataset_profiling_run(
        client1, input={'datasetUri': dataset_uri, 'tableUri': table_uri, 'GlueTableName': table.GlueTableName}
    )
    assert_that(response.datasetUri).is_equal_to(dataset_uri)
    assert_that(response.status).is_equal_to('RUNNING')
    assert_that(response.GlueTableName).is_equal_to(table.GlueTableName)


def test_start_table_profiling_unauthorized(client2, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    assert_that(start_dataset_profiling_run).raises(GqlError).when_called_with(client2, dataset_uri).contains(
        'UnauthorizedOperation', 'PROFILE_DATASET_TABLE', dataset_uri
    )


def test_preview_table(client1, session_s3_dataset2_with_table):
    dataset, table = session_s3_dataset2_with_table
    table_uri = table.tableUri
    response = preview_table(client1, table_uri)
    assert_that(response.rows).exists()


def test_preview_table_unauthorized(client2, session_s3_dataset2_with_table):
    dataset, table = session_s3_dataset2_with_table
    table_uri = table.tableUri
    # TODO: confidentiality levels
    assert_that(preview_table).raises(GqlError).when_called_with(client2, table_uri, {}).contains(
        'UnauthorizedOperation', 'PREVIEW_DATASET_TABLE', table_uri
    )


def test_delete_table(client1, session_s3_dataset2_with_table):
    dataset, table = session_s3_dataset2_with_table
    # todo


def test_delete_table_unauthorized(client2, session_s3_dataset2_with_table):
    dataset, table = session_s3_dataset2_with_table
    table_uri = table.tableUri
    assert_that(delete_table).raises(GqlError).when_called_with(client2, table_uri).contains(
        'UnauthorizedOperation', 'DELETE_DATASET_TABLE', table_uri
    )


def test_create_folder(client1, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    response = create_folder(
        client1, datasetUri=dataset_uri, input={'prefix': 'folderCreatedInTest', 'label': 'labelFolder'}
    )
    assert_that(response.S3Prefix).is_equal_to('folderCreatedInTest')
    assert_that(response.label).is_equal_to('labelFolder')


def test_create_folder_unauthorized(client2, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    assert_that(create_folder).raises(GqlError).when_called_with(client2, dataset_uri, {}).contains(
        'UnauthorizedOperation', 'CREATE_DATASET_FOLDER', dataset_uri
    )


def test_delete_folder(client1, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    location = create_folder(
        client1, datasetUri=dataset_uri, input={'prefix': 'folderToDelete', 'label': 'folderToDelete'}
    )
    response = delete_folder(client1, location.locationUri)
    assert_that(response).is_equal_to(True)


def test_delete_folder_unauthorized(client1, client2, session_s3_dataset1):
    dataset_uri = session_s3_dataset1.datasetUri
    location = create_folder(
        client1, datasetUri=dataset_uri, input={'prefix': 'folderToDelete', 'label': 'folderToDelete'}
    )
    assert_that(delete_folder).raises(GqlError).when_called_with(client2, location.locationUri).contains(
        'UnauthorizedOperation', 'DELETE_DATASET_FOLDER', location.locationUri
    )
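The PR leaves test_delete_table as a TODO, so purely as a hedged illustration: one way it could eventually exercise the delete_table client helper (whose call shape appears in test_delete_table_unauthorized above), with the asserted return value assumed to mirror delete_folder. This sketch is not part of the merged change.

def sketch_test_delete_table(client1, session_s3_dataset2_with_table):
    # Hypothetical sketch only: delete the table synced by the fixture and
    # assert the API reports success (boolean return assumed, as for delete_folder).
    dataset, table = session_s3_dataset2_with_table
    response = delete_table(client1, table.tableUri)
    assert_that(response).is_equal_to(True)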