[Storage] Make sky storage delete parallel #1990 #2058

Merged
11 commits merged on Jun 28, 2023
3 changes: 1 addition & 2 deletions sky/cli.py
@@ -3251,8 +3251,7 @@ def storage_delete(names: List[str], all: bool): # pylint: disable=redefined-bu
  else:
      names = _get_glob_storages(names)

- for name in names:
-     sky.storage_delete(name)
+ subprocess_utils.run_in_parallel(sky.storage_delete, names)


@cli.group(cls=_NaturalOrderGroup)
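For context on the change above: instead of deleting buckets one at a time in a serial loop, sky storage delete now maps sky.storage_delete over all names concurrently via subprocess_utils.run_in_parallel. A minimal sketch of that parallel-map pattern is below, assuming a plain thread pool; it is an illustration only, not SkyPilot's actual subprocess_utils implementation.

from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, Iterable, List

def run_in_parallel_sketch(func: Callable[[Any], Any],
                           args: Iterable[Any],
                           max_workers: int = 8) -> List[Any]:
    """Apply func to each item in args using a thread pool.

    Threads are a reasonable fit because bucket deletion is I/O-bound
    (network calls to the cloud provider), so the GIL is not a bottleneck.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map preserves input order and re-raises a worker's
        # exception when its result is consumed.
        return list(executor.map(func, args))

# Hypothetical usage mirroring the cli.py change:
# run_in_parallel_sketch(sky.storage_delete, ['bucket-a', 'bucket-b'])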
59 changes: 59 additions & 0 deletions tests/test_smoke.py
@@ -2351,6 +2351,28 @@ def tmp_scratch_storage_obj(self, tmp_bucket_name):
# Stores must be added in the test.
yield from self.yield_storage_object(name=tmp_bucket_name)

@pytest.fixture
def tmp_multiple_scratch_storage_obj(self):
# Creates a list of 5 storage objects with no source to create
# multiple scratch storages.
# Stores for each object in the list must be added in the test.
storage_mult_obj = []
for _ in range(5):
timestamp = str(time.time()).replace('.', '')
store_obj = storage_lib.Storage(name=f'sky-test-{timestamp}')
storage_mult_obj.append(store_obj)
yield storage_mult_obj
romilbhardwaj (Collaborator) commented on Jun 22, 2023:
Is there some way to get storage objects from yield_storage_object here? That way, deletion after the test is handled by that code. If not, we should add deletion code after the yield statement here (refer to yield_storage_object).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot use multiple yield statements in a fixture since pytest does not support them, so we had to resort to this implementation. I am adding the deletion code after the yield statement as suggested.
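To make the constraint concrete: a pytest fixture body may contain only one yield, so a helper like yield_storage_object cannot be re-entered to produce several objects. One common workaround is a factory fixture that yields a constructor and tears everything down after its single yield. The sketch below is illustrative only; the factory name and import path are assumptions, not code from this PR.

import pytest
from sky.data import storage as storage_lib  # same alias as test_smoke.py; path assumed

@pytest.fixture
def make_scratch_storage():
    """Factory fixture: lets a test create any number of storage objects,
    all cleaned up after the fixture's single yield."""
    created = []

    def _make(name: str) -> storage_lib.Storage:
        obj = storage_lib.Storage(name=name)
        created.append(obj)
        return obj

    yield _make  # the one and only yield a fixture may have
    for obj in created:
        obj.delete()  # teardown runs once and covers every created object

A test would then call make_scratch_storage('sky-test-...') five times instead of relying on a pre-built list of five objects.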

for storage_obj in storage_mult_obj:
handle = global_user_state.get_handle_from_storage_name(
storage_obj.name)
if handle:
# If handle exists, delete manually
# TODO(romilb): This is potentially risky - if the delete method has
# bugs, this can cause resource leaks. Ideally we should manually
# eject storage from global_user_state and delete the bucket using
# boto3 directly.
storage_obj.delete()

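Regarding the TODO above: deleting the bucket out-of-band would mean bypassing Storage.delete() and calling the cloud SDK directly (plus ejecting the record from global_user_state, which is omitted here). A rough boto3-only sketch of that idea for S3, illustrative and not part of this PR:

import boto3

def force_delete_s3_bucket(bucket_name: str) -> None:
    """Empty and delete an S3 bucket without going through Storage.delete()."""
    bucket = boto3.resource('s3').Bucket(bucket_name)
    # A bucket must be empty before it can be deleted; versioned buckets
    # would additionally need bucket.object_versions.delete().
    bucket.objects.all().delete()
    bucket.delete()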
@pytest.fixture
def tmp_local_storage_obj(self, tmp_bucket_name, tmp_source):
# Creates a temporary storage object. Stores must be added in the test.
@@ -2434,6 +2456,43 @@ def test_new_bucket_creation_and_deletion(self, tmp_local_storage_obj,
out = subprocess.check_output(['sky', 'storage', 'ls'])
assert tmp_local_storage_obj.name not in out.decode('utf-8')

@pytest.mark.xdist_group('multiple_bucket_deletion')
@pytest.mark.parametrize('store_type', [
storage_lib.StoreType.S3, storage_lib.StoreType.GCS,
pytest.param(storage_lib.StoreType.R2, marks=pytest.mark.cloudflare)
])
def test_multiple_buckets_creation_and_deletion(
self, tmp_multiple_scratch_storage_obj, store_type):
# Creates multiple new scratch buckets (5 buckets, no source)
# and deletes them.
storage_obj_name = []
for store_obj in tmp_multiple_scratch_storage_obj:
store_obj.add_store(store_type)
storage_obj_name.append(store_obj.name)

# Run sky storage ls to check if all storage objects exist in the
# output filtered by store type
out_all = subprocess.check_output(['sky', 'storage', 'ls'])
out = [
item.split()[0]
for item in out_all.decode('utf-8').splitlines()
if store_type.value in item
]
assert all([item in out for item in storage_obj_name])

# Run sky storage delete -a to delete all storage objects
subprocess.check_output(['sky', 'storage', 'delete', '-a'])

# Run sky storage ls to check if all storage objects filtered by store
# type are deleted
out_all = subprocess.check_output(['sky', 'storage', 'ls'])
out = [
item.split()[0]
for item in out_all.decode('utf-8').splitlines()
if store_type.value in item
]
assert all([item not in out for item in storage_obj_name])

@pytest.mark.parametrize('store_type', [
storage_lib.StoreType.S3, storage_lib.StoreType.GCS,
pytest.param(storage_lib.StoreType.R2, marks=pytest.mark.cloudflare)