Skip to content

Commit

Permalink
[hailtop] ensure delete also deletes single files (#14345)
Browse files Browse the repository at this point in the history
I rearranged some fixtures as well.
  • Loading branch information
danking authored Feb 23, 2024
1 parent bb3de9b commit 3135946
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 99 deletions.
11 changes: 10 additions & 1 deletion hail/python/hailtop/aiotools/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,17 @@ async def delete(paths: Iterator[str]) -> None:
async with sema:
with SimpleCopyToolProgressBar(description='files', transient=True, total=0) as file_pbar:
listener = file_pbar.make_listener()

async def remove(path):
try:
await fs.remove(path)
except FileNotFoundError:
await fs.rmtree(sema, path, listener=listener)
file_pbar.update(1) # only advance if file or directory removal was successful, not on error

for grouped_paths in grouped(5_000, paths):
await asyncio.gather(*[fs.rmtree(sema, path, listener=listener) for path in grouped_paths])
file_pbar.update(0, total=file_pbar.total() + len(grouped_paths))
await asyncio.gather(*[remove(path) for path in grouped_paths])


async def main() -> None:
Expand Down
45 changes: 45 additions & 0 deletions hail/python/test/hailtop/inter_cloud/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from typing import Tuple, AsyncIterator, Dict
import secrets
import os
import pytest
import asyncio
import functools
from hailtop.utils import bounded_gather2
from hailtop.aiotools.router_fs import RouterAsyncFS, AsyncFS


@pytest.fixture(scope='module')
async def router_filesystem() -> AsyncIterator[Tuple[asyncio.Semaphore, AsyncFS, Dict[str, str]]]:
token = secrets.token_hex(16)

async with RouterAsyncFS() as fs:
file_base = f'/tmp/{token}/'
await fs.mkdir(file_base)

gs_bucket = os.environ['HAIL_TEST_GCS_BUCKET']
gs_base = f'gs://{gs_bucket}/tmp/{token}/'

s3_bucket = os.environ['HAIL_TEST_S3_BUCKET']
s3_base = f's3://{s3_bucket}/tmp/{token}/'

azure_account = os.environ['HAIL_TEST_AZURE_ACCOUNT']
azure_container = os.environ['HAIL_TEST_AZURE_CONTAINER']
azure_base = f'https://{azure_account}.blob.core.windows.net/{azure_container}/tmp/{token}/'

bases = {'file': file_base, 'gs': gs_base, 's3': s3_base, 'azure-https': azure_base}

sema = asyncio.Semaphore(50)
async with sema:
yield (sema, fs, bases)
await bounded_gather2(
sema,
functools.partial(fs.rmtree, sema, file_base),
functools.partial(fs.rmtree, sema, gs_base),
functools.partial(fs.rmtree, sema, s3_base),
functools.partial(fs.rmtree, sema, azure_base),
)

assert not await fs.isdir(file_base)
assert not await fs.isdir(gs_base)
assert not await fs.isdir(s3_base)
assert not await fs.isdir(azure_base)
50 changes: 1 addition & 49 deletions hail/python/test/hailtop/inter_cloud/test_copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@


from .generate_copy_test_specs import run_test_spec, create_test_file, create_test_dir

from .copy_test_specs import COPY_TEST_SPECS
from .utils import fresh_dir


# This fixture is for test_copy_behavior. It runs a series of copy
Expand All @@ -35,54 +35,6 @@ async def cloud_scheme(request):
yield request.param


@pytest.fixture(scope='module')
async def router_filesystem(request) -> AsyncIterator[Tuple[asyncio.Semaphore, AsyncFS, Dict[str, str]]]:
token = secrets.token_hex(16)

with ThreadPoolExecutor() as thread_pool:
async with RouterAsyncFS(
local_kwargs={'thread_pool': thread_pool},
s3_kwargs={'thread_pool': thread_pool},
) as fs:
file_base = f'/tmp/{token}/'
await fs.mkdir(file_base)

gs_bucket = os.environ['HAIL_TEST_GCS_BUCKET']
gs_base = f'gs://{gs_bucket}/tmp/{token}/'

s3_bucket = os.environ['HAIL_TEST_S3_BUCKET']
s3_base = f's3://{s3_bucket}/tmp/{token}/'

azure_account = os.environ['HAIL_TEST_AZURE_ACCOUNT']
azure_container = os.environ['HAIL_TEST_AZURE_CONTAINER']
azure_base = f'https://{azure_account}.blob.core.windows.net/{azure_container}/tmp/{token}/'

bases = {'file': file_base, 'gs': gs_base, 's3': s3_base, 'azure-https': azure_base}

sema = asyncio.Semaphore(50)
async with sema:
yield (sema, fs, bases)
await bounded_gather2(
sema,
functools.partial(fs.rmtree, sema, file_base),
functools.partial(fs.rmtree, sema, gs_base),
functools.partial(fs.rmtree, sema, s3_base),
functools.partial(fs.rmtree, sema, azure_base),
)

assert not await fs.isdir(file_base)
assert not await fs.isdir(gs_base)
assert not await fs.isdir(s3_base)
assert not await fs.isdir(azure_base)


async def fresh_dir(fs, bases, scheme):
token = secrets.token_hex(16)
dir = f'{bases[scheme]}{token}/'
await fs.mkdir(dir)
return dir


@pytest.fixture(
params=[
'file/file',
Expand Down
38 changes: 38 additions & 0 deletions hail/python/test/hailtop/inter_cloud/test_delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Tuple, Dict
import asyncio
import pytest

from .utils import fresh_dir
from hailtop.aiotools.fs import AsyncFS
from hailtop.aiotools.delete import delete


@pytest.fixture(params=['file', 'gs', 's3', 'azure-https'])
async def test_delete_one_file(request, router_filesystem: Tuple[asyncio.Semaphore, AsyncFS, Dict[str, str]]):
sema, fs, bases = router_filesystem
scheme = request.param
dirname = await fresh_dir(fs, bases, scheme)

url = f'{dirname}/file'
await fs.write(url, b'hello world')
assert await fs.isfile(url)
await delete(iter([url]))
assert not await fs.isfile(url)


@pytest.fixture(params=['file', 'gs', 's3', 'azure-https'])
async def test_delete_folder(request, router_filesystem: Tuple[asyncio.Semaphore, AsyncFS, Dict[str, str]]):
sema, fs, bases = router_filesystem
scheme = request.param
dirname = await fresh_dir(fs, bases, scheme)

url = f'{dirname}/folder'
await asyncio.gather(
fs.write(f'{url}/1', b'hello world'),
fs.write(f'{url}/2', b'hello world'),
fs.write(f'{url}/3', b'hello world'),
fs.write(f'{url}/4', b'hello world'),
)
assert await fs.isdir(url)
await delete(iter([url]))
assert not await fs.isdir(url)
51 changes: 2 additions & 49 deletions hail/python/test/hailtop/inter_cloud/test_diff.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,12 @@
from typing import Tuple, AsyncIterator, Dict
import secrets
import os
from typing import Tuple, Dict
import asyncio
import pytest
import functools

from hailtop.aiotools.fs import AsyncFS
from hailtop.frozendict import frozendict
from hailtop.aiotools.diff import diff, DiffException
from hailtop.utils import bounded_gather2
from hailtop.aiotools.router_fs import RouterAsyncFS


@pytest.fixture(scope='module')
async def router_filesystem() -> AsyncIterator[Tuple[asyncio.Semaphore, AsyncFS, Dict[str, str]]]:
token = secrets.token_hex(16)

async with RouterAsyncFS() as fs:
file_base = f'/tmp/{token}/'
await fs.mkdir(file_base)

gs_bucket = os.environ['HAIL_TEST_GCS_BUCKET']
gs_base = f'gs://{gs_bucket}/tmp/{token}/'

s3_bucket = os.environ['HAIL_TEST_S3_BUCKET']
s3_base = f's3://{s3_bucket}/tmp/{token}/'

azure_account = os.environ['HAIL_TEST_AZURE_ACCOUNT']
azure_container = os.environ['HAIL_TEST_AZURE_CONTAINER']
azure_base = f'https://{azure_account}.blob.core.windows.net/{azure_container}/tmp/{token}/'

bases = {'file': file_base, 'gs': gs_base, 's3': s3_base, 'azure-https': azure_base}

sema = asyncio.Semaphore(50)
async with sema:
yield (sema, fs, bases)
await bounded_gather2(
sema,
functools.partial(fs.rmtree, sema, file_base),
functools.partial(fs.rmtree, sema, gs_base),
functools.partial(fs.rmtree, sema, s3_base),
functools.partial(fs.rmtree, sema, azure_base),
)

assert not await fs.isdir(file_base)
assert not await fs.isdir(gs_base)
assert not await fs.isdir(s3_base)
assert not await fs.isdir(azure_base)


async def fresh_dir(fs, bases, scheme):
token = secrets.token_hex(16)
dir = f'{bases[scheme]}{token}/'
await fs.mkdir(dir)
return dir
from .utils import fresh_dir


@pytest.fixture(
Expand Down
10 changes: 10 additions & 0 deletions hail/python/test/hailtop/inter_cloud/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import Dict
import secrets
from hailtop.aiotools.fs import AsyncFS


async def fresh_dir(fs: AsyncFS, bases: Dict[str, str], scheme: str):
token = secrets.token_hex(16)
dir = f'{bases[scheme]}{token}/'
await fs.mkdir(dir)
return dir

0 comments on commit 3135946

Please sign in to comment.