18: Enhance CLI to support multiple URLs (#19)

Merged 4 commits on Mar 22, 2023

23 changes: 13 additions & 10 deletions src/curldl/cli.py
@@ -47,8 +47,8 @@ def _parse_arguments(cls) -> argparse.Namespace:
parser.add_argument('-V', '--version', action='version', version=version, help='show program version and exit')

parser.add_argument('-b', '--basedir', default='.', help='base download folder')
- parser.add_argument('-o', '--output', help='basedir-relative path to the downloaded file, '
- 'infer from URL if unspecified')
+ output_arg = parser.add_argument('-o', '--output', nargs=1, help='basedir-relative path to the '
+ 'downloaded file, infer from URL if unspecified')
parser.add_argument('-s', '--size', type=int, help='expected download file size')

parser.add_argument('-a', '--algo', choices=hash_algos, default='sha256',
@@ -60,28 +60,31 @@ def _parse_arguments(cls) -> argparse.Namespace:
metavar='LEVEL', help='logging level: ' + ', '.join(log_choices))
parser.add_argument('-v', '--verbose', action='store_true', help='log metadata and headers (implies -l debug)')

- parser.add_argument('url', help='URL to download')
+ parser.add_argument('url', nargs='+', help='URL(s) to download')

args = parser.parse_args()
cls._configure_logger(args)

- return cls._infer_arguments(args)
+ return cls._infer_arguments(output_arg, args)

@classmethod
- def _infer_arguments(cls, args: argparse.Namespace) -> argparse.Namespace:
+ def _infer_arguments(cls, output_arg: argparse.Action, args: argparse.Namespace) -> argparse.Namespace:
"""Infer missing arguments"""
if not args.output:
- url_path = urllib.parse.unquote(urllib.parse.urlparse(args.url).path)
- args.output = os.path.basename(url_path)
- log.info('Saving output to: %s', args.output)
+ args.output = [os.path.basename(urllib.parse.unquote(urllib.parse.urlparse(url).path)) for url in args.url]
+ log.info('Saving download(s) to: %s', ', '.join(args.output))

+ elif len(args.output) != len(args.url):
+ raise argparse.ArgumentError(output_arg, 'Cannot specify output file when downloading multiple URLs')

return args

def main(self) -> object:
"""Command-line program entry point"""
downloader = Downloader(self.args.basedir, progress=self.args.progress, verbose=self.args.verbose)
- downloader.download(self.args.url, rel_path=self.args.output, size=self.args.size,
- digests=self.args.digest and {self.args.algo: self.args.digest})
+ for url, output in zip(self.args.url, self.args.output):
+ downloader.download(url, rel_path=output, size=self.args.size,
+ digests=self.args.digest and {self.args.algo: self.args.digest})
return 0

@staticmethod
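For reference, the output-name inference that the new _infer_arguments performs can be reproduced on its own; the URL below is purely illustrative:

```python
# Standalone sketch of the inference above: the basename of each URL's
# percent-decoded path becomes the basedir-relative output name.
import os.path
import urllib.parse

urls = ['https://example.com/docs/report%202024.pdf?download=1#top']  # illustrative URL only
outputs = [os.path.basename(urllib.parse.unquote(urllib.parse.urlparse(url).path))
           for url in urls]
print(outputs)  # ['report 2024.pdf'] -- the query string and fragment are not part of the path
```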
63 changes: 63 additions & 0 deletions tests/functional/test_cli.py
@@ -1,4 +1,5 @@
"""CLI entry point functional tests"""
import argparse
import inspect
import logging
import os.path
@@ -10,6 +11,7 @@
import pytest
import toml
from _pytest.capture import CaptureFixture
from _pytest.logging import LogCaptureFixture
from pytest_httpserver import HTTPServer
from pytest_mock import MockerFixture

@@ -107,3 +109,64 @@ def test_download_file(mocker: MockerFixture, tmp_path: pathlib.Path, httpserver
assert file_path.is_file()
with open(file_path, 'rb') as file:
assert file.read() == file_data


@pytest.mark.parametrize('specify_output_size', [False, True])
@pytest.mark.parametrize('specify_output_digest', [False, True])
def test_download_multiple_files(mocker: MockerFixture, caplog: LogCaptureFixture,
tmp_path: pathlib.Path, httpserver: HTTPServer,
specify_output_size: bool, specify_output_digest: bool) -> None:
"""Verify that multiple file arguments are downloaded and verification functions correctly,
(including that digest verification is attempted on all files)"""
caplog.set_level(logging.DEBUG)

url_count = 5
file_datas = [bytes(chr(ord('x') + idx), 'ascii') * 128 for idx in range(url_count)]
file_names = [f'file{idx}.txt' for idx in range(url_count)]
file_digest = '150fa3fbdc899bd0b8f95a9fb6027f564d953762'
should_succeed = not specify_output_digest

arguments = ['-b', str(tmp_path), '-l', 'debug', '-v', '-p']
if specify_output_size:
arguments += ['-s', str(128)]
if specify_output_digest:
arguments += ['-a', 'sha1', '-d', file_digest]

for idx in range(url_count):
httpserver.expect_ordered_request(
url_path := f'/loc1/loc2/{file_names[idx]};a=b', query_string={'c': 'd'}).respond_with_data(file_datas[idx])
arguments += [httpserver.url_for(url_path + '?c=d#id')]

patch_logger_config(mocker, 'debug')
patch_system_environment(mocker, arguments)

if not should_succeed:
with pytest.raises(ValueError):
cli.main()
else:
status_code = cli.main()
assert status_code == 0

httpserver.check()
for idx in range(url_count):
file_path = tmp_path / file_names[idx]
assert file_path.exists() == should_succeed or idx == 0
if file_path.exists():
with open(file_path, 'rb') as file:
assert file.read() == file_datas[idx]


def test_multiple_downloads_with_output_file(mocker: MockerFixture, tmp_path: pathlib.Path,
httpserver: HTTPServer) -> None:
"""Verify that specifying output with multiple files results in exception and now download is attempted"""
arguments = ['-b', str(tmp_path), '-o', 'file.txt', '-l', 'critical',
httpserver.url_for('/file1.txt'), httpserver.url_for('/file2.txt')]

patch_logger_config(mocker, 'critical')
patch_system_environment(mocker, arguments)

with pytest.raises(argparse.ArgumentError):
cli.main()

httpserver.check()
assert not os.listdir(tmp_path)
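A note on the exception type checked above: because the new code raises argparse.ArgumentError directly from _infer_arguments (outside parse_args()), it propagates as an ordinary exception rather than triggering argparse's usage-and-exit behaviour, which is why the test uses pytest.raises(argparse.ArgumentError). A minimal sketch with a throwaway parser:

```python
# Minimal sketch: an ArgumentError raised manually carries the offending argument's name
# and, unlike errors raised inside parse_args(), does not print usage or call sys.exit().
import argparse

parser = argparse.ArgumentParser(prog='curldl')
output_arg = parser.add_argument('-o', '--output', nargs=1)

try:
    raise argparse.ArgumentError(output_arg, 'Cannot specify output file when downloading multiple URLs')
except argparse.ArgumentError as exc:
    print(exc)  # argument -o/--output: Cannot specify output file when downloading multiple URLs
```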
26 changes: 26 additions & 0 deletions tests/unit/test_downloader.py
@@ -205,9 +205,11 @@ def download_and_possibly_verify() -> None:
download_and_possibly_verify()
httpserver.check()

partial_timestamp = os.stat(tmp_path / 'file.bin').st_mtime
assert read_file_content(tmp_path / 'file.bin') == file_data
os.rename(tmp_path / 'file.bin', tmp_path / 'file.bin.part')
os.truncate(tmp_path / 'file.bin.part', part_size)
os.utime(tmp_path / 'file.bin.part', times=(partial_timestamp, partial_timestamp))

# Request #2 on partial file: generally fails with 503, which causes pycurl.error due to resume failure
# Rolls back to existing partial file if verification data is present
@@ -302,3 +304,27 @@ def test_aborted_download(tmp_path: pathlib.Path, caplog: LogCaptureFixture) ->
downloader.download(url, 'some-file.txt')

assert ex_info.value.args[0] == pycurl.E_GOT_NOTHING


@pytest.mark.parametrize('size', [100, 5000])
@pytest.mark.parametrize('specify_size', [False, True])
@pytest.mark.parametrize('always_keep_part_size', [0, 50, 2499, 2501, 8000])
def test_partial_download_keep(tmp_path: pathlib.Path, httpserver: HTTPServer, caplog: LogCaptureFixture,
size: int, specify_size: bool, always_keep_part_size: int) -> None:
"""Verify policy of keeping partial download in absence of verification data"""
caplog.set_level(logging.DEBUG)
with open(tmp_path / 'file.txt.part', 'wb') as part_file:
part_file.write(b'x' * (size // 2))

httpserver.expect_oneshot_request('/file.txt').respond_with_handler(
make_range_response_handler('/file.txt', b'y' * size))

downloader = curldl.Downloader(basedir=tmp_path, verbose=True, retry_attempts=0, min_part_bytes=0,
min_always_keep_part_bytes=always_keep_part_size)
downloader.download(httpserver.url_for('/file.txt'), 'file.txt', size=(size if specify_size else None))
httpserver.check()

assert not (tmp_path / 'file.txt.part').exists()
assert read_file_content(tmp_path / 'file.txt') == ((b'x' * (size // 2) + b'y' * (size // 2))
if specify_size or always_keep_part_size <= size // 2
else b'y' * size)
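The expected-content expression above encodes the keep-or-discard policy under test. Read as a rule (an inference from the test's expectations, not a copy of the library's internals): a partial file is resumed when verification data such as the expected size is supplied, or when it is already at least min_always_keep_part_bytes long, and discarded otherwise. A rough sketch:

```python
# Rough sketch of the decision the parametrization above exercises; this mirrors the
# test's expected-content expression, not curldl's actual implementation.
def should_resume_partial(partial_bytes: int, has_verification_data: bool,
                          min_always_keep_part_bytes: int) -> bool:
    if has_verification_data:          # e.g. an expected size was passed to download()
        return True
    return partial_bytes >= min_always_keep_part_bytes

# size=100 leaves a 50-byte partial file in the test above
print(should_resume_partial(50, has_verification_data=True, min_always_keep_part_bytes=8000))   # True
print(should_resume_partial(50, has_verification_data=False, min_always_keep_part_bytes=2501))  # False
print(should_resume_partial(50, has_verification_data=False, min_always_keep_part_bytes=50))    # True
```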