18 enhance cli to support multiple urls (#19)

noexec authored Mar 22, 2023
2 parents 0942a20 + fbf139a commit 63416dd

Showing 3 changed files with 102 additions and 10 deletions.
src/curldl/cli.py (13 additions, 10 deletions)

@@ -47,8 +47,8 @@ def _parse_arguments(cls) -> argparse.Namespace:
         parser.add_argument('-V', '--version', action='version', version=version, help='show program version and exit')

         parser.add_argument('-b', '--basedir', default='.', help='base download folder')
-        parser.add_argument('-o', '--output', help='basedir-relative path to the downloaded file, '
-                            'infer from URL if unspecified')
+        output_arg = parser.add_argument('-o', '--output', nargs=1, help='basedir-relative path to the '
+                                         'downloaded file, infer from URL if unspecified')
         parser.add_argument('-s', '--size', type=int, help='expected download file size')

         parser.add_argument('-a', '--algo', choices=hash_algos, default='sha256',
@@ -60,28 +60,31 @@ def _parse_arguments(cls) -> argparse.Namespace:
                             metavar='LEVEL', help='logging level: ' + ', '.join(log_choices))
         parser.add_argument('-v', '--verbose', action='store_true', help='log metadata and headers (implies -l debug)')

-        parser.add_argument('url', help='URL to download')
+        parser.add_argument('url', nargs='+', help='URL(s) to download')

         args = parser.parse_args()
         cls._configure_logger(args)

-        return cls._infer_arguments(args)
+        return cls._infer_arguments(output_arg, args)

     @classmethod
-    def _infer_arguments(cls, args: argparse.Namespace) -> argparse.Namespace:
+    def _infer_arguments(cls, output_arg: argparse.Action, args: argparse.Namespace) -> argparse.Namespace:
         """Infer missing arguments"""
         if not args.output:
-            url_path = urllib.parse.unquote(urllib.parse.urlparse(args.url).path)
-            args.output = os.path.basename(url_path)
-            log.info('Saving output to: %s', args.output)
+            args.output = [os.path.basename(urllib.parse.unquote(urllib.parse.urlparse(url).path)) for url in args.url]
+            log.info('Saving download(s) to: %s', ', '.join(args.output))
+
+        elif len(args.output) != len(args.url):
+            raise argparse.ArgumentError(output_arg, 'Cannot specify output file when downloading multiple URLs')

         return args

     def main(self) -> object:
         """Command-line program entry point"""
         downloader = Downloader(self.args.basedir, progress=self.args.progress, verbose=self.args.verbose)
-        downloader.download(self.args.url, rel_path=self.args.output, size=self.args.size,
-                            digests=self.args.digest and {self.args.algo: self.args.digest})
+        for url, output in zip(self.args.url, self.args.output):
+            downloader.download(url, rel_path=output, size=self.args.size,
+                                digests=self.args.digest and {self.args.algo: self.args.digest})
         return 0

     @staticmethod
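As an aside, here is a minimal sketch (not part of the commit) of the output-name inference that the new list comprehension in _infer_arguments performs. The helper name and example URL are hypothetical; only the urllib.parse and os.path calls from the diff above are used:

import os.path
import urllib.parse

def infer_output_names(urls):
    # Hypothetical helper mirroring the comprehension above: percent-decode
    # each URL's path component and keep only its final path segment.
    return [os.path.basename(urllib.parse.unquote(urllib.parse.urlparse(url).path))
            for url in urls]

# The query string and fragment live outside .path, so they are discarded:
print(infer_output_names(['https://example.com/dir/some%20file.txt?c=d#id']))
# -> ['some file.txt']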
tests/functional/test_cli.py (63 additions)

@@ -1,4 +1,5 @@
"""CLI entry point functional tests"""
import argparse
import inspect
import logging
import os.path
Expand All @@ -10,6 +11,7 @@
import pytest
import toml
from _pytest.capture import CaptureFixture
from _pytest.logging import LogCaptureFixture
from pytest_httpserver import HTTPServer
from pytest_mock import MockerFixture

@@ -107,3 +109,64 @@ def test_download_file(mocker: MockerFixture, tmp_path: pathlib.Path, httpserver
     assert file_path.is_file()
     with open(file_path, 'rb') as file:
         assert file.read() == file_data
+
+
+@pytest.mark.parametrize('specify_output_size', [False, True])
+@pytest.mark.parametrize('specify_output_digest', [False, True])
+def test_download_multiple_files(mocker: MockerFixture, caplog: LogCaptureFixture,
+                                 tmp_path: pathlib.Path, httpserver: HTTPServer,
+                                 specify_output_size: bool, specify_output_digest: bool) -> None:
+    """Verify that multiple file arguments are downloaded and that verification functions correctly
+    (including that digest verification is attempted on all files)"""
+    caplog.set_level(logging.DEBUG)
+
+    url_count = 5
+    file_datas = [bytes(chr(ord('x') + idx), 'ascii') * 128 for idx in range(url_count)]
+    file_names = [f'file{idx}.txt' for idx in range(url_count)]
+    file_digest = '150fa3fbdc899bd0b8f95a9fb6027f564d953762'
+    should_succeed = not specify_output_digest
+
+    arguments = ['-b', str(tmp_path), '-l', 'debug', '-v', '-p']
+    if specify_output_size:
+        arguments += ['-s', str(128)]
+    if specify_output_digest:
+        arguments += ['-a', 'sha1', '-d', file_digest]
+
+    for idx in range(url_count):
+        httpserver.expect_ordered_request(
+            url_path := f'/loc1/loc2/{file_names[idx]};a=b', query_string={'c': 'd'}).respond_with_data(file_datas[idx])
+        arguments += [httpserver.url_for(url_path + '?c=d#id')]
+
+    patch_logger_config(mocker, 'debug')
+    patch_system_environment(mocker, arguments)
+
+    if not should_succeed:
+        with pytest.raises(ValueError):
+            cli.main()
+    else:
+        status_code = cli.main()
+        assert status_code == 0
+
+    httpserver.check()
+    for idx in range(url_count):
+        file_path = tmp_path / file_names[idx]
+        assert file_path.exists() == should_succeed or idx == 0
+        if file_path.exists():
+            with open(file_path, 'rb') as file:
+                assert file.read() == file_datas[idx]
+
+
+def test_multiple_downloads_with_output_file(mocker: MockerFixture, tmp_path: pathlib.Path,
+                                             httpserver: HTTPServer) -> None:
+    """Verify that specifying output with multiple files results in an exception and no download is attempted"""
+    arguments = ['-b', str(tmp_path), '-o', 'file.txt', '-l', 'critical',
+                 httpserver.url_for('/file1.txt'), httpserver.url_for('/file2.txt')]
+
+    patch_logger_config(mocker, 'critical')
+    patch_system_environment(mocker, arguments)
+
+    with pytest.raises(argparse.ArgumentError):
+        cli.main()
+
+    httpserver.check()
+    assert not os.listdir(tmp_path)
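A hedged observation on test_download_multiple_files: the assertion file_path.exists() == should_succeed or idx == 0 suggests that the hard-coded sha1 value is the digest of the first payload (b'x' * 128), so when a single digest is supplied it matches only the first download and every later one fails verification. Assuming that reading of the test is right, it can be checked with standard hashlib:

import hashlib

# If the assumption holds, this prints the constant used in the test,
# 150fa3fbdc899bd0b8f95a9fb6027f564d953762.
print(hashlib.sha1(b'x' * 128).hexdigest())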
tests/unit/test_downloader.py (26 additions)

@@ -205,9 +205,11 @@ def download_and_possibly_verify() -> None:
     download_and_possibly_verify()
     httpserver.check()

+    partial_timestamp = os.stat(tmp_path / 'file.bin').st_mtime
     assert read_file_content(tmp_path / 'file.bin') == file_data
     os.rename(tmp_path / 'file.bin', tmp_path / 'file.bin.part')
     os.truncate(tmp_path / 'file.bin.part', part_size)
+    os.utime(tmp_path / 'file.bin.part', times=(partial_timestamp, partial_timestamp))

     # Request #2 on partial file: generally fails with 503, which causes pycurl.error due to resume failure
     # Rolls back to existing partial file if verification data is present

@@ -302,3 +304,27 @@ def test_aborted_download(tmp_path: pathlib.Path, caplog: LogCaptureFixture) ->
         downloader.download(url, 'some-file.txt')

     assert ex_info.value.args[0] == pycurl.E_GOT_NOTHING
+
+
+@pytest.mark.parametrize('size', [100, 5000])
+@pytest.mark.parametrize('specify_size', [False, True])
+@pytest.mark.parametrize('always_keep_part_size', [0, 50, 2499, 2501, 8000])
+def test_partial_download_keep(tmp_path: pathlib.Path, httpserver: HTTPServer, caplog: LogCaptureFixture,
+                               size: int, specify_size: bool, always_keep_part_size: int) -> None:
+    """Verify policy of keeping partial download in absence of verification data"""
+    caplog.set_level(logging.DEBUG)
+    with open(tmp_path / 'file.txt.part', 'wb') as part_file:
+        part_file.write(b'x' * (size // 2))
+
+    httpserver.expect_oneshot_request('/file.txt').respond_with_handler(
+        make_range_response_handler('/file.txt', b'y' * size))
+
+    downloader = curldl.Downloader(basedir=tmp_path, verbose=True, retry_attempts=0, min_part_bytes=0,
+                                   min_always_keep_part_bytes=always_keep_part_size)
+    downloader.download(httpserver.url_for('/file.txt'), 'file.txt', size=(size if specify_size else None))
+    httpserver.check()
+
+    assert not (tmp_path / 'file.txt.part').exists()
+    assert read_file_content(tmp_path / 'file.txt') == ((b'x' * (size // 2) + b'y' * (size // 2))
+                                                        if specify_size or always_keep_part_size <= size // 2
+                                                        else b'y' * size)
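For context on the two lines added to the resume test (partial_timestamp and os.utime): os.truncate updates a file's modification time, so the test captures the original mtime first and restores it afterwards, presumably to keep the fabricated .part file indistinguishable from a genuinely interrupted download. A self-contained sketch of that idiom, with hypothetical paths and sizes:

import os
import pathlib
import tempfile
import time

with tempfile.TemporaryDirectory() as tmp:
    path = pathlib.Path(tmp) / 'file.bin'
    path.write_bytes(b'x' * 100)

    mtime = os.stat(path).st_mtime            # capture mtime before mutation
    time.sleep(0.05)                          # ensure a visible mtime change
    os.truncate(path, 50)                     # truncation bumps the mtime
    os.utime(path, times=(mtime, mtime))      # restore atime and mtime

    # Allow for filesystem timestamp granularity when comparing floats
    assert abs(os.stat(path).st_mtime - mtime) < 1e-3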