Skip to content

Commit

Permalink
wip! build: Add --no-upload option
Browse files Browse the repository at this point in the history
XXX FIXME

Resolves: <#219>
  • Loading branch information
tsibley committed May 15, 2024
1 parent 14a392e commit ab8b6d1
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
24 changes: 24 additions & 0 deletions nextstrain/cli/command/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,30 @@ def register_parser(subparser):
dest = "download",
action = "store_false")

# XXX FIXME: --no-upload: decide what the corresponding download behaviour
# should be, e.g. use of --download '!...' or making --no-download accept
# patterns too.
parser.add_argument(
"--no-upload",
metavar = "<pattern>",
help = dedent(f"""\
Exclude files matching ``<pattern>`` from being uploaded as part of
the remote build. Shell-style advanced globbing is supported, but
be sure to escape wildcards or quote the whole pattern so your
shell doesn't expand them. May be passed more than once.
Currently only supported when also using :option:`--aws-batch`.
Default is to upload the entire pathogen build directory (except
for some ancillary files which are always excluded).
Besides basic glob features like single-part wildcards (``*``),
character classes (``[…]``), and brace expansion (``{{…, …}}``),
several advanced globbing features are also supported: multi-part
wildcards (``**``), extended globbing (``@(…)``, ``+(…)``, etc.),
and negation (``!…``).
{SKIP_AUTO_DEFAULT_IN_HELP}
"""),
action = "append")

# A --logs option doesn't make much sense right now for most of our
# runtimes, but I can see how it might in the future. So we're ready if
# that future comes to pass, set up --no-logs as if there's a --logs option
Expand Down
2 changes: 1 addition & 1 deletion nextstrain/cli/runner/aws_batch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None
print_stage("Uploading %s to S3" % local_workdir)

bucket = s3.bucket(opts.s3_bucket)
remote_workdir = s3.upload_workdir(local_workdir, bucket, run_id)
remote_workdir = s3.upload_workdir(local_workdir, bucket, run_id, opts.no_upload)

print("uploaded:", s3.object_url(remote_workdir))

Expand Down
16 changes: 14 additions & 2 deletions nextstrain/cli/runner/aws_batch/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,20 @@ def object_from_url(s3url: str) -> S3Object:
return bucket(url.netloc).Object(key)


def upload_workdir(workdir: Path, bucket: S3Bucket, run_id: str) -> S3Object:
def upload_workdir(workdir: Path, bucket: S3Bucket, run_id: str, patterns: List[str] = None) -> S3Object:
"""
Upload a ZIP archive of the local *workdir* to the remote S3 *bucket* for
the given *run_id*.

An optional list of *patterns* (shell-style advanced globs) can be passed
to selectively exclude part of the local *workdir* from being uploaded.

Returns the S3.Object instance of the uploaded archive.
"""

remote_workdir = bucket.Object(run_id + ".zip")

excluded = path_matcher([
always_excluded = path_matcher([
# Jobs don't use .git, so save the bandwidth/space/time. It may also
# contain information in history that shouldn't be uploaded.
".git/",
Expand All @@ -65,6 +68,13 @@ def upload_workdir(workdir: Path, bucket: S3Bucket, run_id: str) -> S3Object:
"__pycache__/",
])

if patterns:
deselected = glob_matcher(patterns)
else:
deselected = lambda path: False

excluded = lambda path: always_excluded(path) or deselected(path)

# Stream writes directly to the remote ZIP file
remote_file: Any
with fsspec.open(object_url(remote_workdir), "wb", auto_mkdir = False) as remote_file:
Expand All @@ -86,6 +96,8 @@ def download_workdir(remote_workdir: S3Object, workdir: Path, patterns: List[str
to selectively download only part of the remote workdir.
"""

# XXX FIXME: --no-upload: how does this interact with downloads?

excluded = path_matcher([
# Jobs don't use .git and it may also contain information that
# shouldn't be uploaded.
Expand Down

0 comments on commit ab8b6d1

Please sign in to comment.