dx-launcher: run consolidate_run_tarballs as a separate top-level job (#858)

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* place consolidated output within "runs" subdir of main run directory

To be consistent with the prior demux workflow output, place the consolidated tarball within the "runs" subdir of the main run directory.
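
For illustration only (hypothetical run ID and folder, not part of this commit), and assuming the job's output folder is the launcher's "folder" input, the consolidated tarball would land at roughly:

    <folder>/<run_id>/runs/<run_id>.tar.gz
    e.g. /runs/180724_EXAMPLE_RUN/runs/180724_EXAMPLE_RUN.tar.gz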
mlin authored and tomkinsc committed Jul 24, 2018
1 parent 2d09ab6 commit dbeda8d
Showing 3 changed files with 69 additions and 21 deletions.
50 changes: 50 additions & 0 deletions pipes/WDL/dx-launcher/consolidate_run_tarballs.yml
@@ -0,0 +1,50 @@
# DNAnexus applet to build with dx-yml-build which can be found here:
# https://gist.githubusercontent.com/mlin/3cce81f54a640c3f62a2725acbc98283/raw/e071dfe1989c31f6267334106115620770e6d21c/dx-yml-build
# To build, save this file as dxapp.yml and run that script alongside.
name: consolidate_run_tarballs
title: consolidate_run_tarballs
dxapi: 1.0.0
version: 0.0.1
description: Consolidates the run tarballs referenced by an incremental uploader sentinel record into a single tarball of the full run directory.
inputSpec:
- name: upload_sentinel_record
class: record
type: "UploadSentinel"
help: Sentinel record from incremental upload tool. The RunInfo.xml and run tarballs must also reside in the current project.
- name: run_id
class: string
help: Run ID for use in folder and file names
outputSpec:
- name: consolidated_run_tarball
class: file
access:
network: ["*"]
runSpec:
systemRequirements:
main:
instanceType: mem1_ssd1_x2
distribution: Ubuntu
release: "16.04"
execDepends:
- name: pigz
interpreter: bash
code: |
#!/bin/bash
main() {
set -ex -o pipefail
# sequentially unpack the run tarballs
dx get_details "$upload_sentinel_record" | jq -r .tar_file_ids[] > tar_file_ids
mkdir run/
while read tar_file_id; do
dx cat "$tar_file_id" | pigz -dc | tar xf - -C run/ --owner root --group root --no-same-owner
done < tar_file_ids
du -sh run/
# tar the consolidated directory and upload
# TODO: consider zstd/lz4
dx mkdir -p "$run_id/runs"
tar_id=$(tar c -C run/ . | pigz -c | dx upload --brief -p --destination $(printf "%s/runs/%s.tar.gz" "$run_id" "$run_id") -)
dx-jobutil-add-output consolidated_run_tarball "$tar_id"
}
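
For reference, a minimal sketch of building and invoking this applet by hand (hypothetical applet/record IDs and destination; the build steps mirror what travis/build-dx.sh does below):

    # build the applet with dx-yml-build (see the comment at the top of this file)
    cp consolidate_run_tarballs.yml dxapp.yml
    applet_id=$(./dx-yml-build -a --destination /build/<version>/ | jq -r .id)

    # run it against an incremental-uploader sentinel record
    dx run "$applet_id" \
        -i upload_sentinel_record=record-xxxx \
        -i run_id=180724_EXAMPLE_RUN \
        --folder /runs -y --brief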
34 changes: 14 additions & 20 deletions pipes/WDL/dx-launcher/demux_launcher.yml
@@ -15,6 +15,10 @@ inputSpec:
class: string
help: DNAnexus ID (workflow-xxxx) of the demux/demux-plus workflow to launch on each lane of the run. The workflow and all its parts and dependencies must reside in the current project.
default: DEFAULT_DEMUX_WORKFLOW_ID
- name: consolidate_run_tarballs_applet_id
class: string
help: DNAnexus ID (applet-xxxx) of the consolidate_run_tarballs applet to use when needed; must reside in the current project.
default: DEFAULT_CONSOLIDATE_RUN_TARBALLS_APPLET_ID
- name: folder
class: string
default: /
@@ -133,8 +137,16 @@ runSpec:
dx get_details "$upload_sentinel_record" | jq -r .tar_file_ids[] > tar_file_ids
if [ "$(cat tar_file_ids | wc -l)" -gt 1 ]; then
# if there are multiple run tarballs, launch a subjob to consolidate them
subjob=$(dx-jobutil-new-job consolidate_tars --instance-type="$tar_consolidation_instance_size" -i upload_sentinel_record="$upload_sentinel_record" -i folder="$folder" -i run_id="$run_id")
run_tarball="$subjob:run_tarball"
runcmd="dx run $consolidate_run_tarballs_applet_id --instance-type=$tar_consolidation_instance_size -i upload_sentinel_record=$(dx-jobutil-parse-link "$upload_sentinel_record") -i run_id=$run_id --folder $folder -y --brief"
echo "$runcmd"
set +x
if [ -n "$api_token" ]; then
# add API token to run command without copying it into the job log
runcmd="unset DX_JOB_ID; $runcmd --auth-token $(cat api_token | tr -d '\n')"
fi
subjob=$(bash -e -o pipefail -c "$runcmd")
set -x
run_tarball="$subjob:consolidated_run_tarball"
else
run_tarball=$(cat tar_file_ids | tr -d '\n')
fi
@@ -172,24 +184,6 @@ runSpec:
fi
}
consolidate_tars() {
set -ex -o pipefail
# sequentially unpack the run tarballs
dx get_details "$upload_sentinel_record" | jq -r .tar_file_ids[] > tar_file_ids
mkdir run/
while read tar_file_id; do
dx cat "$tar_file_id" | pigz -dc | tar xf - -C run/ --owner root --group root --no-same-owner
done < tar_file_ids
du -sh run/
# tar the consolidated directory and upload
# TODO: consider zstd/lz4
dx mkdir -p "$folder/$run_id"
tar_id=$(tar c -C run/ . | pigz -c | dx upload --brief -p --destination $(printf "%s/%s/%s.tar.gz" "$folder" "$run_id" "$run_id") -)
dx-jobutil-add-output run_tarball "$tar_id"
}
propagate_outputs() {
set -ex -o pipefail
dx find data --brief --class file --project $output_project --folder "$folder/$run_id/reads" | cut -d ':' -f2 | xargs -t -i dx-jobutil-add-output demux_outputs --class array:file "{}"
6 changes: 5 additions & 1 deletion travis/build-dx.sh
@@ -59,7 +59,11 @@ done
# demux_plus workflow ID as a default input
demux_plus_workflow_id=$(grep demux_plus $COMPILE_SUCCESS | cut -f 2)
pushd pipes/WDL/dx-launcher
sed "s/DEFAULT_DEMUX_WORKFLOW_ID/$demux_plus_workflow_id/" demux_launcher.yml > dxapp.yml
cp consolidate_run_tarballs.yml dxapp.yml
dx_id=$(./dx-yml-build -a --destination /build/$VERSION/ | jq -r ".id")
echo -e "consolidate_run_tarballs\t$dx_id" >> $COMPILE_SUCCESS
sed "s/DEFAULT_DEMUX_WORKFLOW_ID/$demux_plus_workflow_id/" demux_launcher.yml \
| sed "s/DEFAULT_CONSOLIDATE_RUN_TARBALLS_APPLET_ID/$dx_id/" > dxapp.yml
dx_id=$(./dx-yml-build -a --destination /build/$VERSION/ | jq -r ".id")
popd
echo -e "demux_launcher\t$dx_id" >> $COMPILE_SUCCESS
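
For context (hypothetical IDs): $COMPILE_SUCCESS is a tab-separated manifest of build names and DNAnexus IDs, which is why the workflow ID is pulled out with cut -f 2 above. After this change it ends up with entries along the lines of:

    demux_plus                  workflow-xxxx
    consolidate_run_tarballs    applet-xxxx
    demux_launcher              applet-yyyy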
