Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dx-launcher: run consolidate_run_tarballs as a separate top-level job #858

Merged
merged 13 commits on
Jul 24, 2018
50 changes: 50 additions & 0 deletions pipes/WDL/dx-launcher/consolidate_run_tarballs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# DNAnexus applet to build with dx-yml-build which can be found here:
# https://gist.githubusercontent.com/mlin/3cce81f54a640c3f62a2725acbc98283/raw/e071dfe1989c31f6267334106115620770e6d21c/dx-yml-build
# To build, save this file as dxapp.yml and run that script alongside.
name: consolidate_run_tarballs
title: consolidate_run_tarballs
dxapi: 1.0.0
version: 0.0.1
description: Consolidates the incremental run tarballs referenced by an upload sentinel record into a single gzipped run tarball.
inputSpec:
  - name: upload_sentinel_record
    class: record
    type: "UploadSentinel"
    help: Sentinel record from incremental upload tool. The RunInfo.xml and run tarballs must also reside in the current project.
  - name: run_id
    class: string
    help: Run ID for use in folder and file names
outputSpec:
  - name: consolidated_run_tarball
    class: file
access:
  network: ["*"]
runSpec:
  systemRequirements:
    main:
      instanceType: mem1_ssd1_x2
  distribution: Ubuntu
  release: "16.04"
  execDepends:
    - name: pigz
  interpreter: bash
  code: |
    #!/bin/bash

    # Entry point: unpack every incremental run tarball listed in the sentinel
    # record into one directory, re-tar it, upload it, and emit the uploaded
    # file as the job output.
    # Globals (read): upload_sentinel_record, run_id (provided by dx job inputs)
    main() {
      set -ex -o pipefail

      # sequentially unpack the run tarballs
      dx get_details "$upload_sentinel_record" | jq -r .tar_file_ids[] > tar_file_ids
      mkdir run/
      # -r keeps read from interpreting backslashes in the file IDs
      while read -r tar_file_id; do
        dx cat "$tar_file_id" | pigz -dc | tar xf - -C run/ --owner root --group root --no-same-owner
      done < tar_file_ids
      du -sh run/

      # tar the consolidated directory and upload
      # TODO: consider zstd/lz4
      dx mkdir -p "$run_id/runs"
      # destination is quoted so a run_id containing spaces cannot word-split
      tar_id=$(tar c -C run/ . | pigz -c | dx upload --brief -p --destination "$run_id/runs/$run_id.tar.gz" -)
      dx-jobutil-add-output consolidated_run_tarball "$tar_id"
    }
34 changes: 14 additions & 20 deletions pipes/WDL/dx-launcher/demux_launcher.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ inputSpec:
class: string
help: DNAnexus ID (workflow-xxxx) of the demux/demux-plus workflow to launch on each lane of the run. The workflow and all its parts and dependencies must reside in the current project.
default: DEFAULT_DEMUX_WORKFLOW_ID
- name: consolidate_run_tarballs_applet_id
class: string
help: DNAnexus ID (applet-xxxx) of the consolidate_run_tarballs applet to use when needed; must reside in the current project.
default: DEFAULT_CONSOLIDATE_RUN_TARBALLS_APPLET_ID
- name: folder
class: string
default: /
Expand Down Expand Up @@ -133,8 +137,16 @@ runSpec:
dx get_details "$upload_sentinel_record" | jq -r .tar_file_ids[] > tar_file_ids
if [ "$(cat tar_file_ids | wc -l)" -gt 1 ]; then
# if there are multiple run tarballs, launch a subjob to consolidate them
subjob=$(dx-jobutil-new-job consolidate_tars --instance-type="$tar_consolidation_instance_size" -i upload_sentinel_record="$upload_sentinel_record" -i folder="$folder" -i run_id="$run_id")
run_tarball="$subjob:run_tarball"
runcmd="dx run $consolidate_run_tarballs_applet_id --instance-type=$tar_consolidation_instance_size -i upload_sentinel_record=$(dx-jobutil-parse-link "$upload_sentinel_record") -i run_id=$run_id --folder $folder -y --brief"
echo "$runcmd"
set +x
if [ -n "$api_token" ]; then
# add API token to run command without copying it into the job log
runcmd="unset DX_JOB_ID; $runcmd --auth-token $(cat api_token | tr -d '\n')"
fi
subjob=$(bash -e -o pipefail -c "$runcmd")
set -x
run_tarball="$subjob:consolidated_run_tarball"
else
run_tarball=$(cat tar_file_ids | tr -d '\n')
fi
Expand Down Expand Up @@ -172,24 +184,6 @@ runSpec:
fi
}

# Unpack all incremental run tarballs listed in the sentinel record into a
# single directory, re-tar it, upload it, and add it as job output "run_tarball".
# Globals (read): upload_sentinel_record, folder, run_id
consolidate_tars() {
  set -ex -o pipefail

  # sequentially unpack the run tarballs
  dx get_details "$upload_sentinel_record" | jq -r .tar_file_ids[] > tar_file_ids
  mkdir run/
  # -r keeps read from interpreting backslashes in the file IDs
  while read -r tar_file_id; do
    dx cat "$tar_file_id" | pigz -dc | tar xf - -C run/ --owner root --group root --no-same-owner
  done < tar_file_ids
  du -sh run/

  # tar the consolidated directory and upload
  # TODO: consider zstd/lz4
  dx mkdir -p "$folder/$run_id"
  # destination is quoted so folder/run_id containing spaces cannot word-split
  tar_id=$(tar c -C run/ . | pigz -c | dx upload --brief -p --destination "$folder/$run_id/$run_id.tar.gz" -)
  dx-jobutil-add-output run_tarball "$tar_id"
}

propagate_outputs() {
set -ex -o pipefail
dx find data --brief --class file --project $output_project --folder "$folder/$run_id/reads" | cut -d ':' -f2 | xargs -t -i dx-jobutil-add-output demux_outputs --class array:file "{}"
Expand Down
6 changes: 5 additions & 1 deletion travis/build-dx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ done
# demux_plus workflow ID as a default input (substituted into the launcher below)
demux_plus_workflow_id=$(grep demux_plus "$COMPILE_SUCCESS" | cut -f 2)
pushd pipes/WDL/dx-launcher
# Build the consolidate_run_tarballs applet first so its ID can be baked into
# demux_launcher as a default input.
cp consolidate_run_tarballs.yml dxapp.yml
dx_id=$(./dx-yml-build -a --destination "/build/$VERSION/" | jq -r ".id")
echo -e "consolidate_run_tarballs\t$dx_id" >> "$COMPILE_SUCCESS"
# Build the launcher with both default IDs substituted in.
sed "s/DEFAULT_DEMUX_WORKFLOW_ID/$demux_plus_workflow_id/" demux_launcher.yml \
  | sed "s/DEFAULT_CONSOLIDATE_RUN_TARBALLS_APPLET_ID/$dx_id/" > dxapp.yml
dx_id=$(./dx-yml-build -a --destination "/build/$VERSION/" | jq -r ".id")
popd
echo -e "demux_launcher\t$dx_id" >> "$COMPILE_SUCCESS"
Expand Down