diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e5a5af..ffa2f1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,14 +2,14 @@ ## MIPTools (development version) -- Automatically build and deploy container using Github Actions (@arisp99, #11). -- Fix build failure due to dependency changes (#7). +### App Changes -### Maintenance - -- Remove duplicated files. -- Improve bash errors. -- Make strings human readable (@arisp99, #5). +- New `bs_download` app replaces the `download` app. The new app improves the + method to download data from the Illumina BaseSpace Sequence Hub by using + command line tools (@arisp99, #13). +- The `demux` app no longer requires both a run directory and an output + directory. These directories have been combined so that fastq files are + output to the run directory. ### Documentation Overhaul @@ -17,6 +17,17 @@ - Improve clarity of README and add additional instructions on downloading or building the container. +### Bug Fixes + +- Fix build failure due to dependency changes (#7). + +### Maintenance + +- Automatically build and deploy container using GitHub Actions (@arisp99, #11). +- Remove duplicated files. +- Improve bash errors. +- Make strings human readable (@arisp99, #5). + ## MIPTools 1.0.0 - First major release. diff --git a/MIPTools.def b/MIPTools.def index d3810f3..fe1e200 100644 --- a/MIPTools.def +++ b/MIPTools.def @@ -186,6 +186,10 @@ From: amd64/ubuntu:20.04 # install parasight scp /opt/programs/parasight_v7.6/parasight.pl /opt/bin/parasight76.pl + # install basespace cli + BS_PATH="https://launch.basespace.illumina.com/CLI/latest/amd64-linux/bs" + wget $BS_PATH -O /opt/bin/bs + # add executable flag to executables chmod -R +xr /usr/bin chmod -R +xr /opt/bin @@ -334,88 +338,122 @@ From: amd64/ubuntu:20.04 .
/opt/analysis/wrangle.sh ################################################################## -## Download App ## +## Basespace Download App ## ################################################################## -%apprun download - # Parse options +%apprun bs_download + # Exit if something fails set -e - set -u - while getopts r: opt; do - case $opt in - r) run_id=$OPTARG;; - ?) echo "Usage: singularity run --app download \\" - echo " -B /path_to_output_dir:/opt/analysis \\" - echo " -B /path_to_base_resources:/opt/resources \\" - echo " mycontainer.sif -r my_Illumina_run_ID" - echo "An 'access_token.txt' file with a valid access token is " - echo "required. It must be present in base_resources directory." - echo "A data directory where the data will be downloaded to" - echo "must be mounted to /opt/data." - exit 1;; - esac - done - # Print to CLI - echo "Downloading NextSeq run $run_id from BaseSpace." - echo "Depending on the data size, this can take very long (up to 10 h)" - echo "It is recommended to run this app in a screen (GNU screen)." - echo "A message indicating the end of download will be printed when done." - echo "Check nohup.out file in your output directory for the download log." + # Set default values for paths + output_path="/opt/analysis" + config_path="/opt/resources/basespace.cfg" + + help() { + echo "Download data from the Illumina BaseSpace Sequence Hub." + echo "" + echo "Usage:" + echo " singularity run [options] --app bs_download "\ + "[app_options]" + echo "" + echo "Options:" + echo " See 'singularity run'." + echo "" + echo "App Options:" + echo " -i Required. The run ID of the data to download." + echo " -o The path to the output directory." + echo " Default: '/opt/analysis'." + echo " -c The path to the authentication credentials file." + echo " This file is created by 'bs auth'. For additional" + echo " information see the help page for that command." + echo " Default: '/opt/resources/basespace.cfg'." + echo " -h Print the help page."
+ echo "" + echo "Examples:" + echo " # Set paths" + echo " $ resource_dir=/bin/MIPTools/base_resources" + echo " $ run_dir=/work/usr/example" + echo "" + echo " # Run app" + echo " $ singularity run \\" + echo " -B \$resource_dir:/opt/resources"\ + "-B \$run_dir:/opt/analysis \\" + echo " --app bs_download -i " + } - # cd and run app - # Use nohup to make command keep running even if get hangup signal - cd /opt/analysis - nohup python /opt/bin/BaseSpaceRunDownloader_v2.py \ - -r $run_id -a "$(cat /opt/resources/access_token.txt)" + # Argument handling using getopts + # Could alternatively use manual processing. This would allow for long + # form inputs. + while getopts "i:o:c:h" opt; do + case "${opt}" in + i) run_id=${OPTARG} ;; + o) output_path=${OPTARG} ;; + c) config_path=${OPTARG} ;; + h) help + exit 0 ;; + *) help + exit 1 ;; + esac + done + + # Ensure run_id is specified (quoted so empty or spaced values are safe) + if [ -z "${run_id:-}" ]; then + echo "Argument -i must be provided" >&2 + help >&2 + exit 1 + fi + + # Read data from config file + # Look up each key by name, strip the "key = " prefix and export the value + export BASESPACE_API_SERVER=$(sed -n "s/^apiServer *= *//p" "${config_path}") + export BASESPACE_ACCESS_TOKEN=$(sed -n "s/^accessToken *= *//p" "${config_path}") - # Print to CLI - echo "Download finished." + # Download data + bs download run -i "${run_id}" -o "${output_path}" ################################################################# ## Demux App ## ################################################################# %apprun demux - # Parse options - set -e - set -u - while getopts s:p: opt; do - case $opt in - s) sample_list=$OPTARG;; - p) platform=$OPTARG;; + # Exit if something fails or if have unset object + set -eu + + while getopts "s:p:" opt; do + case ${opt} in + s) sample_list=${OPTARG} ;; + p) platform=${OPTARG} ;; ?)
echo "Usage: singularity run --app demux \\" - echo " -B /path_to_run_dir:/opt/data \\" echo " -B /path_to_output_dir:/opt/analysis \\" echo " -B /path_to_base_resources:/opt/resources \\" echo " mycontainer.sif -s sample_list_file \\" echo " -p sequencing_platform (nextseq or miseq) \\" echo "The sample list file must be present in the output" echo "directory mounted to /opt/analysis." - exit 1;; + exit 1 ;; esac done # Define variables cd /opt/src template_dir="/opt/resources/templates/sample_sheet_templates/" - platform_template="$platform"_sample_sheet_template.csv - template="$template_dir$platform_template" + platform_template="${platform}"_sample_sheet_template.csv + template="${template_dir}${platform_template}" bc_dict="/opt/resources/barcode_dict.json" output_dir="/opt/analysis" - sample_list="/opt/analysis/$sample_list" + sample_list="/opt/analysis/${sample_list}" # Create a sample sheet for demultiplexing python -c 'import mip_functions as mip; mip.generate_sample_sheet( "'"$sample_list"'", "'"$bc_dict"'", "'"$template"'", "'"$platform"'", "'"$output_dir"'")' - # cd to where bcl files are. - cd /opt/data + # cd to where bcl files are + cd /opt/analysis # Create a fastq directory for saving fastqs mkdir -p /opt/analysis/fastq # Copy sample list to fastq directory - scp $sample_list /opt/analysis/fastq/ + scp ${sample_list} /opt/analysis/fastq/ # Increase limit of open number of files. 
ulimit -Sn $(ulimit -Hn) diff --git a/base_resources/basespace.cfg b/base_resources/basespace.cfg new file mode 100644 index 0000000..34fa528 --- /dev/null +++ b/base_resources/basespace.cfg @@ -0,0 +1,2 @@ +apiServer = https://api.basespace.illumina.com +accessToken = diff --git a/bin/BaseSpaceRunDownloader_v2.py b/bin/BaseSpaceRunDownloader_v2.py deleted file mode 100644 index 9709a53..0000000 --- a/bin/BaseSpaceRunDownloader_v2.py +++ /dev/null @@ -1,91 +0,0 @@ -from urllib.request import Request, urlopen, URLError -import json -import math -import sys -import os -import socket -import optparse - - -def arg_parser(): - parser = optparse.OptionParser() - parser.add_option('-r', dest='runid', help='Run ID: required') - parser.add_option('-a', dest='accesstoken', help='Access Token: required') - (options, args) = parser.parse_args() - try: - if options.runid is None: - raise Exception - if options.accesstoken is None: - raise Exception - except Exception: - print("Usage: BaseSpaceRunDownloader_vN.py -r " - " -a ") - sys.exit() - return options - - -def restrequest(rawrequest): - request = Request(rawrequest) - try: - response = urlopen(request) - json_string = response.read() - json_obj = json.loads(json_string) - except URLError as e: - print('Got an error code:', e) - sys.exit() - return json_obj - - -def downloadrestrequest(rawrequest, path): - dirname = RunID + os.sep + os.path.dirname(path) - if dirname != '': - if not os.path.isdir(dirname): - os.makedirs(dirname) - request = (rawrequest) - outfile = open(RunID + os.sep + path, 'wb') - try: - response = urlopen(request, timeout=1) - outfile.write(response.read()) - outfile.close() - except URLError as e: - print('Got an error code:', e) - outfile.close() - downloadrestrequest(rawrequest, path) - except socket.error: - print('Got a socket error: retrying') - outfile.close() - downloadrestrequest(rawrequest, path) - - -options = arg_parser() -RunID = options.runid -AccessToken = options.accesstoken -request = 
('http://api.basespace.illumina.com/v1pre3/runs/{}/files?' - 'access_token={}').format(RunID, AccessToken) -json_obj = restrequest(request) -totalCount = json_obj['Response']['TotalCount'] -noffsets = int(math.ceil(float(totalCount)/1000.0)) -hreflist = [] -pathlist = [] -filenamelist = [] - -for index in range(noffsets): - offset = 1000*index - request = ( - 'http://api.basespace.illumina.com/v1pre3/runs/{}/files' - '?access_token={}&limit=1000&Offset={}' - ).format(RunID, AccessToken, offset) - json_obj = restrequest(request) - nfiles = len(json_obj['Response']['Items']) - for fileindex in range(nfiles): - href = json_obj['Response']['Items'][fileindex]['Href'] - hreflist.append(href) - path = json_obj['Response']['Items'][fileindex]['Path'] - pathlist.append(path) - -for index in range(len(hreflist)): - request = ( - 'http://api.basespace.illumina.com/{}/content?access_token={}' - ).format(hreflist[index], AccessToken) - print('downloading {}'.format(pathlist[index])) - downloadrestrequest(request, pathlist[index])