From 5054a3adf9a9f8d441169270286287743f3f1dbc Mon Sep 17 00:00:00 2001 From: Ivan Blagoev Topolsky Date: Wed, 30 Mar 2022 16:39:44 +0200 Subject: [PATCH] polishing the mass-importer staging - make it an option (-g, --staging) - play nicely with --append - typo fixes --- utils/README.md | 3 +++ utils/sort_samples_demultiplexstats | 22 ++++++++++++++-------- utils/sort_samples_dumb | 26 +++++++++++++++++--------- utils/sort_samples_jobinfo | 25 +++++++++++++------------ 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/utils/README.md b/utils/README.md index 1255650b4..05fdceb0c 100644 --- a/utils/README.md +++ b/utils/README.md @@ -118,6 +118,7 @@ Usage: ./sort_samples_dumb -f -b [-l ] [-L {''|--link|--symbo -L : link parameter to pass to cp when copying (default: --link) -t : tsv file (default: samples..tsv) -T : do not truncate (empty) the file before starting + -g : store list in .tsv.staging instead and only rename into final .tsv if successful -D : sample have duplicates (e.g.: across lanes) -p : prefix to prepend to fastq files (e.g.: for fusing runs) -s : suffix to append to fastq files (e.g.: for fusing runs) @@ -183,6 +184,7 @@ optional arguments: --force Force overwriting any existing file when moving -s, --summary Only display a summary of datasets, not an exhaustive list of all samples -a, --append Append to the end of movedatafiles.sh, instead of overwritting (use when calling from an external combiner wrapper) + -g, --staging Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors. -n, --noempty skip fastq.gz files with bad yield (0 reads) -p TSV, --patchmap TSV patchmap file to rename samples @@ -239,6 +241,7 @@ optional arguments: -b LAB, --batch LAB generate batch description -s, --summary Only display a summary of datasets, not an exhaustive list of all samples -a, --append Append to the end of movedatafiles.sh, instead of overwritting (use when calling from an external combiner wrapper) + -g, --staging Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors. -l, --forcelanes Explicitly look for sample in each lane (for replicates across lanes) -p TSV, --patchmap TSV patchmap file to rename samples diff --git a/utils/sort_samples_demultiplexstats b/utils/sort_samples_demultiplexstats index c336ac3c0..4e92122a4 100755 --- a/utils/sort_samples_demultiplexstats +++ b/utils/sort_samples_demultiplexstats @@ -29,6 +29,8 @@ argparser.add_argument('-s', '--summary', required=False, action='store_true', dest='summary', help="Only display a summary of datasets, not an exhaustive list of all samples") argparser.add_argument('-a', '--append', required=False, action='store_true', dest='append', help="Append to the end of movedatafiles.sh, instead of overwritting\n(use when calling from an external combiner wrapper)") +argparser.add_argument('-g', '--staging', required=False, + action='store_true', dest='staging', help="Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.") argparser.add_argument('-n', '--noempty', required=False, action='store_true', dest='noempty', help="skip fastq.gz files with bad yield (0 reads)") argparser.add_argument('-p', '--patchmap', metavar='TSV', required=False, default=None, @@ -42,6 +44,7 @@ sampleset=args.outdir link=args.link append=args.append noempty=args.noempty +staging_suffix='.staging' if args.staging else '' statsjson=os.path.join(statsdir, 'Stats/Stats.json') @@ -117,7 +120,7 @@ if not os.path.isdir(sampleset): # output files batch=f"{rundate}_{flowcell}" -tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv'), 'wt') +tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv{staging_suffix}'), 'wt') # shell script file with all moving instructions inside sh=open(os.path.join(sampleset,'movedatafiles.sh'), 'at' if append else 'wt') @@ -150,7 +153,7 @@ X() { # per batch directory checks print(r"[[ -d '%(download)s' ]] || fail 'No download directory:' '%(download)s'" % {'download':fastqdir}, file=sh) if qcdir: - print(r"[[ -d '%(qc)s' ]] || fail 'No download directory:' '%(qc)s'" % {'qc': qcdir}, file=sh) + print(r"[[ -d '%(qc)s' ]] || fail 'No download directory:' '%(qc)s'" % {'qc': qcdir}, file=sh) # parse info about samples @@ -197,13 +200,16 @@ for file in "${fastq[@]}"; do print('done', file=sh) -# coda: return status -if not append: print(f""" -if (( ALLOK )); then - echo All Ok - exit 0 -else +# coda: rename staging and return status +if args.staging: + print(f"(( ALLOK )) && mv -v {sampleset}/samples.{batch}.tsv{staging_suffix} {sampleset}/samples.{batch}.tsv", file=sh) + +if not append: print(""" +if (( ! ALLOK )); then echo Some errors exit 1 fi; + +echo All Ok +exit 0 """, file=sh) diff --git a/utils/sort_samples_dumb b/utils/sort_samples_dumb index 860bd2d5d..0336d153e 100755 --- a/utils/sort_samples_dumb +++ b/utils/sort_samples_dumb @@ -28,6 +28,7 @@ usage() { echo "Usage: $0 -f -b [-l ] [-L {''|--link|--symbol -L : link parameter to pass to cp when copying (default: --link) -t : tsv file (default: samples..tsv) -T : do not truncate (empty) the file before starting + -g : store list in .tsv.staging instead and only rename into final .tsv if successful -D : sample have duplicates (e.g.: across lanes) -p : prefix to prepend to fastq files (e.g.: for fusing runs) -s : suffix to append to fastq files (e.g.: for fusing runs) @@ -42,8 +43,9 @@ prefix='' suffix='' quiet=0 duplicates=0 +staging=0 mode= -while getopts "f:b:Dl:L:p:s:o:m:t:Tqh" o; do +while getopts "f:b:Dl:L:p:s:o:m:t:Tgqh" o; do case "${o}" in f) fastq_dir="${OPTARG}" # shellcheck disable=SC2206 @@ -67,6 +69,7 @@ while getopts "f:b:Dl:L:p:s:o:m:t:Tqh" o; do [[ $mode =~ ^[0-7]{,4}$ ]] || fail "Invalid characters <${mode//[0-7]/}> in <${mode}>" 'mode should be an octal chmod value, see for informations' ;; t) tsv="${OPTARG}" ;; + g) staging=1 ;; T) truncate=0 ;; q) quiet=1 ;; h) usage 0 ;; @@ -78,8 +81,14 @@ done : "${batch_name:?missing mandatory batch name, use option -b}" : "${out_dir:?missing mandatory output dir use option -o}" -: "${tsv:=${out_dir}/samples.${batch_name}.tsv.staging}" +: "${tsv:=${out_dir}/samples.${batch_name}.tsv}" +if (( staging )); then + staging_suffix=".staging" + tsv="${tsv}${staging_suffix}" +else + staging_suffix= +fi # RegEx @@ -224,12 +233,11 @@ if (( numdup )); then warn "$(( paired + numsam )) samples, but only ${numdup} duplicates" "(missing: $((numdup % ( paired + numsam ) )) )" fi fi -if (( ALLOK )); then - mv -v ${tsv} ${tsv//\.staging/} - info All Ok - exit 0 -else - warn Some errors - exit 1 +if (( ! ALLOK )); then + warn Some errors + exit 1 fi +(( staging )) && mv -v "${tsv}" "${tsv%%${staging_suffix}}" +info All Ok +exit 0 diff --git a/utils/sort_samples_jobinfo b/utils/sort_samples_jobinfo index 92fdc4faa..e84e935dd 100755 --- a/utils/sort_samples_jobinfo +++ b/utils/sort_samples_jobinfo @@ -32,6 +32,8 @@ argparser.add_argument('-s', '--summary', required=False, action='store_true', dest='summary', help="Only display a summary of datasets, not an exhaustive list of all samples") argparser.add_argument('-a', '--append', required=False, action='store_true', dest='append', help="Append to the end of movedatafiles.sh, instead of overwritting\n(use when calling from an external combiner wrapper)") +argparser.add_argument('-g', '--staging', required=False, + action='store_true', dest='staging', help="Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.") argparser.add_argument('-l', '--forcelanes', required=False, action='store_true', dest='forcelanes', help="Explicitly look for sample in each lane (for replicates across lanes)") argparser.add_argument('-p', '--patchmap', metavar='TSV', required=False, default=None, @@ -44,6 +46,8 @@ sampleset=args.outdir link=args.link append=args.append lab = args.batch +staging_suffix='.staging' if args.staging else '' + # parse the chmod parameter try: @@ -136,7 +140,7 @@ if not os.path.isdir(sampleset): # output files batch=f"{date}_{flowcell}" -tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv.staging'), 'wt') +tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv{staging_suffix}'), 'wt') # shell script file with all moving instructions inside sh=open(os.path.join(sampleset,'movedatafiles.sh'), 'at' if append else 'wt') @@ -230,19 +234,16 @@ if args.batch: with open(os.path.join(sampleset,f'batch.{batch}.yaml'), 'wt') as byml: print(yaml.dump({'type':'jobinfo','lab':lab,'runfolder':runfolder,'date':date,'instrument':instr,'runnum':runnum,'flowcell':flowcell,'lanes':lanes,'library':library,'folder':folder}, sort_keys=False), file=byml) -# coda: return status -if not append: print(f""" -if (( !ALLOK )); then - echo Some errors - exit 1 -fi; - -""", file=sh) +# coda: rename staging and return status +if args.staging: + print(f"(( ALLOK )) && mv -v {sampleset}/samples.{batch}.tsv{staging_suffix} {sampleset}/samples.{batch}.tsv", file=sh) -print(f"mv -v {sampleset}/sample.{batch}.tsv.staging samples {sampleset}/sample.{batch}.tsv", file=sh) +if not append: print(""" +if (( ! ALLOK )); then + echo Some errors + exit 1 +fi; -print(""" echo All Ok exit 0 """, file=sh) -