-
Notifications
You must be signed in to change notification settings - Fork 4
/
spooker
executable file
·149 lines (133 loc) · 5.77 KB
/
spooker
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env bash
# This script is designed to be called from the
# - onerror
# - oncomplete
# sections of Snakefiles to gather info about:
# 1. the pipeline
# 2. the user
# 3. other metadata
# This data is then tar-gzipped and saved to a common location.
# For runs on BIOWULF:
# - tarball is saved to /scratch/ccbrpipeliner using the spook tool
# - a cronjob then picks up the tarball and saves it to /data/CCBR_Pipeliner/userdata/ccbrpipeliner
# - [TODO] another cronjob then reads files under /data/CCBR_Pipeliner/userdata/ccbrpipeliner and generates
# detailed HTML reports about pipeline usage.
# For runs on FRCE:
# - tarball is saved to /mnt/projects/CCBR-Pipelines/pipelines/userdata/ccbrpipeliner by simple cp command
# - [TODO] a cronjob then picks up the tarball and moves it to biowulf at /data/CCBR_Pipeliner/userdata/ccbrpipeliner/frce
# - [TODO] another cronjob then reads files under /data/CCBR_Pipeliner/userdata/ccbrpipeliner/frce and adds them to the
# detailed HTML report about pipeline usage.
# Requires 2 positional arguments:
# 1. the pipeline's output directory ... absolute path
# 2. the name of the pipeline ... e.g. RENEE or XAVIER
# Name under which this script was invoked; its basename prefixes every
# message the script prints.
SCRIPTNAME="$0"
# Quoted (and protected with --) so an install path containing whitespace or
# a leading dash still yields the correct basename.
SCRIPTBASENAME=$(basename -- "$SCRIPTNAME")
# Exactly 2 arguments are required ... PIPELINE_OUTDIR and PIPELINE_NAME.
# Messages are deliberately left on stdout so that existing log capture by the
# calling pipeline keeps working.
if (( $# != 2 )); then
  echo "$SCRIPTBASENAME FAILED!: ERROR: 2 arguments expected!"
  echo "$SCRIPTBASENAME FAILED!: ERROR: Argument 1: pipeline outdir"
  echo "$SCRIPTBASENAME FAILED!: ERROR: Argument 2: pipeline name"
  exit 1
fi
# Make a pipeline's exit status reflect a failure in any of its stages.
set -o pipefail

# Positional arguments: output directory of the pipeline run and pipeline name.
PIPELINE_OUTDIR=$1
PIPELINE_NAME=$2

# Size in bytes of the output directory as reported by `du -bs`.
# Only measured when the directory exists; a missing directory is reported
# with a proper error message further down, so the value is left empty here
# instead of letting du print its own error first.
PIPELINE_OUTDIR_SIZE=""
if [[ -d "$PIPELINE_OUTDIR" ]]; then
  PIPELINE_OUTDIR_SIZE=$(du -bs "$PIPELINE_OUTDIR" | awk '{print $1}')
fi

PIPELINE_NAME_UPPER=$(echo "$PIPELINE_NAME" | tr '[:lower:]' '[:upper:]')
PIPELINE_NAME_LOWER=$(echo "$PIPELINE_NAME" | tr '[:upper:]' '[:lower:]')

# Path of the pipeline executable (lower-cased pipeline name); empty when it
# is not in PATH. `type -P` matches how scontrol and spook are located
# elsewhere in this script.
PIPELINE_PATH=$(type -P "$PIPELINE_NAME_LOWER")

# Last whitespace-separated field of the last line printed by
# `<pipeline> --version`. The fallback is applied after the substitution so
# the result is always a single token: "UNKNOWN" whenever no version text
# could be obtained (command missing, failing, or printing nothing).
PIPELINE_VERSION=$("$PIPELINE_NAME_LOWER" --version 2>/dev/null | tail -n1 | awk '{print $NF}')
if [[ -z "$PIPELINE_VERSION" ]]; then
  PIPELINE_VERSION="UNKNOWN"
fi

# Timestamp (yymmddHHMMSS) used as the basename of every file created here.
DT=$(date +%y%m%d%H%M%S)
archivefile="${PIPELINE_OUTDIR}/${DT}.tar.gz"
treefile="${PIPELINE_OUTDIR}/${DT}.tree.json"
metadata="${PIPELINE_OUTDIR}/${DT}.json"

# scontrol is needed later to find out which cluster the script is running on.
SCONTROL=$(type -P scontrol)
if [[ -z "$SCONTROL" ]]; then
  echo "$SCRIPTBASENAME FAILED!: ERROR: scontrol command not in PATH!"
  echo "$SCRIPTBASENAME FAILED!: ERROR: usage metadata cannot be collected!!"
  exit 1
fi
# Create the archive with all metadata inside PIPELINE_OUTDIR.
# The tar command line is assembled as an array so that every path remains a
# single argument even if it contains whitespace or glob characters.
dryrunlogfile=""
if [[ ! -d "$PIPELINE_OUTDIR" ]]; then
  # PIPELINE_OUTDIR does not exist!
  echo "$SCRIPTBASENAME FAILED!: ERROR: $PIPELINE_OUTDIR does not exist!"
  echo "$SCRIPTBASENAME FAILED!: ERROR: usage metadata cannot be collected!!"
  exit 1
fi

# Find the newest dryrun log by modification time. The shell expands the glob
# itself (no parsing of ls output); when nothing matches, the literal pattern
# fails the -f test and dryrunlogfile stays empty.
for candidate in "${PIPELINE_OUTDIR}"/dryrun*log; do
  [[ -f "$candidate" ]] || continue
  if [[ -z "$dryrunlogfile" || "$candidate" -nt "$dryrunlogfile" ]]; then
    dryrunlogfile="$candidate"
  fi
done

archive_cmd=(tar czvf "$archivefile")
if [[ -n "$dryrunlogfile" ]]; then
  archive_cmd+=("$dryrunlogfile")
fi

# Gather some info about the run and the user; written as "KEY: value" lines.
{
  echo "PIPELINE_OUTDIR: $PIPELINE_OUTDIR"
  echo "PIPELINE_OUTDIR_SIZE: $PIPELINE_OUTDIR_SIZE"
  echo "PIPELINE_NAME: $PIPELINE_NAME_UPPER"
  echo "PIPELINE_PATH: $PIPELINE_PATH"
  echo "PIPELINE_VERSION: $PIPELINE_VERSION"
  echo "USER: $USER"
  # The output of `groups` is used directly; it is not stored in a variable
  # named GROUPS because that name is a special variable in bash.
  echo "GROUPS: $(groups)"
  echo "DATE: $DT"
} > "$metadata"

# JSON listing of the output directory tree.
tree -J "$PIPELINE_OUTDIR" > "$treefile"
archive_cmd+=("$metadata" "$treefile")

# Files from pipelines written in snakemake: add whichever of the known log
# files are present under <outdir>/logfiles.
logdir="${PIPELINE_OUTDIR}/logfiles"
if [[ -d "$logdir" ]]; then
  for thisfile in "snakemake.log" "snakemake.log.jobby" "master.log" "runtime_statistics.json"; do
    absthisfile="${logdir}/${thisfile}"
    if [[ -f "$absthisfile" ]]; then
      archive_cmd+=("$absthisfile")
    fi
  done
fi

# [TODO] files from pipelines written in nextflow
# [TODO] ... add nextflow related files here ...

echo "$SCRIPTBASENAME: ${archive_cmd[*]}"
if "${archive_cmd[@]}"; then
  echo "$SCRIPTBASENAME: $archivefile created!"
else
  # Not fatal on purpose: tar can exit non-zero merely because a log file
  # changed while it was being read, and the archive may still be usable.
  echo "$SCRIPTBASENAME: WARNING: tar exited with status $? while creating $archivefile" >&2
fi

# The metadata and tree files only exist to be archived; remove them again.
rm -f -- "$metadata" "$treefile"
# Check if you are on BIOWULF or FRCE, then hand the archive over:
#   - BIOWULF: via the spook tool into /scratch/ccbrpipeliner
#   - FRCE:    via cp into /mnt/projects/CCBR-Pipelines/pipelines/userdata/ccbrpipeliner
# Exits 1 when on neither cluster, when spook cannot be found, when there is
# no archive to copy, or when the copy itself fails.

# awk does the case-insensitive match itself and stops at the first matching
# line, so clustername can never end up holding more than one value.
clustername=$("${SCONTROL:-scontrol}" show config | awk 'tolower($0) ~ /clustername/ {print $NF; exit}')

ISBIOWULF=false
ISFRCE=false
case "$clustername" in
  biowulf) ISBIOWULF=true ;;
  fnlcr) ISFRCE=true ;;
esac

if [[ $ISBIOWULF != true && $ISFRCE != true ]]; then
  # not biowulf or frce ... so exit
  echo "$SCRIPTBASENAME FAILED!: ERROR: Neither on BIOWULF Nor on FRCE"
  echo "$SCRIPTBASENAME FAILED!: ERROR: $archivefile created but NOT copied!"
  exit 1
fi

# The copy command is kept as an array so that paths containing whitespace
# are passed through as single arguments.
if [[ $ISBIOWULF == true ]]; then
  SPOOK=$(type -P spook)
  if [[ -z "$SPOOK" ]]; then
    echo "$SCRIPTBASENAME: spook is NOT in PATH."
    echo "$SCRIPTBASENAME: trying to add it by sourcing /data/CCBR_Pipeliner/cronjobs/scripts/setup"
    . "/data/CCBR_Pipeliner/cronjobs/scripts/setup"
    SPOOK=$(type -P spook)
    if [[ -z "$SPOOK" ]]; then
      echo "$SCRIPTBASENAME FAILED!: ERROR: spook is still not in PATH!"
      echo "$SCRIPTBASENAME FAILED!: ERROR: usage metadata cannot be collected!!"
      exit 1
    fi
  fi
  echo "$SCRIPTBASENAME: spook is now in PATH:$SPOOK"
  SPOOK_COPY2DIR="/scratch/ccbrpipeliner"
  copy_cmd=("$SPOOK" -f "$archivefile" -d "$SPOOK_COPY2DIR")
else
  SPOOK_COPY2DIR="/mnt/projects/CCBR-Pipelines/pipelines/userdata/ccbrpipeliner"
  copy_cmd=(cp -rv "$archivefile" "$SPOOK_COPY2DIR")
fi
echo "$SCRIPTBASENAME: SPOOK_COPY2DIR: $SPOOK_COPY2DIR"

# Copy over the metadata archive. A missing archive is reported instead of
# being silently treated as success.
if [[ ! -f "$archivefile" ]]; then
  echo "$SCRIPTBASENAME FAILED!: ERROR: $archivefile does not exist; nothing to copy!"
  exit 1
fi

echo "$SCRIPTBASENAME: ${copy_cmd[*]}"
if ! "${copy_cmd[@]}"; then
  echo "$SCRIPTBASENAME FAILED!: ERROR: $archivefile created but NOT copied!"
  exit 1
fi