# G1PA.process_short_reads.sh
# Gradients 1 PA metatranscriptomes
# Raw short read processing
# AUTHOR: Ryan D. Groussman
# 3/22/2017
# G1PA short read sequences arrived from the sequencing center
# in two parts ('Part 1' and 'Part 2'). These were processed
# sequentially as they arrived.
# we have our first file to work with: the barcode lookup table:
# Barcode lookup table: the first file we have to work with.
BARCODE_LOOKUP='morales_gradient_lookup.csv'
# The machine run structure is different than last time.
# Part 1 host: an AWS r3.2xlarge machine with 1000GB EBS storage,
# launched on 3Apr2017 at 14:00.
EC2_ADDRESS='ec2-35-160-231-166.us-west-2.compute.amazonaws.com'
# Directory used for all G1PA processing; the S3 download lands here.
G1PA_DIR='/mnt/raid/gradients1'
cd "${G1PA_DIR}"
# Compressed size on disk of G1PA part 1 (summary for the current directory):
du -s -m -h
# "219G"
# this release - rename files with lookup csv
# rename files with decode_barcodes.py so format is standardized
# the barcodes on poolA_barcode_lookup.csv need to be reverse complemented
# starting with barcode 2 and moving back from there
# e.g., AGGCGAAG.GAATTCGT -> ACGAATTC.CTTCGCCT
# check for accidental duplication: no dupes!
# and pasted it in this format (example) to gradients1a_barcode_lookup_revcomp.csv
# ACGAATTC.CTTCGCCT,S16C1_B_600
# CGAGTAAT.GCCTCTAT,S29C1_C_200
# used http://www.bioinformatics.org/sms/rev_comp.html to generate revcomp barcodes and uploaded to
wc "$G1PA_DIR/scripts/gradients1a_barcode_lookup_revcomp.csv"
# 46 lines..
# Rename the files in every lane directory with decode_barcodes.py.
# NOTE(review): the rename and the prefix list below read *_fwd.csv, while the
# check above inspects *_revcomp.csv - confirm which lookup is intended.
for dir in 1821_Morales_*_done; do
  # Glob instead of parsing `ls`; skip the literal pattern if nothing matches.
  [[ -d "$dir" ]] || continue
  "$G1PA_DIR/scripts/decode_barcodes.py" \
    "$G1PA_DIR/scripts/gradients1a_barcode_lookup_fwd.csv" "$dir/" \
    >> decode_barcodes.log
done
# File containing the sample name prefixes (second column of the lookup CSV):
awk -F, '{print $2}' "$G1PA_DIR/scripts/gradients1a_barcode_lookup_fwd.csv" \
  > gradients1a_prefix.txt
cd "$G1PA_DIR"
# Run Illumina_QC_AWS.sh on every sample of every lane directory, then MultiQC.
# Note that lane information disappears in the post-trimmomatic files. That's fine.
# The per-lane reports are moved into multiqc/, so make sure it exists first.
mkdir -p "$G1PA_DIR/multiqc"
mapfile -t part1_samples < "$G1PA_DIR/gradients1a_prefix.txt"
for dir in 1821_Morales_170315 1821_Morales_170315_NS2 1821_Morales_170321_NS3 1821_Morales_170321_NS4; do
  for lane in {1..4}; do
    # A failed cd would otherwise run QC and MultiQC in the previous directory.
    if ! cd "$G1PA_DIR/${dir}_${lane}_done/"; then
      echo "Skipping missing lane directory: ${dir}_${lane}_done" >&2
      continue
    fi
    echo "Working in $PWD" >> "$G1PA_DIR/gradients1a_Illumina_QC_AWS.log"
    for sample in "${part1_samples[@]}"; do
      [[ -n "$sample" ]] || continue # ignore blank lines in the prefix list
      "$G1PA_DIR/scripts/Illumina_QC_AWS.sh" \
        "${sample}_${lane}.1.fastq.gz" "${sample}_${lane}.2.fastq.gz" \
        "${sample}_${lane}" >> "$G1PA_DIR/gradients1a_Illumina_QC_AWS.log"
    done
    multiqc .
    mv multiqc_report.html "$G1PA_DIR/multiqc/multiqc_report_${dir}_${lane}.html"
  done
done
# Output directories for the lane-concatenated reads; the run name stays in
# the directory name. The first run is not called 'NS1' by the sequencing
# center, but it is labelled NS1 here to match the other runs.
cd "$G1PA_DIR"
for run_tag in NS1 NS2 NS3 NS4; do
  for read_kind in PE merged; do
    mkdir "1821_Morales_${run_tag}_combined_${read_kind}"
  done
done
# an example of our current output for one sample:
# "S02C1_3umA_1.1.fastq.gz
# S02C1_3umA_1.1.paired.trim.fastq.gz
# S02C1_3umA_1.1.unpaired.trim.fastq.gz
# S02C1_3umA_1.2.fastq.gz
# S02C1_3umA_1.2.paired.trim.fastq.gz
# S02C1_3umA_1.2.unpaired.trim.fastq.gz
# S02C1_3umA_1.flash.extendedFrags.fastq.gz
# S02C1_3umA_1.flash.notCombined.fastq.gz "
# Clear space by removing these intermediate and raw files:
for dir in 1821_Morales_170315 1821_Morales_170315_NS2 1821_Morales_170321_NS3 1821_Morales_170321_NS4; do
  for lane in {1..4}; do
    # The globs below are relative, so only delete after a successful cd;
    # otherwise rm would act on whatever directory was current before.
    cd "$G1PA_DIR/${dir}_${lane}_done/" || continue
    rm -- *.unpaired.trim.fastq.gz
    rm -- *.flash.notCombined.fastq.gz
    rm -- *.1.fastq.gz
    rm -- *.2.fastq.gz
  done
done
# Part 1 lane concatenation (machine runs NS1-NS4).
# For each run, the four lanes of every sample are concatenated into one file,
# once for the trimmed paired-end reads and once for the FLASH-merged reads.
# MultiQC is then run on the combined directory and the combined files are
# moved to combined_PE/ or combined_merged/.
# All runs now use $G1PA_DIR/combined_PE/; the NS1 paired step used to move
# its files to $G1PA_DIR/data/combined_PE/ instead.
# NOTE(review): confirm nothing downstream expects data/combined_PE/.

#######################################
# Print the lane-directory stem for a Part 1 run label.
# Arguments: $1 - run label (NS1, NS2, NS3 or NS4)
# Outputs:   the stem on stdout, e.g. 1821_Morales_170315_NS2
# Returns:   1 for an unknown label
#######################################
part1_run_stem() {
  case "$1" in
    NS1) echo "1821_Morales_170315" ;; # the first run carries no NS tag
    NS2) echo "1821_Morales_170315_NS2" ;;
    NS3) echo "1821_Morales_170321_NS3" ;;
    NS4) echo "1821_Morales_170321_NS4" ;;
    *) return 1 ;;
  esac
}

#######################################
# Concatenate lanes 1-4 of the trimmed paired reads for one run.
# Globals:   G1PA_DIR (read)
# Arguments: $1 - run label (NS1..NS4)
# Returns:   1 if the run label is unknown or the combined directory is missing
# Leaves the shell inside the run's combined_PE directory.
#######################################
part1_combine_paired() {
  local run=$1 stem out_dir prefix lane mate combined fastq
  local -a prefixes=()
  stem=$(part1_run_stem "$run") || return 1
  out_dir="$G1PA_DIR/1821_Morales_${run}_combined_PE"
  mapfile -t prefixes < "$G1PA_DIR/gradients1a_prefix.txt"
  for prefix in "${prefixes[@]}"; do
    [[ -n "$prefix" ]] || continue
    for mate in 1 2; do
      combined="$out_dir/${prefix}.${mate}.${run}_combined.paired.trim.fastq"
      # Write with '>' so a leftover partial file is replaced, not extended.
      for lane in 1 2 3 4; do
        gzip -dc "$G1PA_DIR/${stem}_${lane}_done/${prefix}_${lane}.${mate}.paired.trim.fastq.gz"
      done > "$combined"
      gzip -v "$combined"
    done
  done
  cd "$out_dir" || return 1
  multiqc .
  mv multiqc_report.html "$G1PA_DIR/multiqc/multiqc_report_${run}_combined_PE.html"
  for fastq in *"${run}_combined.paired.trim.fastq.gz"; do
    [[ -e "$fastq" ]] || continue
    mv -- "$fastq" "$G1PA_DIR/combined_PE/"
  done
}

#######################################
# Concatenate lanes 1-4 of the FLASH-merged reads for one run.
# Globals:   G1PA_DIR (read)
# Arguments: $1 - run label (NS1..NS4)
# Returns:   1 if the run label is unknown or the combined directory is missing
# Leaves the shell inside the run's combined_merged directory.
#######################################
part1_combine_merged() {
  local run=$1 stem out_dir prefix lane combined fastq
  local -a prefixes=()
  stem=$(part1_run_stem "$run") || return 1
  out_dir="$G1PA_DIR/1821_Morales_${run}_combined_merged"
  mapfile -t prefixes < "$G1PA_DIR/gradients1a_prefix.txt"
  for prefix in "${prefixes[@]}"; do
    [[ -n "$prefix" ]] || continue
    combined="$out_dir/${prefix}.${run}_combined.extendedFrags.fastq"
    for lane in 1 2 3 4; do
      gzip -dc "$G1PA_DIR/${stem}_${lane}_done/${prefix}_${lane}.flash.extendedFrags.fastq.gz"
    done > "$combined"
    gzip -v "$combined"
  done
  cd "$out_dir" || return 1
  multiqc .
  mv multiqc_report.html "$G1PA_DIR/multiqc/multiqc_report_${run}_combined_merged.html"
  for fastq in *"${run}_combined.extendedFrags.fastq.gz"; do
    [[ -e "$fastq" ]] || continue
    mv -- "$fastq" "$G1PA_DIR/combined_merged/"
  done
}

# Destinations for the reports and combined files must exist before the moves.
mkdir -p "$G1PA_DIR/multiqc" "$G1PA_DIR/combined_PE" "$G1PA_DIR/combined_merged"
for run in NS1 NS2 NS3 NS4; do
  part1_combine_paired "$run"
  part1_combine_merged "$run"
done
# zip all the notes and logs
# The reports were collected in $G1PA_DIR/multiqc/, but the shell is still
# inside the last combined directory here, so resolve multiqc/ from $G1PA_DIR.
tar -zcvf multiqc_report_gradients1_batch1.tar.gz -C "$G1PA_DIR" multiqc/
# NOTE(review): `aws s3 cp` normally needs an s3:// URI on one side; the
# destination below is a local path - confirm the intended bucket.
aws s3 cp multiqc_report_gradients1_batch1.tar.gz "$G1PA_DIR/logs/"
##########################
#### 6tr translation #####
##########################
# For every Part 1 run, convert each merged-read FASTQ to FASTA (seqret),
# translate it in six frames (transeq), gzip the result and copy it out.
# NOTE(review): the NS* merged stanzas earlier in this file move
# *extendedFrags.fastq.gz out of these directories - confirm the inputs are
# still present when this section runs.
cd "$G1PA_DIR"
for run in NS1 NS2 NS3 NS4; do
  cd "$G1PA_DIR/1821_Morales_${run}_combined_merged" || continue
  # Get the handles (file name minus suffix, run name kept) for all files.
  # The list is rebuilt from scratch so a re-run does not queue samples twice.
  : > "${run}_extendedFrags.handles.txt"
  for fastq in *extendedFrags.fastq.gz; do
    [[ -e "$fastq" ]] || continue
    printf '%s\n' "${fastq%.extendedFrags.fastq.gz}" >> "${run}_extendedFrags.handles.txt"
  done
  # fastq -> fasta -> 6tr.fasta
  # The list is read on fd 3 so the tools below cannot consume it via stdin.
  while IFS= read -r handle <&3; do
    echo "$handle"
    gunzip -k "$handle".extendedFrags.fastq.gz
    seqret -auto -sequence "$handle".extendedFrags.fastq -outseq "$handle".extendedFrags.fasta
    rm "$handle".extendedFrags.fastq
    transeq -auto -frame 6 -sequence "$handle".extendedFrags.fasta -outseq "$handle".6tr.fasta
    # seqret writes "<handle>.extendedFrags.fasta"; the previous rm targeted
    # "<handle>.flash.extendedFrags.fasta", which is never created, so the
    # intermediate FASTA was left behind.
    rm "$handle".extendedFrags.fasta
    gzip "$handle".6tr.fasta
    aws s3 cp "$handle".6tr.fasta.gz "$G1PA_DIR/combined_translated/"
    # rm "$handle".6tr.fasta.gz
  done 3< "${run}_extendedFrags.handles.txt"
done
##########################
# frame selection using mORFeus.py
# Collect the handles from each run: the name of every translated file in the
# run's combined_merged directory with "_combined.6tr.fasta.gz" stripped.
for run in NS1 NS2 NS3 NS4; do
  cd "$G1PA_DIR/1821_Morales_${run}_combined_merged" || continue
  # Match only the translated files; a bare *.fasta.gz would also list
  # files whose suffix cannot be stripped into a usable handle.
  for fasta in *_combined.6tr.fasta.gz; do
    [[ -e "$fasta" ]] || continue
    printf '%s\n' "${fasta%_combined.6tr.fasta.gz}" >> "$run.6tr_handles.list.txt"
  done
done
# above method keeps machine run data intact
# example handle: S04C1_0.2umA.NS4
len_cutoff="40"
morf_dir="$G1PA_DIR/morfeus"
#######################################
# Run mORFeus.py on one six-frame translated FASTA in the current directory.
# Unzips "<handle>_combined.6tr.fasta.gz", runs mORFeus.py on it, gzips the
# resulting "<handle>_combined.6tr.orfs<len_cutoff>.fasta" via $morf_dir and
# moves it to $G1PA_DIR/morfeus_translated/.
# Globals:   len_cutoff (read) - value passed to mORFeus.py -l
#            morf_dir (read)   - staging directory for the ORF file
#            G1PA_DIR (read)
# Arguments: $1 - sample handle, e.g. S04C1_0.2umA.NS4
# Returns:   non-zero as soon as a step fails. The unzipped input is removed
#            only after the ORF file has been compressed and moved, so a
#            failed mORFeus.py run no longer destroys its input.
#######################################
function gmorfeus {
  local handle=$1
  local input="${handle}_combined.6tr.fasta"
  local orfs="${handle}_combined.6tr.orfs${len_cutoff}.fasta"
  local final_dir="$G1PA_DIR/morfeus_translated"
  mkdir -p "$morf_dir" "$final_dir" || return 1
  echo "Unzipping " "$input".gz
  gunzip "$input".gz || return 1
  echo "Running mORFeus.py on " "$input"
  mORFeus.py -m -l "$len_cutoff" "$input" || return 1
  echo "Cleaning up..."
  mv "$orfs" "$morf_dir" || return 1
  gzip "$morf_dir/$orfs" || return 1
  mv "$morf_dir/$orfs.gz" "$final_dir/" || return 1
  rm "$input"
}
# Run gmorfeus on every handle collected for each Part 1 run.
for run in NS1 NS2 NS3 NS4; do
  # gmorfeus works on files in the current directory, so skip the run if its
  # directory or handle list is missing instead of running somewhere else.
  cd "$G1PA_DIR/1821_Morales_${run}_combined_merged" || continue
  mapfile -t morfeus_handles < "$run.6tr_handles.list.txt" || continue
  for file in "${morfeus_handles[@]}"; do
    [[ -n "$file" ]] || continue
    echo "Starting on: " "$file"
    gmorfeus "$file"
  done
done
################
# Final cleanup, uploading logs, etc.
# Gzipped tarball of sam/ for 50% of the gradients reads (first 4 machine runs).
tar -z -c -v -f gradients1_std_counts_pt1.tar.gz sam/
aws s3 cp gradients1_std_counts_pt1.tar.gz "${G1PA_DIR}/standard_counts/"
## G1PA Part 2
# 5/1/2017
# The second and final batch of Gradients1 RNA-Seq data
# start up AWS machine - this time we are going to try an r3.xlarge machine ($0.333 per Hour)
# with 1000GB EBS storage
# Launched on 1May2017 at 14:00
# the new files are named a bit differently
# we will want to 'rename' to match what we already have established.
# download from fastq source
# -p: the directory may already exist from an earlier batch.
cd "$G1PA_DIR" && mkdir -p multiqc
# compare the naming schemes between Part 1 and Part 2:
# an example of the naming scheme here (in 1821_Morales_NS5_1_done/)
# (kept as comments: as bare lines these names would be run as commands)
# Nutr5-T0_St.10_0.2umA1.fastq.gz
# Nutr5-T0_St.10_0.2umA2.fastq.gz
# Nutr5-T0_St.10_0.2umB1.fastq.gz
# Nutr5-T0_St.10_0.2umB2.fastq.gz
# S11C1_0.2umB1.fastq.gz
# S11C1_0.2umB2.fastq.gz
# what the 'real' naming scheme looks like (combined product):
# S02C1_0.2umA.1.NS1_combined.paired.trim.fastq.gz
# what the deflines look like:
# "@HHFNYBGX2:1:11101:10005:11472/1"
# now let's make our gradients1b_prefix.txt by hand:
# take ls -l *2.fastq.gz and get file lists like these: S11C1_3umB2.fastq.gz
# remove the 2.fastq.gz to get our 'base' names
# (remember there's no field separator for the lane here)
# like this: > S11C1_3umB
# manually paste into 'gradients1b_prefix.txt' in $G1PA_DIR
# NOTE scratch below:
# Initial QC loop calling Illumina_QC_AWS.sh
#######################################
# Run Illumina_QC_AWS.sh on every Part 2 sample of one lane directory, then
# run MultiQC there and move the report to $G1PA_DIR/multiqc/.
# Globals:   G1PA_DIR (read)
# Arguments: $1 - machine run number (the N in 1821_Morales_NS<N>_<lane>_done)
#            $2 - lane number
# Returns:   1, without running anything, if the lane directory is missing;
#            otherwise the status of the final mv
#######################################
function initial_qc {
  local run=$1 lane=$2 sample
  local -a samples=()
  # Without this check a failed cd would run QC in the caller's directory.
  cd "$G1PA_DIR/1821_Morales_NS${run}_${lane}_done/" || return 1
  echo "Working in $PWD" >> "$G1PA_DIR/gradients1b_Illumina_QC_AWS.log"
  mapfile -t samples < "$G1PA_DIR/gradients1b_prefix.txt"
  for sample in "${samples[@]}"; do
    [[ -n "$sample" ]] || continue
    # Part 2 raw names have no separator between sample name and mate number.
    "$G1PA_DIR/scripts/Illumina_QC_AWS.sh" \
      "${sample}1.fastq.gz" "${sample}2.fastq.gz" "${sample}_${lane}"
  done
  multiqc .
  mv multiqc_report.html "$G1PA_DIR/multiqc/multiqc_report_NS${run}_${lane}.html"
}
# Covers all 4 runs x 4 lanes.
for run_number in 5 6 7 8; do
  for lane_number in 1 2 3 4; do
    initial_qc "$run_number" "$lane_number"
  done
done
# Output directories for the lane-concatenated Part 2 reads; the run name
# stays in the directory name.
cd "$G1PA_DIR"
for run_tag in NS5 NS6 NS7 NS8; do
  for read_kind in PE merged; do
    mkdir "1821_Morales_${run_tag}_combined_${read_kind}"
  done
done
# Clear up some space - go through and remove the raw fastq files
# and the unpaired / not-combined intermediates.
# we want to REMOVE:
for dir in 5 6 7 8; do
  for lane in {1..4}; do
    # The globs below are relative, so only delete after a successful cd;
    # otherwise rm would act on whatever directory was current before.
    cd "$G1PA_DIR/1821_Morales_NS${dir}_${lane}_done/" || continue
    rm -- *.unpaired.trim.fastq.gz
    rm -- *.flash.notCombined.fastq.gz
    rm -- *1.fastq.gz
    rm -- *2.fastq.gz
  done
done
############ combining lanes ########
cd $G1PA_DIR
#######################################
# Concatenate the four lanes of FLASH-merged reads for every Part 2 sample of
# one machine run, then copy each combined file out with `aws s3 cp`.
# The compressed lane files are joined byte-for-byte with cat.
# Globals:   G1PA_DIR (read)
# Arguments: $1 - machine run number (the N in 1821_Morales_NS<N>_...)
# Returns:   1 if $G1PA_DIR cannot be entered
#######################################
function concatenate_merged {
  local run=$1 prefix lane combined
  local combined_dir="$G1PA_DIR/1821_Morales_NS${run}_combined_merged"
  local -a prefixes=()
  cd "$G1PA_DIR" || return 1
  mapfile -t prefixes < "$G1PA_DIR/gradients1b_prefix.txt"
  for prefix in "${prefixes[@]}"; do
    [[ -n "$prefix" ]] || continue
    combined="$combined_dir/${prefix}.NS${run}_combined.extendedFrags.fastq.gz"
    # Write with '>' rather than appending: a file left by an earlier attempt
    # would otherwise end up holding every read twice.
    for lane in 1 2 3 4; do
      cat "$G1PA_DIR/1821_Morales_NS${run}_${lane}_done/${prefix}_${lane}.flash.extendedFrags.fastq.gz"
    done > "$combined"
    aws s3 cp "$combined" "$G1PA_DIR/combined_merged/"
  done
}
# Combine the merged-fragment lanes of each Part 2 run into one file per sample.
for run_number in 5 6 7 8; do
  concatenate_merged "$run_number"
done
# now our function for concatenated paired:
#######################################
# Concatenate the four lanes of trimmed paired reads (mates 1 and 2) for every
# Part 2 sample of one machine run, then copy both combined files out with
# `aws s3 cp`. The compressed lane files are joined byte-for-byte with cat.
# Globals:   G1PA_DIR (read)
# Arguments: $1 - machine run number (the N in 1821_Morales_NS<N>_...)
# Returns:   1 if $G1PA_DIR or the run's combined_PE directory cannot be entered
# Leaves the shell inside the run's combined_PE directory.
#######################################
function concatenate_paired {
  local run=$1 prefix lane mate
  local combined_dir="$G1PA_DIR/1821_Morales_NS${run}_combined_PE"
  local -a prefixes=()
  cd "$G1PA_DIR" || return 1
  mapfile -t prefixes < "$G1PA_DIR/gradients1b_prefix.txt"
  for prefix in "${prefixes[@]}"; do
    [[ -n "$prefix" ]] || continue
    for mate in 1 2; do
      # Write with '>' rather than appending: a file left by an earlier
      # attempt would otherwise end up holding every read twice.
      for lane in 1 2 3 4; do
        cat "$G1PA_DIR/1821_Morales_NS${run}_${lane}_done/${prefix}_${lane}.${mate}.paired.trim.fastq.gz"
      done > "$combined_dir/${prefix}.${mate}.NS${run}_combined.paired.trim.fastq.gz"
    done
    cd "$combined_dir" || return 1
    aws s3 cp "${prefix}.1.NS${run}_combined.paired.trim.fastq.gz" "$G1PA_DIR/combined_PE/"
    aws s3 cp "${prefix}.2.NS${run}_combined.paired.trim.fastq.gz" "$G1PA_DIR/combined_PE/"
  done
}
# Concatenate the paired-end reads of each Part 2 run.
for run_number in 5 6 7 8; do
  concatenate_paired "$run_number"
done
##########################
#### 6tr translation #####
##########################
#######################################
# Six-frame translate every merged-read FASTQ of one Part 2 machine run.
# For each *extendedFrags.fastq.gz in the run's combined_merged directory:
# fastq -> fasta (seqret) -> 6tr.fasta (transeq) -> gzip, then move the
# result to $G1PA_DIR/combined_translated/.
# Globals:   G1PA_DIR (read)
# Arguments: $1 - machine run number (the N in 1821_Morales_NS<N>_...)
# Returns:   1 if the run's combined_merged directory cannot be entered
# Leaves the shell inside the run's combined_merged directory.
#######################################
function translate_6tr {
  local run=$1 fastq handle
  local handles_file="${run}_extendedFrags.handles.txt"
  cd "$G1PA_DIR/1821_Morales_NS${run}_combined_merged" || return 1
  # Get the handles (file name minus suffix, run name kept) for all files.
  # The list is rebuilt from scratch so calling this twice does not queue
  # every sample twice.
  : > "$handles_file"
  for fastq in *extendedFrags.fastq.gz; do
    [[ -e "$fastq" ]] || continue
    printf '%s\n' "${fastq%.extendedFrags.fastq.gz}" >> "$handles_file"
  done
  # fastq -> fasta -> 6tr.fasta
  # The list is read on fd 3 so the tools below cannot consume it via stdin.
  while IFS= read -r handle <&3; do
    echo "$handle"
    gunzip -k "$handle".extendedFrags.fastq.gz
    seqret -auto -sequence "$handle".extendedFrags.fastq -outseq "$handle".extendedFrags.fasta
    rm "$handle".extendedFrags.fastq
    transeq -auto -frame 6 -sequence "$handle".extendedFrags.fasta -outseq "$handle".6tr.fasta
    rm "$handle".extendedFrags.fasta
    gzip "$handle".6tr.fasta
    mv "$handle".6tr.fasta.gz "$G1PA_DIR/combined_translated/"
  done 3< "$handles_file"
}
# Translate the merged reads of each Part 2 run.
for run_number in 5 6 7 8; do
  translate_6tr "$run_number"
done
# Count the translated files that reached combined_translated/:
ls "${G1PA_DIR}/combined_translated/" \
  | grep -c "6tr.fasta.gz"
# 376 files = samples * 8 files/sample; good. (46 * 8 = 368 and 47 * 8 = 376 - TODO confirm the sample count.)
# zip the multiqc logs:
# translate_6tr leaves the shell inside a combined_merged directory, while
# multiqc/ was created in $G1PA_DIR, so resolve the archive member from there.
tar -zcvf multiqc_report_gradients1_batch2.tar.gz -C "$G1PA_DIR" multiqc/
mkdir -p "$G1PA_DIR/logs"
mv multiqc_report_gradients1_batch2.tar.gz "$G1PA_DIR/logs/"
# we can get rid of all the fasta files now and then zip up the remaining logs:
# Everything below uses relative globs, so do nothing if the cd fails rather
# than deleting or archiving from the wrong directory.
if cd "$G1PA_DIR"; then
  rm 1821_Morales_NS*/*.fastq.gz # dev/md0 1.1T 394G 611G 40% /mnt/raid
  rm 1821_Morales_NS*/*.fasta.gz # /dev/md0 1.1T 40G 965G 4% /mnt/raid
  # now zip up everything else (mostly just txt files)
  tar -zcvf gradients1_batch2_logs.tar.gz 1821_Morales_NS*_*_done/
  mv gradients1_batch2_logs.tar.gz "$G1PA_DIR/logs/"
else
  echo "Cannot enter $G1PA_DIR; skipping final cleanup" >&2
fi