forked from TheJacksonLaboratory/ATAC-seq
-
Notifications
You must be signed in to change notification settings - Fork 2
/
auyar_scripts.sh
executable file
·122 lines (98 loc) · 5.92 KB
/
auyar_scripts.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/bin/bash
dataDIR=$1 #ARGV, contains folder with FASTQ files, right now needs trailing /
scriptDIR=$(pwd)
outputDIR=$scriptDIR/working
mkdir $outputDIR
### FastQC Pipeline ###
mkdir $outputDIR/fastQC
find $dataDIR -name *.fastq.gz > $outputDIR/fastqc_filelist.txt
FILENUMBER=$(wc -l $outputDIR/fastqc_filelist.txt | cut -d' ' -f1)
#echo $FILENUMBER
echo \#!/bin/bash >> $outputDIR/fastqc.qsub
echo \#PBS -l nodes=1:ppn=2 >> $outputDIR/fastqc.qsub
echo \#PBS -l walltime=24:00:00 >> $outputDIR/fastqc.qsub
echo \#PBS -N fastqc >> $outputDIR/fastqc.qsub
echo \#PBS -t 1-$FILENUMBER >> $outputDIR/fastqc.qsub
echo FILE=\$\(head -n \$PBS_ARRAYID $outputDIR/fastqc_filelist.txt \| tail -1\) >> $outputDIR/fastqc.qsub
echo /opt/compsci/FastQC/0.11.3/fastqc -t 2 --noextract \$FILE -o $outputDIR/fastQC >> $outputDIR/fastqc.qsub
######
## Trimmomatic & Adapter Trimming Pipeline ###
mkdir $outputDIR/trimmomatic
find $dataDIR -name *R1_001.fastq.gz > $outputDIR/trimmomatic_R1_filelist.txt
FILENUMBER=$(wc -l $outputDIR/trimmomatic_R1_filelist.txt | cut -d' ' -f1)
#echo $FILENUMBER
echo \#!/bin/bash >> $outputDIR/trimmomatic.qsub
echo \#PBS -l nodes=1:ppn=2 >> $outputDIR/trimmomatic.qsub
echo \#PBS -l walltime=24:00:00 >> $outputDIR/trimmomatic.qsub
echo \#PBS -N trimmomatic >> $outputDIR/trimmomatic.qsub
echo \#PBS -t 1-$FILENUMBER >> $outputDIR/trimmomatic.qsub
echo module load python >> $outputDIR/trimmomatic.qsub
echo FILE=\$\(head -n \$PBS_ARRAYID $outputDIR/trimmomatic_R1_filelist.txt \| tail -1\) >> $outputDIR/trimmomatic.qsub
echo FILE2=\$\(basename "\${FILE}" \| sed \'s/R1_001\.fastq.gz/R2_001\.fastq.gz/g\'\) >> $outputDIR/trimmomatic.qsub
echo FILE1paired=\$\(basename \"\${FILE}\" \| sed \'s/R1_001\.fastq.gz/R1_001\.trim\.fastq\.gz/g\'\) >> $outputDIR/trimmomatic.qsub
echo FILE2paired=\$\(basename \"\${FILE2}\" \| sed \'s/R2_001\.fastq.gz/R2_001\.trim\.fastq\.gz/g\'\) >> $outputDIR/trimmomatic.qsub
echo FILE1unpaired=\$\(basename \"\${FILE}\" \| sed \'s/R1_001\.fastq.gz/R1_001\.trimU\.fastq\.gz/g\'\) >> $outputDIR/trimmomatic.qsub
echo FILE2unpaired=\$\(basename \"\${FILE2}\" \| sed \'s/R2_001\.fastq.gz/R2_001\.trimU\.fastq\.gz/g\'\) >> $outputDIR/trimmomatic.qsub
echo java -jar /opt/compsci/Trimmomatic/0.33/trimmomatic-0.33.jar PE -threads 2 \$FILE $dataDIR\$FILE2 $outputDIR/trimmomatic/\$FILE1paired $outputDIR/trimmomatic/\$FILE1unpaired $outputDIR/trimmomatic/\$FILE2paired $outputDIR/trimmomatic/\$FILE2unpaired TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 >> $outputDIR/trimmomatic.qsub
echo python $scriptDIR/auyar/pyadapter_trim.py -a $outputDIR/trimmomatic/\$FILE1paired -b $outputDIR/trimmomatic/\$FILE2paired >> $outputDIR/trimmomatic.qsub
######
### ATAC-seq Portion ###
###
### This section is still a work in progress...
###
###
# Next steps...
# auyar/run_ATAC_pipeline.sh, which calls:
# * auyar/bwa_trimmomatic.sh
# * auyar/macs2_bwa_trimmomatic.sh
# * auyar/homer_bwa_trimmomatic.sh
# * auyar/nReads.sh
mkdir $outputDIR/trimmomatic/bwa
mkdir $outputDIR/trimmomatic/bwa/macs2
mkdir $outputDIR/trimmomatic/bwa/homer
#cp $scriptDIR/auyar/run_ATAC_pipeline.sh $currentDIR/
#echo \#!/bin/bash >> $outputDIR/ATAC-seq.qsub
#echo \#PBS -l nodes=1:ppn=16 >> $outputDIR/ATAC-seq.qsub
#echo \#PBS -l walltime=16:00:00 >> $outputDIR/ATAC-seq.qsub
#echo \#PBS -N ATAC-seq >> $outputDIR/ATAC-seq.qsub
#echo module load python >> $outputDIR/ATAC-seq.qsub
#echo module load R >> $outputDIR/ATAC-seq.qsub
#echo module load perl/5.10.1 >> $outputDIR/ATAC-seq.qsub
#echo module load samtools/0.1.19 >> $outputDIR/ATAC-seq.qsub
#echo module load bedtools >> $outputDIR/ATAC-seq.qsub
#echo gunzip $outputDIR/trimmomatic/*.gz >> $outputDIR/ATAC-seq.qsub
#echo bash $scriptDIR/auyar/bwa_trimmomatic.sh ../ >> $outputDIR/ATAC-seq.qsub
#ls $outputDIR/trimmomatic
#for f in $(find $outputDIR/trimmomatic/ -name *R1_001.trim.fastq.gz)
#do
#fileName2=$(basename "${f}" | sed 's/R1_001\.trim\.fastq/R2_001\.trim\.fastq/g')
#fileNameSAM=$(basename "${f}" | sed 's/R1_001\.trim\.fastq/hg19\.sam/g')
#fileNameMetrics=$(basename "${f}" | sed 's/R1_001\.trim\.fastq/metrics\.txt/g')
#echo /opt/compsci/bwa/0.7.12/bin/bwa mem -M /data/shared/genomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa $f $outputDIR/trimmomatic/$fileName2 > $outputDIR/trimmomatic/bwa/$fileNameSAM
#done
#echo bash $scriptDIR/auyar/macs2_bwa_trimmomatic.sh ../ >> $outputDIR/ATAC-seq.qsub
#echo bash $scriptDIR/auyar/homer_bwa_trimmomatic.sh ../ >> $outputDIR/ATAC-seq.qsub
#echo bash $scriptDIR/auyar/nReads.sh ../ >> $outputDIR/ATAC-seq.qsub
#echo gzip $outputDIR/trimmomatic/*.fastq >> $outputDIR/ATAC-seq.qsub
######
qsub $outputDIR/fastqc.qsub
JOBHOLD="$(qsub $outputDIR/trimmomatic.qsub)"
#qsub -Wdepend=afterokarray:$JOBHOLD $outputDIR/run_ATAC_pipeline.sh
#find $outputDIR/trimmomatic -name *R1_001.trim.fastq.gz > $outputDIR/bwa_filelist.txt
#FILENUMBER=$(wc -l $outputDIR/bwa_R1_filelist.txt | cut -d' ' -f1)
#mkdir $outputDIR/trimmomatic/bwa
#echo \#!/bin/bash >> $outputDIR/bwa.qsub
#echo \#PBS -l nodes=1:ppn=16 >> $outputDIR/bwa.qsub
#echo \#PBS -l walltime=48:00:00 >> $outputDIR/bwa.qsub
#echo \#PBS -N ATAC-seq-bwa >> $outputDIR/bwa.qsub
#echo \#PBS -t 1-$FILENUMBER >> $outputDIR/bwa.qsub
#echo module load python >> $outputDIR/bwa.qsub
#echo module load R >> $outputDIR/bwa.qsub
#echo module load perl/5.10.1 >> $outputDIR/bwa.qsub
#echo module load samtools/0.1.19 >> $outputDIR/bwa.qsub
#echo module load bedtools >> $outputDIR/bwa.qsub
#echo FILE=\$\(head -n \$PBS_ARRAYID $outputDIR/bwa_filelist.txt \| tail -1\) >> $outputDIR/bwa.qsub
#echo FILE2=\$\(basename "\${FILE}"\| sed \'s/R1_001\.trim\.fastq\.gz/R2_001\.trim\.fastq\.gz/g\'\) >> $outputDIR/bwa.qsub
#echo FILESAM=$FILE_hg19.sam >> $outputDIR/bwa.qsub
#echo /opt/compsci/bwa/0.7.12/bin/bwa mem -M /data/shared/genomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa $outputDIR/trimmomatic/\$FILE $outputDIR/trimmomatic/\$FIILE2 > $outputPath/trimmomatic/bwa/\$FILESAM >> $outputDIR/bwa.qsub
#qsub -Wdepend=afterokarray:$JOBHOLD $outputDIR/run_ATAC_pipeline.sh