|
#!/usr/bin/env python
"""Write one SLURM batch script per sample for paired-end STAR alignment.

For every ``Sample_*`` directory found under ``samplesDir`` a file named
``<sample>.slurm`` is written to ``slurmDir``.  Each script consists of an
``#SBATCH`` header, a ``cd`` into ``outputDir`` and one STAR command per
read-1 ``*.fastq.gz`` file found in the sample directory.  The read-2 file
name is derived from the read-1 name by swapping the ``_R1_`` indicator for
``_R2_``.  The paths handed to STAR are built under ``fastqFiles``, not under
the sample directory itself.

The script only generates the batch files; it does not submit them.
"""
import sys
import os
import basic
import glob
import re

# Global variables
jobName = "STAR"                            # prefix of SLURM job names and log files
samplesDir = "/data/chilab/RNAseq_2015-07"  # searched for Sample_* directories
slurmDir = "/data/chilab/bill/slurm-STAR"   # generated .slurm scripts go here
starIndex = "/data/chilab/bill/STAR-index"  # passed to STAR as --genomeDir
fastqFiles = "/data/chilab/bill/STAR"       # directory prefix of the --readFilesIn paths
outputDir = "/data/chilab/bill/sam"         # every job changes into this directory
sjdbOverhang = 125                          # not referenced anywhere in this script
numThreads = 8                              # used for --cpus-per-task and --runThreadN
memory = 40000                              # value of "#SBATCH --mem"
STAR = "/data/reddylab/software/STAR_2.4.2a/STAR-STAR_2.4.2a/bin/Linux_x86_64/STAR"

# Paired-end FASTQ name: <prefix>_R<1|2>_<rest>.fastq.gz
_pairedFastq = re.compile(r"(\S+_R)([12])(_\S+\.fastq\.gz)")
# Everything in front of the .fastq.gz extension
_fastqStem = re.compile(r"(\S+)\.fastq\.gz")


def _slurmHeader(jobID):
    """Return the #SBATCH header block for job number ``jobID``."""
    tag = "%s%i" % (jobName, jobID)
    return "\n".join([
        "#!/bin/bash",
        "#",
        "#SBATCH -J " + tag,
        # stdout and stderr deliberately share one log file
        "#SBATCH -o %s.output" % tag,
        "#SBATCH -e %s.output" % tag,
        # NOTE(review): sbatch -A selects the account to charge, yet it is
        # given the job tag here -- confirm this is intended.
        "#SBATCH -A " + tag,
        "#SBATCH --mem %i" % memory,
        "#SBATCH --cpus-per-task=%s" % numThreads,
        "#",
    ])


def _starCommand(fileNoPath):
    """Return the STAR command line for a read-1 FASTQ file name.

    ``fileNoPath`` is a bare file name without any directory part.  Returns
    ``None`` for read-2 files, because they are consumed together with their
    read-1 mate.  Exits the program when the name cannot be parsed.
    """
    match = _pairedFastq.search(fileNoPath)
    if match is None:
        sys.exit("can't parse paired file indicator: " + fileNoPath)
    prefix, read, suffix = match.groups()
    if int(read) != 1:
        return None

    stemMatch = _fastqStem.search(fileNoPath)
    if stemMatch is None:
        sys.exit("Can't parse filename")

    firstFile = fastqFiles + "/" + fileNoPath
    secondFile = fastqFiles + "/" + prefix + "2" + suffix
    return " ".join([
        STAR,
        "--genomeLoad", "LoadAndKeep",
        "--genomeDir", starIndex,
        "--readFilesIn", firstFile, secondFile,
        "--readFilesCommand", "zcat",
        "--outFileNamePrefix", stemMatch.group(1),
        "--outSAMstrandField", "intronMotif",
        "--runThreadN", "%i" % numThreads,
    ])


# Make output directories
for directory in (slurmDir, outputDir):
    if not os.path.exists(directory):
        os.makedirs(directory)

# Get list of sample directories; sorted so that job numbers are assigned
# the same way on every run (glob order is arbitrary).
samples = sorted(glob.glob(samplesDir + "/Sample_*"))

# Process each sample
for jobID, sample in enumerate(samples, 1):
    sampleName = os.path.basename(sample)
    lines = [_slurmHeader(jobID), "cd " + outputDir]

    # One STAR invocation per read-1 file; read-2 files yield no command.
    for fastq in sorted(glob.glob(sample + "/*.fastq.gz")):
        command = _starCommand(os.path.basename(fastq))
        if command is not None:
            lines.append(command)

    # Written in one go, only after every file name has parsed, so a parse
    # failure never leaves a half-written script behind.
    with open(slurmDir + "/" + sampleName + ".slurm", "w") as out:
        out.write("\n".join(lines) + "\n")

0 commit comments