-
Notifications
You must be signed in to change notification settings - Fork 0
/
1_feature_count.R
38 lines (27 loc) · 1.33 KB
/
1_feature_count.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
library(Rsubread)
library(rtracklayer)
library(Rsamtools)
library(GenomicRanges)
library(GenomicAlignments)
dirs <- list.dirs("/SAN/Eimeria_Totta/tophat_March_c", recursive=FALSE)
bams <- sapply(dirs, list.files, "accepted_hits.bam", full.names=TRUE)
FC <- featureCounts(bams,
annot.ext="/SAN/Eimeria_Totta/reference_genomes/mm10_eimeria_merge/indexes_bowtie2/mm10_GRCm38_eimeriaHaberkorn_fixed.gtf",
isGTFAnnotationFile=TRUE,
isPairedEnd=TRUE,
nthreads=20)
colnames(FC$counts) <- gsub(".*?tophat_March_c\\.(.*?_rep\\d).*", "\\1", colnames(FC$counts))
## The phenotype information stored in fastq and bam file/folder names
## is inconsistent in that the following samples lack challenge/first
## infection information compared to e.g. NMRI_2ndInf_0dpi_rep1
## They are 1st_Inf and 2nd_Inf controls a lacking for these
## conditions.
colnames(FC$counts)[colnames(FC$counts)%in%"Rag_0dpi_rep1"] <-
"Rag_1stInf_0dpi_rep1"
colnames(FC$counts)[colnames(FC$counts)%in%"Rag_0dpi_rep2"] <-
"Rag_1stInf_0dpi_rep2"
colnames(FC$counts)[colnames(FC$counts)%in%"C57BL6_0dpi_rep1"] <-
"C57BL6_1stInf_0dpi_rep1"
colnames(FC$counts)[colnames(FC$counts)%in%"C57BL6_0dpi_rep2"] <-
"C57BL6_1stInf_0dpi_rep2"
write.table(FC$counts, "output_data/RC_All_genes.csv", sep=",")