-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVersion2_Experiment.Rmd
99 lines (81 loc) · 3.51 KB
/
Version2_Experiment.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
---
title: "R Notebook"
output: html_notebook
---
```{r load_libs, message=FALSE, warning=FALSE}
library(tidyverse)
library(limma)
```
```{r make_data}
data.timepoints <- 12
data.treatments <- 2
data.cell.lines <- 9
data.replicates <- 6
data.batch.size <- 72
data.conditions <- data.timepoints * data.treatments * data.cell.lines
data.rows <- 1e3
data.cols <- data.conditions * data.replicates
data.no.batches <- data.cols / data.batch.size
data.no.samples <- data.timepoints * data.treatments * data.cell.lines * data.replicates
pheno.treatment <- paste0("TRT", LETTERS[1:data.treatments]) %>%
rep(.,each = data.no.samples/data.treatments)
pheno.timepts <- paste0("TP", LETTERS[1:data.timepoints]) %>%
rep(.,data.no.samples/data.timepoints)
pheno.replicate <- rep(1:data.replicates, each = data.timepoints) %>%
rep(., data.treatments * data.cell.lines)
pheno.cell.line <- paste0("CL", LETTERS[1:data.cell.lines]) %>%
rep(., each = data.timepoints * data.replicates) %>%
rep(., 2)
pheno.table <- data.frame(Sample = paste0("SAM",1:data.no.samples)) %>%
mutate(Treatment = pheno.treatment,
Time.Point = pheno.timepts,
Replicate = pheno.replicate,
Cell.Line = pheno.cell.line,
SampleType = paste0(Treatment, "_", Time.Point, "_", Cell.Line)) %>%
arrange(SampleType)
data.mat <- rnorm(data.rows*data.cols, 7, 2) %>%
matrix(ncol = data.cols) %>%
`colnames<-`(pheno.table$Sample) %>%
`rownames<-`(paste0("PROBE_",1:data.rows))
pheno.table
```
```{r calculate_batches}
data.prop.split <- .5 # Proportion of replicate splits
data.batch.size <- 72 # Number of samples that can be ran on a single run
batch.bins <- nrow(pheno.table)/data.batch.size
keep.seq <- c(1:3,7:9,13:15,19:21,25:27,31:33)
samples.avail <- pheno.table %>% group_by(SampleType) %>% summarise(N = n())
pheno.table$Batch <- 0
for(i in 1:batch.bins) {
curr.batch.levels <- 6
idx.end <- i*data.batch.size
idx.start <- (idx.end-data.batch.size)+1
idx.range <- idx.start:idx.end
idx.keep <- keep.seq[1:((data.replicates*data.prop.split)*curr.batch.levels)] + idx.start - 1
pheno.table[idx.range,]$Batch <- i
pheno.table[idx.keep,]$Batch <- i-1
}
pheno.table <- pheno.table %>% mutate(Batch = ifelse(Batch == 0,1,Batch),
Batch = as.factor(Batch))
pheno.table %>% group_by(Batch) %>% summarise(N = n())
```
```{r fit_model}
data.conditions <- pheno.table$SampleType %>% unique
design <- model.matrix(~0 + SampleType + Batch, data = pheno.table)
colnames(design) <- colnames(design) %>% gsub("SampleType","",.)
contrasts <- combn(length(data.conditions),2) %>% t
for(i in 1:nrow(contrasts)) {
contrasts[i,1] <- data.conditions[as.numeric(contrasts[i,1])]
contrasts[i,2] <- data.conditions[as.numeric(contrasts[i,2])]
}
contrasts <- paste0(contrasts[,1], "-", contrasts[,2]) %>%
`names<-`(gsub(" - ","_Vs_",.)) %>% .[1:20] %>%
makeContrasts(contrasts = ., levels = colnames(design))
fit <- lmFit(data.mat, design) %>%
contrasts.fit(contrasts = contrasts) %>%
eBayes
```
```{r run_test}
tt <- topTable(fit, coef = colnames(contrasts)[4])
tt
```