-
Notifications
You must be signed in to change notification settings - Fork 0
/
4.2-CNV_analysis.Rmd
124 lines (95 loc) · 3.7 KB
/
4.2-CNV_analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
---
title: "CNV_analysis"
author: "Massimo Andreatta & Laura Yerly"
output: html_document
date: "2024-02-06"
---
```{r setup, include=FALSE,fig.width=16,fig.height=12}
renv::restore()
library(Seurat)
library(infercnv)
```
# Set path
```{r}
file_clean <- "cache/WHexp_annotated.rds"
seu <- readRDS(file_clean)
```
```{r}
table(seu$Sample, seu$patient_bcc)
table(seu$Sample, seu$annotation)
```
# Isolate tumor cells, normal keratinocytes and T cells for CNV analysis
```{r}
sub <- subset(seu, annotation %in% c("Tcell","Cancer_cells","Normal_Kerat"))
tab <- table(sub$Sample, sub$annotation)
tab
```
Focus on samples with enough cells
```{r eval=F}
ids <- tab[,"Cancer_cells"] >= 50 | tab[,"Normal_Kerat"] >= 50
pass <- names(ids)[ids]
sub <- subset(sub, subset=Sample %in% pass)
```
Downsample, for tests
```{r}
sub.list <- SplitObject(sub, split.by = "Sample")
sub.list.ds <- lapply(sub.list, function(x) {
Idents(x) <- "annotation"
subset(x, downsample = 2000) #limit number of cells for largest group
})
sub.ds <- Reduce(merge, sub.list.ds)
table(sub.ds$Sample, sub.ds$annotation)
```
# InferCNV analysis
Generate gene position file from cellranger gtf: <https://github.com/broadinstitute/inferCNV/wiki/instructions-create-genome-position-file> got the python script on infercnv github and ran on Rstudio Terminal: python3 ../scripts/gtf_to_position_file.py --attribute_name gene_name /export/scratch/twyss/FKuonen_group/BCC_scRNAseq/refdata-gex-GRCh38-2020-A/genes/genes.gtf refdata-gex-GRCh38-2020-A_gen_pos.txt
```{r}
gene_order_file.path<-"_aux/refdata-gex-GRCh38-2020-A_gen_pos.txt"
outdir <- "cache/inferCNV_BCC_all_WH_TumKerT"
options("Seurat.object.assay.version" = "v3")
annot <- sub.ds@meta.data[,c("annotation","Sample")]
infercnv <- CreateInfercnvObject(raw_counts_matrix = sub.ds[["RNA"]]$counts,
annotations_file = annot,
gene_order_file = gene_order_file.path ,
ref_group_names = c("Tcell"))
options(scipen = 100)
infercnv <- infercnv::run(infercnv,
cutoff=0.1, #threshold on gene expression
out_dir=outdir,
cluster_by_groups = FALSE,
denoise=TRUE,
leiden_resolution = 10^(-10),
hclust_method = "ward.D2",
num_threads = 8,
HMM=TRUE)
```
Interpret results
```{r eval=T}
scores=apply(infercnv@expr.data, 2, function(x){ sum(x < 0.9 | x > 1.1)/length(x) })
sub.ds$CNVpct <- scores
VlnPlot(sub.ds, features=c("CNVpct"), group.by = "annotation")
```
Run InferCNV separately for each sample
```{r}
seu.list <- SplitObject(sub, split.by = "patient_bcc")
infercnv.list <- lapply(names(seu.list), function(x) {
obj <- seu.list[[x]]
Idents(sub) <- "annotation"
outdir <- sprintf("cache/inferCNV_%s_WH_TumKerT",x)
annot <- as.data.frame(obj$annotation)
infercnv.this <- CreateInfercnvObject(raw_counts_matrix = obj[["RNA"]]$counts,
annotations_file = annot,
gene_order_file = gene_order_file.path ,
ref_group_names = c("Tcell"))
options(scipen = 100)
infercnv.this <- infercnv::run(infercnv.this,
cutoff=0.1, #threshold on gene expression
out_dir=outdir,
cluster_by_groups = FALSE,
denoise=TRUE,
leiden_resolution = 10^(-10),
hclust_method = "ward.D2",
num_threads = 8,
HMM=TRUE)
infercnv.this
})
```