-
Notifications
You must be signed in to change notification settings - Fork 2
/
Heatmap
190 lines (135 loc) · 6.19 KB
/
Heatmap
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
######################################################################
################### Heatmap #####################
######################################################################
#install.packages("pheatmap")
BiocManager::install("ComplexHeatmap")
library(pheatmap)
library(BiocManager)
library(tidyverse)
library(dplyr)
library(DESeq2)
library(DESeq2)
library(xlsx)
#整理出后面需要的expr_Sig,annotation_col和annotation_row
data1<-read.table("./gene.txt",sep="\t",header=F)
data3<-data1[data1$Sig!="Not",]
data2<-as.data.frame(data3$id)
data4<-read.table("./DESeq2-Express.gene.txt",sep="\t",header=T)
colnames(data1)<-c("ID")
data6<-read.table("./case-control-differ-pvalue-0.05-FC-2.gene.xls",sep="\t",header=T)
expr_Sig <- inner_join( data1,data4,
by = "ID")
row.names(expr_Sig)<-expr_Sig$ID
expr_Sig<-expr_Sig[,-1] #整理出矩阵模式,即基因名为行名,样本名为列名
######整理出annotation_col和annotation_row#######
data5<-as.data.frame(t(expr_Sig[,-1]) )
data5<-data5%>%mutate(Type="Ischemia")
data5<-data5%>%select("Type")
data5[6:10,1]<-c("Normal")
#as.factor(data5$Type)
annotation_col<-data5
data8<-expr_Sig
data3<-read.table("./siggene.txt",sep="\t",header=T)
row.names(data8)<-expr_Sig[,1]
annotation_row<-data8%>%select(GeneClass)
colnames(annotation_row)<-c("GeneClass")
#as.factor(annotation_row$GeneClass)
write.csv(annotation_row,"annotation_row.csv")
write.csv(annotation_col,"datTraits.csv")
data6<-read.table("./NRGexp.txt",sep="\t",header=T)
data7<-data6[data6$A1!=0,]
data8<-read.table("./NRGexp.txt",sep="\t",header=T)
data9<-data8[data8$padj<=0.05,]
summary(data8)
data7<-data6%>%select("gene_id","up_down")
colnames(data7)<-c("ID","GeneClass")
expr_Sig <- inner_join( data7,data4,
by = "ID")
row.names(data4)<-data4$ID ##注意要用基因名作为行名
data4<-data4[,-1]
expr_Sig<-expr_Sig[,-11]
row.names(expr_Sig)<-expr_Sig$ID
as.data.frame(expr_Sig)
data11<-data10%>%select("GeneClass")
annotation_row<-data11
expr_Sig<-data8[,3:12]
#画图
pheatmap(data4)
#是否显示行的聚类,cluster_col同理
pheatmap(data4, cluster_col=T)
#切分热图
pheatmap(expr_Sig, cluster_rows = F, gaps_row = c(10, 14), cluster_cols = T,
cutree_col = 4)
#是否显示图例
pheatmap(expr_Sig, legend = FALSE)
#legend_breaks设置图例的显示范围,间隔为1;legend_labels重写刻度的标签, 需与legend_breaks同时使用。
pheatmap(expr_Sig,legend_breaks = -1:4)
#kmeans是一种聚类算法,详见https://www.cnblogs.com/bourneli/p/3645049.html
pheatmap(expr_Sig, kmeans_k = 3)
#scale标准化
#原始数据中,每个基因表达变化范围对应的数值大小不同,导致图片中色彩变化难以显示基因在不同样本中的变化趋势,
可以对基因在每个样本中基因表达数据进行标准化,使其数值在一定范围内,从而实现热图的优化
pheatmap(expr_Sig, scale = "row")
#pheatmap(expr_Sig, scale = "column")
#pheatmap(expr_Sig, scale = "none")
#聚类线长度优化,可能不一样的算法有不一样的枝长
#算法有'correlation', 'euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'minkowski'
pheatmap(expr_Sig, clustering_distance_rows = "maximum")
#或clustering_distance_cols
#设置颜色后面,括号里的数字表示梯度,10就是将这三种颜色设置为10个梯度
pheatmap(expr_Sig, color = colorRampPalette(c("navy", "white", "firebrick3"))(10))
#显示色块的数值
pheatmap(expr_Sig, display_numbers = TRUE)
#此外还可添加如下参数
#number_format = "%.3e"表示保留3位小数,且用科学计数法显示
#number_format = "%.3f"表示保留3位小数,用小数显示
pheatmap(expr_Sig, display_numbers = TRUE,number_format = "%.3e")
#调整色块或文本大小
pheatmap(expr_Sig, cellwidth = 5, cellheight = 1, main = "Heatmap", fontsize = 3)
#对行列进行注释
annotation_col <- read.csv(file='datTraits.csv',row.names = 1,
header = TRUE, sep=",", stringsAsFactors = FALSE)
annotation_col
#同理可设置annotation_row
annotation_row <- read.csv(file='annotation_row.csv',row.names = 1,
header = TRUE, sep=",", stringsAsFactors = FALSE)
#自定义注释色块的颜色
ann_colors = list(
Type = c(Ischemia = "#1B9E77", Normal = "#D95F02"),
GeneClass = c(up = "#7570B3",down = "#E7298A")
)
#注意ann_colors是列表
pheatmap(expr_Sig, annotation_col = annotation_col,
annotation_row = annotation_row,annotation_colors = ann_colors)
#最终热图展示
pheatmap(expr_Sig,scale = "row", #clustering_distance_rows = "maximum",则按基因类型划分;
color = colorRampPalette(c("navy", "white", "firebrick3"))(10),
#cellwidth = 18, cellheight = 18, fontsize = 9,
main = "", show_rownames = F, #展不展示基因名称
#参数去掉边框线border=F,
annotation_col = annotation_col,
annotation_row = annotation_row,
annotation_colors = ann_colors,
filename = "heatmapall.pdf")
dev.off()
#复杂注释 结合其他密度图、直方图
library(ComplexHeatmap)
library("GetoptLong")
# Define some graphics to display the distribution of columns
.hist = anno_histogram(expr_Sig, gp = gpar(fill = "lightblue"))
.density = anno_density(expr_Sig, type = "line", gp = gpar(col = "blue"))
ha_mix_top = HeatmapAnnotation(hist = .hist, density = .density)
# Define some graphics to display the distribution of rows
.violin = anno_density(expr_Sig, type = "violin", gp = gpar(fill = "lightblue"), which = "row")
.boxplot = anno_boxplot(expr_Sig, which = "row")
ha_mix_right = HeatmapAnnotation(violin = .violin, bxplt = .boxplot, which = "row",
width = unit(3, "cm"))
# Combine annotation with heatmap
Heatmap(expr_Sig, name = "heatmap", column_names_gp = gpar(fontsize = 8),
top_annotation = ha_mix_top)
#+ ha_mix_right
dev.off()
#heatmap():用于绘制简单热图的函数
#heatmap.2():绘制增强热图的函数
#d3heatmap:用于绘制交互式热图的R包
#ComplexHeatmap:用于绘制、注释和排列复杂热图的R&bioconductor包(非常适用于基因组数据分析)