forked from DrZiruiJ/R_Code
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TPM
31 lines (27 loc) · 1.05 KB
/
TPM
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
######################################################################
################### TPM #####################
######################################################################
rm(list = ls())#删除目前工作目录的变量
library(xlsx)
library(readxl)
ann<- read_excel("Counts.xlsx")#读取基因文件
input<- read.table("./All_hg19gene_len.txt",header = T)#自己在Excel中把网盘里的txt文件基因和长度共两列提取出来
library(dplyr)
merge<-left_join(datagene,input,by="Gene")#根据基因那列进行合并
merge <- na.omit(merge)#删除错误值行
write.csv(merge,file = "merge.csv",sep = "\t")#读出文件,直接往下运行也许
mycounts<-read.csv("merge.csv")
head(mycounts)
rownames(mycounts)<-mycounts$Gene
mycounts<-mycounts[,-1]
mycounts<-mycounts[,-11]
head(mycounts)#最后一列Length是基因长度
#TPM计算
kb <- mycounts$Length / 1000
kb
countdata <- mycounts[,1:10]
rpk <- countdata / kb
rpk
tpm <- t(t(rpk)/colSums(rpk) * 1000000)
head(tpm)
write.table(tpm,file="tpm.xls",sep="\t",quote=F)