-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Xu Ren
committed
Feb 13, 2020
0 parents
commit c250c57
Showing
22 changed files
with
1,803 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
Package: RNAAgeCalc | ||
Type: Package | ||
Title: A multi-tissue transcriptional age calculator | ||
Version: 0.99.1 | ||
Authors@R: c( | ||
person("Xu", "Ren", | ||
email = "xuren2120@gmail.com", role = c("aut", "cre")), | ||
person("Pei Fen", "Kuan", | ||
email = "peifen.kuan@stonybrook.edu", role = "aut")) | ||
Description: It has been shown that both DNA methylation and RNA transcription | ||
are linked to chronological age and age related diseases. Several | ||
estimators have been developed to predict human aging from DNA level and | ||
RNA level. Most of the human transcriptional age predictor are based on | ||
microarray data and limited to only a few tissues. To date, transcriptional | ||
studies on aging using RNASeq data from different human tissues is limited. | ||
The aim of this package is to provide a tool for across-tissue and | ||
tissue-specific transcriptional age calculation based on GTEx RNASeq data. | ||
License: GPL-2 | ||
Encoding: UTF-8 | ||
Imports: | ||
ggplot2, | ||
impute, | ||
AnnotationDbi, | ||
org.Hs.eg.db, | ||
stats | ||
Depends: | ||
R (>= 3.5) | ||
Suggests: | ||
knitr, | ||
rmarkdown, | ||
testthat | ||
RoxygenNote: 6.1.0 | ||
VignetteBuilder: knitr | ||
biocViews: RNASeq,GeneExpression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(count2FPKM) | ||
export(makeplot) | ||
export(predict_age) | ||
import(org.Hs.eg.db) | ||
importFrom(AnnotationDbi,select) | ||
importFrom(ggplot2,aes) | ||
importFrom(ggplot2,element_text) | ||
importFrom(ggplot2,geom_point) | ||
importFrom(ggplot2,geom_smooth) | ||
importFrom(ggplot2,ggplot) | ||
importFrom(ggplot2,ggtitle) | ||
importFrom(ggplot2,theme) | ||
importFrom(ggplot2,theme_bw) | ||
importFrom(ggplot2,xlab) | ||
importFrom(ggplot2,ylab) | ||
importFrom(impute,impute.knn) | ||
importFrom(stats,lm) | ||
importFrom(stats,na.omit) | ||
importFrom(stats,residuals) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
|
||
#' @title Converting gene expression data from raw count to FPKM | ||
#' @description This function converts gene expression data from raw count to | ||
#' FPKM. | ||
#' | ||
#' @param rawcount a matrix or data frame which contains gene expression count | ||
#' data. | ||
#' @param genelength gene length in bp. The size of `genelength` should be | ||
#' equal to the number of rows in `rawcount`. This argument is optional. If | ||
#' not provided, gene length is obtained from the internal database. | ||
#' @param idtype a string which indicates the gene id type in rawcount matrix. | ||
#' It should be one of "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ". | ||
#' Default is "SYMBOL". | ||
#' @export | ||
#' @import org.Hs.eg.db | ||
#' @importFrom AnnotationDbi select | ||
#' @return a data frame contains FPKM. | ||
#' @references Carlson M (2019). org.Hs.eg.db: Genome wide annotation for | ||
#' Human. R package version 3.8.2. | ||
#' @examples | ||
#' data(countExample) | ||
#' head(rawcount) | ||
#' fpkm = count2FPKM(rawcount) | ||
#' head(fpkm) | ||
|
||
count2FPKM = function(rawcount, genelength = NULL, idtype = "SYMBOL"){ | ||
if(is.null(genelength)){ | ||
if(idtype!="ENSEMBL"){ | ||
## convert the gene id to ENSEMBL to match the gene id in gene | ||
## length database | ||
temp = suppressMessages(select(org.Hs.eg.db, rownames(rawcount), | ||
columns = "ENSEMBL", keytype = idtype)) | ||
outcome = temp[temp$ENSEMBL%in%rownames(lengthDB),] | ||
outcome = outcome[!duplicated(outcome[,idtype]),] | ||
## ind is all the one to one map | ||
ind = which(!outcome$ENSEMBL %in% | ||
outcome$ENSEMBL[duplicated(outcome$ENSEMBL)]) | ||
outcome1 = outcome[ind,] ## outcome1 is all the 1-1 map | ||
|
||
matchid = match(rownames(rawcount),outcome1[,idtype]) | ||
genelength = lengthDB[outcome1$ENSEMBL[matchid],"bp_length"] | ||
}else{ | ||
genelength = lengthDB[rownames(rawcount), "bp_length"] | ||
} | ||
|
||
}else{ | ||
stopifnot(is.vector(genelength)) | ||
stopifnot(is.numeric(genelength)) | ||
if(any(genelength<0, na.rm = TRUE)){ | ||
stop("genelength cannot contain negative value(s).") | ||
} | ||
if(length(genelength)!=nrow(rawcount)){ | ||
stop("The size of genelength vector should be equal to the number | ||
of rows in rawcount.") | ||
} | ||
} | ||
|
||
if(any(is.na(genelength))){ | ||
warning("Can't find gene length for ", | ||
round(sum(is.na(genelength))/nrow(rawcount)*100,4), | ||
"% genes when converting raw count to FPKM.") | ||
} | ||
|
||
countSum = colSums(rawcount) | ||
bg = matrix(countSum, ncol = ncol(rawcount), | ||
nrow = nrow(rawcount), byrow = TRUE) | ||
wid = matrix(genelength, nrow = nrow(rawcount), | ||
ncol = ncol(rawcount), byrow = FALSE) | ||
data.frame(rawcount / (wid/1000) / (bg/1e6)) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#' @title An example of FPKM data | ||
#' | ||
#' @description An example of FPKM data | ||
#' @docType data | ||
#' @name fpkm | ||
#' @usage fpkm | ||
#' @format A data frame which contains the fpkm data. Each row is a gene and | ||
#' each column is a sample | ||
#' @keywords datasets | ||
NULL | ||
|
||
#' @title An example of RNASeq count data | ||
#' | ||
#' @description An example of RNASeq count data | ||
#' @docType data | ||
#' @name rawcount | ||
#' @usage rawcount | ||
#' @format A data frame which contains the RNASeq count data. Each row is a | ||
#' gene and each column is a sample | ||
#' @keywords datasets | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
|
||
|
||
#' @title Make plot to visualize RNA age | ||
#' | ||
#' @description This function makes plots to visualize the relationship | ||
#' between chronological age and RNA age. | ||
#' @param res a data frame returned by `predict_age` function. If the | ||
#' chronological age is not provided when using `predict_age` function, | ||
#' visulization cannot be made. | ||
#' @param main title of the plot | ||
#' @param xlab label of x-axis | ||
#' @param ylab label of y-axis | ||
#' @importFrom ggplot2 ggplot aes geom_point geom_smooth ggtitle xlab ylab | ||
#' theme_bw theme element_text | ||
#' @export | ||
#' @return the plot which shows RNA age vs chronological age | ||
#' @examples | ||
#' data(fpkmExample) | ||
#' fpkm_large = cbind(fpkm, fpkm, fpkm, fpkm) | ||
#' fpkm_large = cbind(fpkm_large, fpkm_large, fpkm_large, fpkm_large) | ||
#' colnames(fpkm_large) = paste0("sample",1:32) | ||
#' chronage = data.frame(sampleid = colnames(fpkm_large), age = 1:32) | ||
#' res = predict_age(exprdata = fpkm_large, exprtype = "FPKM", | ||
#' chronage = chronage) | ||
#' makeplot(res) | ||
|
||
makeplot = function(res, main = "RNA age vs chronological age", | ||
xlab = "chronological age", ylab = "RNA Age"){ | ||
if(!"ChronAge"%in%colnames(res)){ | ||
stop("Chronological age is not found in res data frame.") | ||
} | ||
if(nrow(na.omit(res))<30){ | ||
warning("Less than 30 samples. The linear regression on RNA age vs | ||
chronological age may not be reliable.") | ||
} | ||
ggplot(res, aes(x=ChronAge, y=RNAAge)) + geom_point(shape=19,size=1) + | ||
geom_smooth(method=lm) + ggtitle(main) + xlab(xlab) + ylab(ylab) + | ||
theme_bw() + theme(plot.title = element_text(hjust = 0.5)) | ||
} | ||
|
||
|
Oops, something went wrong.