Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
Xu Ren committed Feb 13, 2020
0 parents commit c250c57
Show file tree
Hide file tree
Showing 22 changed files with 1,803 additions and 0 deletions.
34 changes: 34 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
Package: RNAAgeCalc
Type: Package
Title: A multi-tissue transcriptional age calculator
Version: 0.99.1
Authors@R: c(
person("Xu", "Ren",
email = "xuren2120@gmail.com", role = c("aut", "cre")),
person("Pei Fen", "Kuan",
email = "peifen.kuan@stonybrook.edu", role = "aut"))
Description: It has been shown that both DNA methylation and RNA transcription
are linked to chronological age and age related diseases. Several
estimators have been developed to predict human aging from DNA level and
RNA level. Most of the human transcriptional age predictor are based on
microarray data and limited to only a few tissues. To date, transcriptional
studies on aging using RNASeq data from different human tissues is limited.
The aim of this package is to provide a tool for across-tissue and
tissue-specific transcriptional age calculation based on GTEx RNASeq data.
License: GPL-2
Encoding: UTF-8
Imports:
ggplot2,
impute,
AnnotationDbi,
org.Hs.eg.db,
stats
Depends:
R (>= 3.5)
Suggests:
knitr,
rmarkdown,
testthat
RoxygenNote: 6.1.0
VignetteBuilder: knitr
biocViews: RNASeq,GeneExpression
21 changes: 21 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by roxygen2: do not edit by hand

export(count2FPKM)
export(makeplot)
export(predict_age)
import(org.Hs.eg.db)
importFrom(AnnotationDbi,select)
importFrom(ggplot2,aes)
importFrom(ggplot2,element_text)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_smooth)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(impute,impute.knn)
importFrom(stats,lm)
importFrom(stats,na.omit)
importFrom(stats,residuals)
71 changes: 71 additions & 0 deletions R/count2FPKM.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

#' @title Converting gene expression data from raw count to FPKM
#' @description This function converts gene expression data from raw count to
#' FPKM.
#'
#' @param rawcount a matrix or data frame which contains gene expression count
#' data.
#' @param genelength gene length in bp. The size of `genelength` should be
#' equal to the number of rows in `rawcount`. This argument is optional. If
#' not provided, gene length is obtained from the internal database.
#' @param idtype a string which indicates the gene id type in rawcount matrix.
#' It should be one of "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ".
#' Default is "SYMBOL".
#' @export
#' @import org.Hs.eg.db
#' @importFrom AnnotationDbi select
#' @return a data frame contains FPKM.
#' @references Carlson M (2019). org.Hs.eg.db: Genome wide annotation for
#' Human. R package version 3.8.2.
#' @examples
#' data(countExample)
#' head(rawcount)
#' fpkm = count2FPKM(rawcount)
#' head(fpkm)

count2FPKM = function(rawcount, genelength = NULL, idtype = "SYMBOL"){
if(is.null(genelength)){
if(idtype!="ENSEMBL"){
## convert the gene id to ENSEMBL to match the gene id in gene
## length database
temp = suppressMessages(select(org.Hs.eg.db, rownames(rawcount),
columns = "ENSEMBL", keytype = idtype))
outcome = temp[temp$ENSEMBL%in%rownames(lengthDB),]
outcome = outcome[!duplicated(outcome[,idtype]),]
## ind is all the one to one map
ind = which(!outcome$ENSEMBL %in%
outcome$ENSEMBL[duplicated(outcome$ENSEMBL)])
outcome1 = outcome[ind,] ## outcome1 is all the 1-1 map

matchid = match(rownames(rawcount),outcome1[,idtype])
genelength = lengthDB[outcome1$ENSEMBL[matchid],"bp_length"]
}else{
genelength = lengthDB[rownames(rawcount), "bp_length"]
}

}else{
stopifnot(is.vector(genelength))
stopifnot(is.numeric(genelength))
if(any(genelength<0, na.rm = TRUE)){
stop("genelength cannot contain negative value(s).")
}
if(length(genelength)!=nrow(rawcount)){
stop("The size of genelength vector should be equal to the number
of rows in rawcount.")
}
}

if(any(is.na(genelength))){
warning("Can't find gene length for ",
round(sum(is.na(genelength))/nrow(rawcount)*100,4),
"% genes when converting raw count to FPKM.")
}

countSum = colSums(rawcount)
bg = matrix(countSum, ncol = ncol(rawcount),
nrow = nrow(rawcount), byrow = TRUE)
wid = matrix(genelength, nrow = nrow(rawcount),
ncol = ncol(rawcount), byrow = FALSE)
data.frame(rawcount / (wid/1000) / (bg/1e6))

}
21 changes: 21 additions & 0 deletions R/exampledata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#' @title An example of FPKM data
#'
#' @description An example of FPKM data
#' @docType data
#' @name fpkm
#' @usage fpkm
#' @format A data frame which contains the fpkm data. Each row is a gene and
#' each column is a sample
#' @keywords datasets
NULL

#' @title An example of RNASeq count data
#'
#' @description An example of RNASeq count data
#' @docType data
#' @name rawcount
#' @usage rawcount
#' @format A data frame which contains the RNASeq count data. Each row is a
#' gene and each column is a sample
#' @keywords datasets
NULL
41 changes: 41 additions & 0 deletions R/makeplot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@


#' @title Make plot to visualize RNA age
#'
#' @description This function makes plots to visualize the relationship
#' between chronological age and RNA age.
#' @param res a data frame returned by `predict_age` function. If the
#' chronological age is not provided when using `predict_age` function,
#' visulization cannot be made.
#' @param main title of the plot
#' @param xlab label of x-axis
#' @param ylab label of y-axis
#' @importFrom ggplot2 ggplot aes geom_point geom_smooth ggtitle xlab ylab
#' theme_bw theme element_text
#' @export
#' @return the plot which shows RNA age vs chronological age
#' @examples
#' data(fpkmExample)
#' fpkm_large = cbind(fpkm, fpkm, fpkm, fpkm)
#' fpkm_large = cbind(fpkm_large, fpkm_large, fpkm_large, fpkm_large)
#' colnames(fpkm_large) = paste0("sample",1:32)
#' chronage = data.frame(sampleid = colnames(fpkm_large), age = 1:32)
#' res = predict_age(exprdata = fpkm_large, exprtype = "FPKM",
#' chronage = chronage)
#' makeplot(res)

makeplot = function(res, main = "RNA age vs chronological age",
xlab = "chronological age", ylab = "RNA Age"){
if(!"ChronAge"%in%colnames(res)){
stop("Chronological age is not found in res data frame.")
}
if(nrow(na.omit(res))<30){
warning("Less than 30 samples. The linear regression on RNA age vs
chronological age may not be reliable.")
}
ggplot(res, aes(x=ChronAge, y=RNAAge)) + geom_point(shape=19,size=1) +
geom_smooth(method=lm) + ggtitle(main) + xlab(xlab) + ylab(ylab) +
theme_bw() + theme(plot.title = element_text(hjust = 0.5))
}


Loading

0 comments on commit c250c57

Please sign in to comment.