-
Notifications
You must be signed in to change notification settings - Fork 0
/
SmokingPredictions_Cotinine_4.15.2021.Rmd
106 lines (75 loc) · 4.44 KB
/
SmokingPredictions_Cotinine_4.15.2021.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
---
title: "Current Smoking Prediction"
output: html_document
---
Prediction from deep learning model for current smoking generated by Zifeng Wang and transferred December 2019.
Starting datafiles:
smoking_prediction.tsv
isoform_estimation.tsv
```{r setup,include=F}
library(data.table)
library(vioplot)
library(knitr)
library(psych)
library(beeswarm)
library(pROC)
library(OptimalCutpoints)
```
```{r tab1,echo=F}
knitr::opts_chunk$set(echo = TRUE)
#m<-fread("/Users/petercastaldi/Desktop/COPDGene/RNASEQ/MasterFiles/master.file.freeze3.txt")
phe<-fread("/Users/petercastaldi/Desktop/COPDGene/Visit2/COPDGene_P1P2_All_Visit_25mar19.txt",data.table=F)
cbc<-fread("/Users/petercastaldi/Desktop/COPDGene/Visit2/COPDGene_P1P2_All_Visit_28feb19_CBCQC.txt",data.table=F)
phe2<-phe[phe$visitnum==2,]
cat("\nDimension of COPD Visit 2 phenotypes: ",dim(phe2))
d1<-fread("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/prediction_data/dataset_split.tsv")
d2<-fread("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/prediction_data/COPDGeneTest_actual_ids.csv")
allsubj<-c(d1$SubjectID,d2$actual_id)
length(unique(allsubj))
```
```{r select, echo=F}
predid<-fread("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/prediction_data/COPDGeneTest_actual_ids.csv")
cat("\nNumber of unique ids in the test dataset = ",length(unique(predid$actual_id)))
exonmapfs<-fread("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/prediction_data/final_results/Exon_Isoform_Map_GTF_FS.tsv")
exonmapfs$sid<-predid
names(exonmapfs)[1]<-"exon_iml_fsl"
all<-merge(phe2[,c("sid","SmokCigNow")],exonmapfs,by="sid")
all$smoke<-ifelse(all$SmokCigNow==1,"current","former")
cat("\nPerformance of exon model in entire test data, Youden index: ")
summary(optimal.cutpoints(exon_iml_fsl~smoke,data=all,methods="Youden",tag.healthy = "former"))
cat("\nPerformance of exon model in entire test data, specificity of 98%: ")
summary(optimal.cutpoints(exon_iml_fsl~smoke,data=all,methods="MinValueSp",tag.healthy = "former",control=control.cutpoints(valueSp=0.98)))
lmmetab<-fread("/Users/petercastaldi/Desktop/COPDGene/Metabolomics/COPDGene_Metabolon_Norm_P2_2018-12-17.txt")
names(metab)[1]<-"sid"
all<-merge(all,metab[,c("sid","cotinine")],by="sid")
all$cotinine[is.na(all$cotinine)]<-0
cat("\nNumber of unique subjects in the test data with SOMAscan metabolite measurements = ",length(unique(all$sid)))
roc_exonmapfs<-roc(smoke~exon_iml_fsl,data=all,smooth=TRUE,plot=TRUE)
roc_exonmapfs
cat("\nPerformance of exon model in 106 subjects with overlapping cotinine data: ")
summary(optimal.cutpoints(exon_iml_fsl~smoke,data=all,methods="Youden",tag.healthy = "former"))
roc_cotinine<-roc(smoke~cotinine,data=all,smooth=TRUE,plot=TRUE)
roc_cotinine
cat("\nPerformance of cotinine model: ")
summary(optimal.cutpoints(cotinine~smoke,data=all,methods="Youden",tag.healthy = "former"))
cat("\nDeLong test for exon versus cotinine:")
roc.test(roc(smoke~exon_iml_fsl,data=all,smooth=FALSE),roc(smoke~cotinine,data=all,smooth=FALSE),method="delong")
png("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/PJC/FIGURES/cotinine_violin_4.20.21.png",width=1920,height=1920,res=300)
par(cex.lab=1.4, cex.axis=1,mar=c(5.1, 5.1, 4.1, 2.1))
vioplot(cotinine~smoke,data=all,ylab="Cotinine (relative fluorescent units)",xlab="Smoking Status")
x<-dev.off()
png("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/PJC/FIGURES/exon_iml_fs_violin_4.20.21.png",width=1920,height=1920,res=300)
par(cex.lab=1.4, cex.axis=1,mar=c(5.1, 5.1, 4.1, 2.1))
vioplot(exon_iml_fsl~smoke,data=all,ylab="Predicted values: exon IML-GTF FSL model",xlab="Smoking Status")
x<-dev.off()
png("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/PJC/FIGURES/ROC_cotinine_versus_exonimlfsl_4.20.21.png",width=1920,height=1920,res=300)
plot(roc_cotinine,cex.lab=1.4,cex.axis=1.4)
plot(roc_exonmapfs,add=TRUE,col="red",print.auc=FALSE)
legend("bottomright",c("cotinine","E-I map + FS"),pch=19,col=c("black","red"),xjust=1,cex=1.4)
x<-dev.off()
# tiff("/Users/petercastaldi/Dropbox (Partners HealthCare)/Research/Others/Zifeng/PJC/FIGURES/ROC_cotinine_versus_exonimlfsl_4.20.21.tiff",width=1920,height=1920,res=300)
# plot(roc_cotinine,cex.lab=1.4,cex.axis=1.4)
# plot(roc_exonmapfs,add=TRUE,col="red",print.auc=FALSE)
# legend("bottomright",c("cotinine","E-I map + FS"),pch=19,col=c("black","red"),xjust=1,cex=1.4)
# x<-dev.off()
```