forked from alaghemandi/CS555_2020
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathModule-6-Example-4.R
79 lines (57 loc) · 1.87 KB
/
Module-6-Example-4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# setwd("SET THE Working Director to THE PATH TO THIS DIRECTORY")
# install.packages("aod")
# install.packages("pROC")
library(aod)
library(stats)
library(pROC)
data<-read.csv("Datasets/cevent.csv")
attach(data)
# print a small part of the data
head(data)
# multiple logistic regression
data$male <- ifelse(data$sex =="M", 1, 0)
m2<-glm(data$event ~ data$chol + data$male + data$age, family=binomial)
summary(m2)
# ROC curve
# install.package("pROC")
library(pROC)
# using model with chol and sex and age
data$prob <-predict(m2, type=c("response"))
roc.info <-roc(data$event ~ data$prob, legacy.axes=T)
# different thresholds
roc.info$thresholds
roc.df <-data.frame(
tpp=roc.info$sensitivities * 100,
fpp=(1-roc.info$specificities) * 100,
threshholds = roc.info$thresholds
)
# look at the head of this dataframe
head(roc.df)
# look at the tail of this dataframe
tail(roc.df)
# Filter all of the values bigger than 40 and smaller than 80
roc.df[roc.df$tpp > 40 & roc.df$tpp < 80, ]
#############
# Partial ROC curve
############
par(pty="s")
roc(data$event ~ data$prob, plot=TRUE, legacy.axes=T, percent=T,
xlab="False Positive (%)", ylab="True Positive (%)", col="blue", lwd=4,
print.auc=T, print.auc.x=45, partial.auc=c(100, 80),
auc.polygon = T, auc.polygon.col="gray"
)
#
# Compare ROC curves
#
# Let us do another classification like randomForest
# install.packages("randomForest")
library(randomForest)
# Build the model for randomForest
rf.model <- randomForest(formula = factor(event) ~ . , data=data, importance = TRUE)
rf.model
par(pty="s")
# Logistic
roc(data$event ~ data$prob, plot=TRUE, legacy.axes=T, percent=T,
xlab="False Positive (%)", ylab="True Positive (%)", col="blue", lwd=4, print.auc=T, print.auc.x=45)
# Random Forest
plot.roc(data$event, rf.model$votes[,1], percent=T, col="green", lwd=4, print.auc=T, add=T, print.auc.y=40)