-
Notifications
You must be signed in to change notification settings - Fork 0
/
revive_try1.R
142 lines (102 loc) · 4.4 KB
/
revive_try1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#load libraries
library(randomForest) #for relatively fast importance estimation
library(pracma) #for Savitzky-Golay smoothing (savgol)
#stage for reproducibility
set.seed(45627)
#control variables
N_repeats_per_rate <- 300 #repeats per value of rate
N_samples_per_run <- 300 #how many samples per run
#make rate variable
rate <- seq(from = 0, to = 1, by =0.01)
#for each rate, for each variable, we compute
# 5th percentile, median, 95th percentile
rate_store <- data.frame(matrix(0,nrow=N_repeats_per_rate,ncol=3*3+1))
for (i in 1:length(rate)){
#what is this rate
this_rate <- rate[i]
#stage for inner loop
run_store <- data.frame(matrix(0,nrow=length(rate),ncol=3))
#make importance samples
for (j in 1:N_samples_per_run){
#make values x1, x2, x3
x1 <- runif(n = N_samples_per_run)
x2 <- runif(n = N_samples_per_run)
x3 <- runif(n = N_samples_per_run)
x4 <- runif(n = N_samples_per_run)
sw <- rbinom(n = N_samples_per_run, size = 1, prob = this_rate)
#make y
y <- x1 + sw*x2 + 0*x3 + (1-sw)*x4
#populate data frame
mydata <- data.frame(x1,x2,x3,y)
#fit via random forest
my_rf_fit <- randomForest(y~.,
data = mydata,
ntree = 300)
#compute variable importance
run_store[j,] <- importance(my_rf_fit)
#store
}
#compute summary values and store
rate_store[i, 1] <- this_rate
temp <- quantile(x = run_store[,1], probs = c(0.05, 0.5, 0.95))
rate_store[i, 2] <- temp[1]
rate_store[i, 3] <- temp[2]
rate_store[i, 4] <- temp[3]
temp <- quantile(x = run_store[,2], probs = c(0.05, 0.5, 0.95))
rate_store[i, 5] <- temp[1]
rate_store[i, 6] <- temp[2]
rate_store[i, 7] <- temp[3]
temp <- quantile(x = run_store[,3], probs = c(0.05, 0.5, 0.95))
rate_store[i, 8] <- temp[1]
rate_store[i, 9] <- temp[2]
rate_store[i, 10] <- temp[3]
}
#this part is to save redo-redo-redo time when making markdown
names(rate_store) <- c("rate",
"LCL_x1",
"Med_x1",
"UCL_x1",
"LCL_x2",
"Med_x2",
"UCL_x2",
"LCL_x3",
"Med_x3",
"UCL_x3")
rate_store <- rate_store[1:length(rate),]
#write it
write.csv(x = rate_store, file = "my_rate_store.csv")
rm(list=ls())
#read it
rate_store <- read.csv("my_rate_store.csv")
rate_store <- rate_store[,-1]
rate_store <- rate_store[c(1:101),]
rate <- rate_store[,1]
#so how do we print it?
for (i in 1:length(rate)){
# s_max <- sum(rate_store[i,c(3,6,9)])
s_max <- sum(rate_store[i,c(3)])
s_min <- 0
rate_store[i,2] <- ( rate_store[i,2] - s_min)/(s_max-s_min)
rate_store[i,3] <- ( rate_store[i,3] - s_min)/(s_max-s_min)
rate_store[i,4] <- ( rate_store[i,4] - s_min)/(s_max-s_min)
rate_store[i,5] <- ( rate_store[i,5] - s_min)/(s_max-s_min)
rate_store[i,6] <- ( rate_store[i,6] - s_min)/(s_max-s_min)
rate_store[i,7] <- ( rate_store[i,7] - s_min)/(s_max-s_min)
rate_store[i,8] <- ( rate_store[i,8] - s_min)/(s_max-s_min)
rate_store[i,9] <- ( rate_store[i,9] - s_min)/(s_max-s_min)
rate_store[i,10] <- ( rate_store[i,10] - s_min)/(s_max-s_min)
}
x_range <- c(min(rate),max(rate))
y_range <- c(-0.05,1.25)
plot(rate_store$rate, rate_store$Med_x1,
xlim=x_range, ylim=y_range,
col="Green", pch=19)
lines(smooth.spline(rate,rate_store$LCL_x1,spar = 0.8), col="Green")
lines(smooth.spline(rate,rate_store$UCL_x1,spar = 0.8), col="Green")
points(rate_store$rate, rate_store$Med_x2, col="Blue", pch=19)
lines(smooth.spline(rate,rate_store$LCL_x2,spar = 0.8), col="Blue")
lines(smooth.spline(rate,rate_store$UCL_x2,spar = 0.8), col="Blue")
points(rate_store$rate, rate_store$Med_x3, col="Red", pch=19)
lines(smooth.spline(rate,rate_store$LCL_x3,spar = 0.8), col="Red")
lines(smooth.spline(rate,rate_store$UCL_x3,spar = 0.8), col="Red")
grid()