-
Notifications
You must be signed in to change notification settings - Fork 0
/
xgbFit.R
140 lines (133 loc) · 5.76 KB
/
xgbFit.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
## Start from caret's built-in "xgbTree" model definition and customise it
## so that every xgboost hyper-parameter below becomes tunable.
xgbFull <- getModelInfo("xgbTree", regex = FALSE)[[1]]

## Tunable parameters: all numeric, with the labels caret prints in its
## tuning output. Order matters — it must match grid/fit/loop below.
xgbFull$parameters <- data.frame(
  parameter = c("nrounds", "max_depth", "gamma", "eta",
                "min_child_weight", "subsample", "colsample_bytree"),
  class = rep("numeric", 7L),
  label = c("# Boosting Iterations", "Max Tree Depth", "Gamma",
            "Shrinkage", "Min Child Weight", "Subsample ratio",
            "Col Sample ratio")
)
## Tuning-grid generator. For search = "grid" a small regular grid is
## produced; otherwise `len` random candidates are drawn.
xgbFull$grid <- function(x, y, len = NULL, search = "grid") {
  if (search == "grid") {
    ## Regular grid: vary tree depth and boosting iterations, keep the
    ## remaining parameters at xgboost's defaults.
    grid_out <- expand.grid(
      max_depth        = seq(1, len),
      nrounds          = floor((1:len) * 50),
      gamma            = 0,
      eta              = 0.3,
      min_child_weight = 1,
      subsample        = 1,
      colsample_bytree = 1
    )
    return(grid_out)
  }
  ## Random search: draw each hyper-parameter from a plausible range.
  ## (Draw order is kept identical to the original so that results match
  ## under a fixed RNG seed.)
  grid_out <- data.frame(
    nrounds          = sample(1:1000, size = len, replace = TRUE),
    max_depth        = sample(1:10, size = len, replace = TRUE),
    gamma            = sample(0:10, size = len, replace = TRUE),
    eta              = runif(len, min = 0.001, max = 0.6),
    min_child_weight = sample(1:5, size = len, replace = TRUE),
    subsample        = runif(len, min = 0.5, max = 1.0),
    colsample_bytree = runif(len, min = 0.2, max = 1.0)
  )
  grid_out$nrounds <- floor(grid_out$nrounds)
  grid_out[!duplicated(grid_out), ]
}
## Fit an xgboost model for one tuning-parameter combination. The booster
## parameters are identical across all three tasks (binary, multiclass,
## regression), so the list is built once instead of being repeated
## verbatim in every branch as before.
xgbFull$fit <- function(x, y, wts, param, lev, last, classProbs, ...) {
  booster_params <- list(eta              = param$eta,
                         max_depth        = param$max_depth,
                         gamma            = param$gamma,
                         min_child_weight = param$min_child_weight,
                         subsample        = param$subsample,
                         colsample_bytree = param$colsample_bytree)
  if (is.factor(y)) {
    if (length(lev) == 2) {
      ## Binary classification: encode the first factor level as 1.
      y <- ifelse(y == lev[1], 1, 0)
      dat <- xgb.DMatrix(as.matrix(x), label = y)
      out <- xgb.train(booster_params,
                       data = dat,
                       nrounds = param$nrounds,
                       objective = "binary:logistic",
                       ...)
    }
    else {
      ## Multiclass: xgboost expects 0-based integer class labels.
      y <- as.numeric(y) - 1
      dat <- xgb.DMatrix(as.matrix(x), label = y)
      out <- xgb.train(booster_params,
                       data = dat, num_class = length(lev),
                       nrounds = param$nrounds,
                       objective = "multi:softprob",
                       ...)
    }
  }
  else {
    ## Regression.
    ## NOTE(review): "reg:linear" is deprecated in newer xgboost releases
    ## in favour of "reg:squarederror" — kept as-is for compatibility;
    ## confirm against the installed xgboost version.
    dat <- xgb.DMatrix(as.matrix(x), label = y)
    out <- xgb.train(booster_params,
                     data = dat,
                     nrounds = param$nrounds,
                     objective = "reg:linear",
                     ...)
  }
  out
}
## caret "sub-model trick": for each combination of the non-nrounds
## parameters, fit only the model with the LARGEST nrounds; predictions
## for smaller nrounds are recovered from that single fit as submodels.
xgbFull$loop <- function(grid) {
  loop <- ddply(grid,
                c("eta", "max_depth", "gamma", "min_child_weight",
                  "subsample", "colsample_bytree"),
                function(x) c(nrounds = max(x$nrounds)))
  submodels <- vector(mode = "list", length = nrow(loop))
  for (i in seq_along(loop$nrounds)) {
    ## BUG FIX: the original compared against `loop$colsample_bytree`
    ## with no `[i]` subscript, recycling the whole column against
    ## `grid`'s column and selecting the wrong rows whenever
    ## colsample_bytree varies across the grid.
    index <- which(grid$max_depth == loop$max_depth[i] &
                   grid$eta == loop$eta[i] &
                   grid$gamma == loop$gamma[i] &
                   grid$min_child_weight == loop$min_child_weight[i] &
                   grid$subsample == loop$subsample[i] &
                   grid$colsample_bytree == loop$colsample_bytree[i])
    trees <- grid[index, "nrounds"]
    ## Every nrounds smaller than the one actually fitted becomes a
    ## submodel of fit i.
    submodels[[i]] <- data.frame(nrounds = trees[trees !=
                                                 loop$nrounds[i]])
  }
  list(loop = loop, submodels = submodels)
}
# tuneGrid <- expand.grid (nrounds = 1000,
# eta = 0.001,
# gamma = 2,
# max_depth = 5,
# min_child_weight = 3:5,
# subsample = 0.5,
# colsample_bytree = 0.5)
#
# trControl <- trainControl (method = "repeatedcv",
# number = 5,
# repeats = 5,
# verboseIter = TRUE)
#
#
# fit <- train (mpg ~ .,
# data=mtcars,
# preProcess = c("center", "scale"),
# tuneGrid = tuneGrid,
# method=xgbFull,
# verbose = 1)