-
Notifications
You must be signed in to change notification settings - Fork 0
/
7.Performance comparison of Regression Model, Regression Tree, Bagging Tree and Random Forest.Rmd
108 lines (84 loc) · 2.6 KB
/
7.Performance comparison of Regression Model, Regression Tree, Bagging Tree and Random Forest.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
---
output:
  word_document: default
  html_document: default
---
DATA STRUCTURE
```{r}
# Load the `dragons` data set exported by the DALEX package.
data <- DALEX::dragons

# Inspect it: first rows, internal structure, and per-column summaries.
head(data, n = 6L)
str(data)
summary(data)
```
SPLITTING DATA
```{r}
library(caret)

# Fix the RNG seed so the partition below is reproducible.
set.seed(2380)

# Draw one partition holding 75% of the rows; `list = FALSE` makes caret
# return the selected row indices as a matrix instead of a list.
# NOTE(review): the partition is built from `year_of_birth`, whereas the
# models in this document predict `life_length` -- confirm this is intended.
index <- createDataPartition(
  y = data[["year_of_birth"]],
  p = 0.75,
  list = FALSE,
  times = 1
)

# Selected rows form the training set; the remaining rows form the test set.
train <- data[index, ]
test <- data[-index, ]

# Number of rows in each partition.
nrow(train)
nrow(test)
```
LINEAR REGRESSION MODEL
```{r}
# Train the model: linear regression of `life_length` on all other columns.
model_LM <- lm(formula = life_length ~ ., data = train)

# RMSE on the training set
predicted_train_LM <- predict(model_LM, newdata = train)
rmse_train_LM <- sqrt(
  mean((predicted_train_LM - train$life_length)^2)
)

# RMSE on the held-out test set
predicted_test_LM <- predict(model_LM, newdata = test)
rmse_test_LM <- sqrt(
  mean((predicted_test_LM - test$life_length)^2)
)

# Report both errors, test first.
cat("test_rmse:", rmse_test_LM, "\n")
cat("train_rmse:", rmse_train_LM)
```
REGRESSION TREE
```{r}
# Fit a regression tree (rpart with method = "anova") to the training data.
library(rpart)
library(rpart.plot)
model_DT <- rpart(
  formula = life_length ~ .,
  data = train,
  method = "anova"
)

# RMSE on the held-out test set
predicted_test_DT <- predict(model_DT, newdata = test)
rmse_test_DT <- sqrt(
  mean((predicted_test_DT - test$life_length)^2)
)

# RMSE on the training set
predicted_train_DT <- predict(model_DT, newdata = train)
rmse_train_DT <- sqrt(
  mean((predicted_train_DT - train$life_length)^2)
)

# Report both errors, test first.
cat("test_rmse_dt:", rmse_test_DT, "\n")
cat("train_rmse_dt:", rmse_train_DT)
```
TRAINING BAGGING TREE
```{r}
library(randomForest)

# Bagged trees: a random forest in which every predictor is a split candidate
# at each node. `mtry` is therefore the number of predictor columns (every
# column of `train` except the response `life_length`), computed from the data
# rather than hard-coded as 7.
model_BT <- randomForest(life_length ~ ., data = train, mtry = ncol(train) - 1)

# RMSE on the held-out test set
predicted_test_BT <- predict(model_BT, newdata = test)
rmse_test_BT <- sqrt(mean((predicted_test_BT - test$life_length)^2))

# RMSE on the training set
predicted_train_BT <- predict(model_BT, newdata = train)
rmse_train_BT <- sqrt(mean((predicted_train_BT - train$life_length)^2))

# Labels carry the `_bt` suffix so the output is attributed to the bagging
# model (they previously read `_dt`, the regression-tree suffix).
cat("test_rmse_bt:", rmse_test_BT, "\n")
cat("train_rmse_bt:", rmse_train_BT)
```
RANDOM FOREST
```{r}
# Random forest with randomForest()'s default settings (no `mtry` override).
model_RF <- randomForest(life_length ~ ., data = train)

# RMSE on the held-out test set
predicted_test_RF <- predict(model_RF, newdata = test)
rmse_test_RF <- sqrt(mean((predicted_test_RF - test$life_length)^2))

# RMSE on the training set
predicted_train_RF <- predict(model_RF, newdata = train)
rmse_train_RF <- sqrt(mean((predicted_train_RF - train$life_length)^2))

# Labels carry the `_rf` suffix so the output is attributed to the random
# forest (they previously read `_dt`, the regression-tree suffix).
cat("test_rmse_rf:", rmse_test_RF, "\n")
cat("train_rmse_rf:", rmse_train_RF)
```
```{r}
# Compare the models' performance: one row per model, with its test and
# training RMSE side by side. Row order matches the order of the values in
# the TEST and TRAIN vectors.
data.frame(
  TEST = c(rmse_test_BT, rmse_test_DT, rmse_test_LM, rmse_test_RF),
  TRAIN = c(rmse_train_BT, rmse_train_DT, rmse_train_LM, rmse_train_RF),
  row.names = c(
    "BAGGING TREE", "DECISION TREE", "LINEAR MODEL", "RANDOM FOREST"
  )
)
```