-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmachinelearningproject.rmd
115 lines (99 loc) · 2.84 KB
/
machinelearningproject.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
---
title: "Human Activity Recognition"
author: "Ravi Teja"
date: "27 December 2015"
output: html_document
---
```{r}
library(caret)
library(rpart)
library(rpart.plot)
library(RColorBrewer)
library(randomForest)
library(knitr)
```
# Loading the Data and Partitioning
```{r}
# Load the raw training and test CSVs, then split the training data
# into a 60/40 training/validation partition stratified on the
# outcome `classe`.
training <- read.csv("pml-training.csv")
testing <- read.csv("pml-testing.csv")
# Seed before partitioning so the split is reproducible — the file
# seeds the model fits later but the partition was previously unseeded.
set.seed(12345)
intrain <- createDataPartition(training$classe, p = 0.6, list = FALSE)
myTraining <- training[intrain, ]
myTesting <- training[-intrain, ]
```
# Preprocessing the Data
```{r}
# Drop near-zero-variance predictors. The NZV diagnostics are computed
# on the training partition only and the SAME column filter is applied
# to the validation partition, so both keep an identical feature set
# and no information from the validation data influences feature
# selection (the original ran nearZeroVar independently on each set,
# which could produce mismatched columns).
nzv <- nearZeroVar(myTraining, saveMetrics = TRUE)
myTraining <- myTraining[, nzv$nzv == FALSE]
myTesting <- myTesting[, nzv$nzv == FALSE]
# Remove the first column (the row-index column from the CSV export),
# which would otherwise leak row order into the model.
myTraining <- myTraining[c(-1)]
```
Removing variables with more than 60% NA values:
```{r}
# Drop columns that are mostly missing: keep only predictors with an
# NA fraction below 60%. colMeans(is.na(.)) computes the per-column NA
# fraction in a single vectorized pass, replacing the original O(n^2)
# nested loop — which matched column names with grep (so a name that
# is a substring of another could remove the wrong column) and deleted
# columns by index while iterating over them (shifting the indices of
# the columns still to be checked).
myTraining <- myTraining[, colMeans(is.na(myTraining)) < 0.6]
```
Restricting the `myTesting` and `testing` data sets to the columns kept in training:
```{r}
# Keep only the surviving training columns in the validation and test
# sets. The final test set has no `classe` outcome, so drop it by NAME
# rather than by the hard-coded position 58, which would silently
# select the wrong columns if earlier cleaning steps ever changed the
# column count.
clean1 <- colnames(myTraining)
clean2 <- setdiff(clean1, "classe")  # predictors only, original order kept
myTesting <- myTesting[clean1]       # validation set: same columns as training
testing <- testing[clean2]           # test set: same columns minus the outcome
```
Coercing the `testing` columns to the same classes as the `myTraining` columns:
```{r}
# Coerce each test-set column to the class of the training column with
# the SAME name so predict methods see consistent types. The original
# nested loop matched names with grep (substring matching) and indexed
# myTraining with the loop index of `testing`, which pairs the wrong
# columns once the two data sets have different column counts.
for (nm in intersect(names(testing), names(myTraining))) {
  class(testing[[nm]]) <- class(myTraining[[nm]])
}
# Row-bind one training row (predictors only, `classe` dropped by name)
# onto the test set and remove it again: this forces factor columns in
# `testing` to carry the same levels as in `myTraining`, which
# randomForest requires at prediction time.
testing <- rbind(myTraining[2, setdiff(names(myTraining), "classe")], testing)
testing <- testing[-1, ]
```
# Prediction with Decision Trees
```{r}
# Fit a CART classification tree on the training partition and
# evaluate it against the held-out validation partition.
set.seed(12345)
modFitA1 <- rpart(classe ~ ., data = myTraining, method = "class")
# Class predictions on the validation set, then a confusion matrix
# summarising overall accuracy and per-class performance.
predictionsA1 <- predict(modFitA1, newdata = myTesting, type = "class")
cmtree <- confusionMatrix(predictionsA1, myTesting$classe)
print(cmtree)
```
# Prediction with Random Forests
```{r}
# Fit a random forest on the training partition, evaluate it on the
# validation partition, and plot the OOB error against tree count.
set.seed(12345)
modFitB1 <- randomForest(classe ~ ., data = myTraining)
# Validation-set predictions and the corresponding confusion matrix.
predictionB1 <- predict(modFitB1, newdata = myTesting, type = "class")
cmrf <- confusionMatrix(predictionB1, myTesting$classe)
print(cmrf)
plot(modFitB1)
```
# Predicting Results with Test Data
The random forest had the better accuracy of the two models, so it is used as the final model to predict the test cases.
```{r}
# Predict the held-out test cases with the random forest, the more
# accurate of the two fitted models.
predictionB2 <- predict(modFitB1, testing, type = "class")
predictionB2
# Write one submission file per prediction: problem_id_<i>.txt holds a
# single predicted class label with no quoting, row names, or header.
pml_write_files <- function(x) {
  n <- length(x)
  for (i in seq_len(n)) {  # seq_len() is safe when x is empty; 1:0 is not
    filename <- paste0("problem_id_", i, ".txt")
    write.table(x[i], file = filename, quote = FALSE,
                row.names = FALSE, col.names = FALSE)
  }
}
```