-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
84 lines (69 loc) · 3.71 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# This script processes the data from 'sourcefile' and creates a tidy dataset
# containing only the mean and std values for each measurement of the original source.
# Produces 2 output files to working directory:
# totalData.txt : Contains a tidy dataset with all the mean and std values from the
# original source.
# totalAveWide.txt : Contains a reduced tidy dataset with the average of the values
# measured (mean and std as in totalData.txt) for each activity of each subjectID.
# Prerequisites:
# Unzipped source file in 'sourcefile' must be uncompressed in the current
# working directory.
# Package reshape must be installed, and availble in the working environment.
# Set url for source zip file. Not loaded directly as loading varies by platform.
sourcefile <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
train_files <- c("subject_train.txt","X_train.txt","y_train.txt")
test_files <- c("subject_test.txt","X_test.txt","y_test.txt" )
files <- c("activity_labels.txt","features_info.txt","features.txt","Readme.txt")
# Check if source data folder exists. We assume all files are inside, no further checks
if (!file.exists("UCI HAR Dataset")) {
stop("Source Data is missing from workspace directory")
}
# Read the activiy_labels.txt (levels used in ytrain and xtrain files)
activity_labels<-read.table("UCI HAR Dataset/activity_labels.txt")
# Read the features.txt variable names (for xtrain and xtest files)
features<-read.table("UCI HAR Dataset/features.txt")
# Read the test files
subtest<-read.table("UCI HAR Dataset/test/subject_test.txt")
xtest<-read.table("UCI HAR Dataset/test/X_test.txt")
ytest<-read.table("UCI HAR Dataset/test/y_test.txt")
# Read the train files
subtrain<-read.table("UCI HAR Dataset/train/subject_train.txt")
xtrain<-read.table("UCI HAR Dataset/train/X_train.txt")
ytrain<-read.table("UCI HAR Dataset/train/y_train.txt")
# Fix column names
colnames(xtest)<-features[,2]
colnames(xtrain)<-features[,2]
colnames(subtest)<-"subjectID"
colnames(subtrain)<-"subjectID"
colnames(ytest)<-"activity"
colnames(ytrain)<-"activity"
# Create a vector called 'selected' with all the indices of means and std
# values within xtest/xtrain data.frames
selected <- grep("std|mean", features[,2], ignore.case=T)
# Subset xtest and xtrain data.frames to contain only the selected values
xtest<-subset(xtest, select = selected)
xtrain<-subset(xtrain, select = selected)
# Combine actions to get a single data.frame
# Combine xtest and xtrain in a single data.frame 'xtotal' using rbind
xtotal <- rbind (xtest, xtrain)
# Combine ytest and ytrain in a single data.frame 'ytotal' using rbind
ytotal <- rbind (ytest, ytrain)
# Combine subtest and subtrain in a single data.frame 'subtotal' using rbind
subtotal <- rbind (subtest, subtrain)
# Combine subtotal, ytotal, xtotal in a single data.frame 'totaldata' using cbind
# could be optimized to use less memory (data.frames)
totaldata <- cbind (subtotal,ytotal,xtotal)
# Set activity column as a factor variable
totaldata$activity <- factor(totaldata$activity)
# Set the levels of activity variable to those listed in activity_labels
levels(totaldata$activity) <- activity_labels[,2]
# Output totaldata tidy set to totalData.txt
write.table(totaldata,file="totalData.txt",row.names=FALSE)
library(reshape)
# Melt totaldata to get a skinny dataframe with subjectID and activity as ID vars.
totalMolten <- melt(totaldata, id.vars = c("subjectID", "activity"))
# Cast the Molten dataframe with an aggregate function 'mean' to create an average
# of the measured values for each activity for each subjectID
totalWide<- cast(totalMolten, subjectID + activity ~ variable, mean)
# Write the output to a file
write.table(totalWide,file="totalAveWide.txt",row.names=FALSE)