-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
52 lines (41 loc) · 2.42 KB
/
run_analysis.R
File metadata and controls
52 lines (41 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(dplyr)
# Locate the data. If the working directory *contains* the "UCI HAR Dataset"
# folder, move into it; otherwise the working directory is assumed to already
# be the content of that folder. Every relative path used below is resolved
# from the resulting working directory.
base_dir <- "UCI HAR Dataset"
# dir.exists() rather than file.exists(): a regular file that merely shares
# the name must not be handed to setwd(), which would fail on it.
if (dir.exists(base_dir)) {
  setwd(base_dir)
}
# Step 1: merge the training and the test sets to create one data set.
# In every merged table the training rows come first, then the test rows.

# Subject identifiers, read into a single "subject_id" column.
train_subjects <- read.table(
  file.path("train", "subject_train.txt"),
  col.names = "subject_id"
)
test_subjects <- read.table(
  file.path("test", "subject_test.txt"),
  col.names = "subject_id"
)
subjects <- bind_rows(train_subjects, test_subjects)

# Activity identifiers, read into a single "act_id" column.
train_activities <- read.table(
  file.path("train", "y_train.txt"),
  col.names = "act_id"
)
test_activities <- read.table(
  file.path("test", "y_test.txt"),
  col.names = "act_id"
)
activities <- bind_rows(train_activities, test_activities)
# Measurements, loaded with descriptive column labels taken from features.txt.
# Each label is cleaned before use: parentheses are dropped, then every "-"
# is turned into "_". read.table() still applies its default check.names
# processing to the names supplied through col.names.
features <- read.table("features.txt", col.names = c("id", "label"))
clean_label <- gsub("-", "_", gsub("\\(|\\)", "", features$label))

train_set <- read.table(
  file.path("train", "X_train.txt"),
  col.names = clean_label
)
test_set <- read.table(
  file.path("test", "X_test.txt"),
  col.names = clean_label
)

# Training rows first, test rows second.
measurements <- bind_rows(train_set, test_set)
# Create one data frame with all attributes (subject id, activity id and every
# measurement) for all observations. The three tables are combined
# column-wise, so they must have the same number of rows; check that up front
# instead of letting data.frame() recycle a shorter table.
stopifnot(
  nrow(subjects) == nrow(measurements),
  nrow(activities) == nrow(measurements)
)
alltogether <- data.frame(subjects, activities, measurements)

# Step 2: extract only the measurements on the mean and standard deviation
# for each measurement. Besides the two id columns, keep every column whose
# name contains "std" or "mean" immediately followed by "_" or by the end of
# the name. The match is case-sensitive.
# No leading "*": matches() takes a regular expression, not a glob, and a
# quantifier with nothing before it is not portable across regex engines.
cleanset <- select(
  alltogether,
  subject_id,
  act_id,
  matches("(std|mean)($|_)", ignore.case = FALSE)
)
# Step 3: use descriptive activity names to name the activities in the data
# set. activity_labels.txt is read as two columns, act_id and act_name.
# It gets its own variable so the feature table loaded earlier is not
# overwritten with unrelated data.
activity_labels <- read.table(
  "activity_labels.txt",
  col.names = c("act_id", "act_name")
)

# Join on the activity id explicitly (no reliance on the implicit natural
# join), then put subject_id and act_name first and drop the numeric act_id.
cleanset <- cleanset %>%
  inner_join(activity_labels, by = "act_id") %>%
  select(subject_id, act_name, everything(), -act_id)
# Step 5: from the data set of step 4, create a second, independent tidy data
# set with the average of each variable for each activity and each subject.
# Every averaged column is renamed with the "mogo_" prefix; the two grouping
# columns (subject_id, act_name) keep their names.
avg_va <- cleanset %>%
  group_by(subject_id, act_name) %>%
  # summarise_all() replaces the deprecated summarise_each(funs(mean)).
  summarise_all(mean) %>%
  # Drop the grouping left over by summarise() so the result is a plain table.
  ungroup() %>%
  # Columns 1:2 are the grouping keys; everything after them is an average.
  setNames(c(names(.)[1:2], paste0("mogo_", names(.)[-(1:2)])))
# Export the averaged data set from step 5 to tidy.csv.
# The target is the parent folder: the script runs from inside the
# UCI HAR Dataset folder, so ".." is the folder that contains it.
# NOTE(review): write.table() is called with its default separator, so the
# file is space-delimited even though its name ends in ".csv".
write.table(
  avg_va,
  file = file.path("..", "tidy.csv"),
  row.names = FALSE
)