-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
50 lines (31 loc) · 1.6 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(reshape2)
# 1. Merge the training and the test sets to create one data set.

# Second column of features.txt supplies the measurement column names.
columns <- read.table("features.txt")

# Training partition: measurements, activity codes and subject ids.
train_set <- setNames(read.table("train/X_train.txt"), columns$V2)
train_labels <- setNames(read.table("train/y_train.txt"), "activity")
subject_train <- setNames(read.table("train/subject_train.txt"), "subject_id")

# Test partition, laid out the same way as the training partition.
test_set <- setNames(read.table("test/X_test.txt"), columns$V2)
test_labels <- setNames(read.table("test/y_test.txt"), "activity")
subject_test <- setNames(read.table("test/subject_test.txt"), "subject_id")

# Put the identifying columns first, then the measurements, so both
# partitions share one column layout before being stacked row-wise.
train <- cbind(subject_train, train_labels, train_set)
test <- cbind(subject_test, test_labels, test_set)
merged <- rbind(train, test)
# 2. Extract only the measurements on the mean and standard deviation for
# each measurement, together with the subject and activity identifiers.
# The match is a case-sensitive substring test: any column whose name
# contains "mean", "std", "activity" or "subject_id" is kept (so a name such
# as "meanFreq" would be kept, while one spelled with "Mean" would not).
columns <- names(merged)
target_columns <- columns[grepl("mean|std|activity|subject_id", columns)]
step2_data <- merged[, target_columns]
# 3. Use descriptive activity names to name the activities in the data set.
# activity_labels.txt is read without a header, so V1 and V2 are its first
# and second columns; V1 is treated as the activity code and V2 as its label.
activities <- read.table("activity_labels.txt")
# Pair every code with its label explicitly. Without `levels`, factor()
# assigns the labels positionally to the sorted unique codes found in the
# data, which mislabels rows if the label file is not sorted by code and
# fails if some code never occurs in the data.
step2_data$activity <- factor(
  step2_data$activity,
  levels = activities$V1,
  labels = activities$V2
)
# 4. Descriptive variable names: the measurement columns already carry the
# feature names assigned from features.txt when the data was loaded.
# 5. From the data set in step 4, create a second, independent tidy data set
# with the average of each variable for each activity and each subject.

# Long format: one row per (subject, activity, variable, value).
molten <- melt(step2_data, id.vars = c("subject_id", "activity"))

# Back to wide format, averaging the values within each subject/activity pair.
out <- dcast(
  molten,
  subject_id + activity ~ variable,
  fun.aggregate = mean
)

# Write the tidy data set to a file.
write.csv(out, file = "out.csv", row.names = FALSE)