-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPrinciple Component Analysis Boulder Housing.R
31 lines (24 loc) · 1.25 KB
/
Principle Component Analysis Boulder Housing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# PCA analysis for the Boulder housing data.
# PCA captures the information in the predictors through their variance;
# the resulting principal components are then used as regressors for
# LIST.PRICE.

# Read data ----
# The data file is addressed by its full path rather than via setwd(), and
# the workspace is not wiped with rm(list = ls()), so running this script
# leaves the caller's working directory and existing objects untouched.
data.dir <- "~/Downloads/5415 Advanced Data Analytics/Boulder Housing"
data.file <- path.expand(file.path(data.dir, "boulder-cleaned.RData"))
# load() returns (invisibly) the names of the objects it restored.
loaded.objects <- load(data.file)
# Fail early if the file does not supply the object used below, instead of
# silently picking up a stale object of the same name from the workspace.
stopifnot("boulder.clean" %in% loaded.objects)

# Keep complete cases only, so the PCA and the regression see the same rows.
boulder.clean <- na.omit(boulder.clean)

# Remove several columns from the data ----
# LIST.PRICE is the regression response, so it must not enter the PCA. The
# other four are presumably non-numeric or identifier-like columns --
# TODO confirm against the data.
excluded.cols <- c(
  "HOME.TYPE", "ADDRESS", "ZIP", "LIST.PRICE", "PARKING.TYPE"
)
# drop = FALSE keeps a data frame even if only one column remains.
boulder.clean1 <- boulder.clean[
  , !names(boulder.clean) %in% excluded.cols,
  drop = FALSE
]

# Perform principal component analysis ----
# scale. = TRUE specifies that the variables should be scaled to unit
# variance, which is usually a good idea. The argument is spelled out in
# full ("scale.") rather than relying on partial argument matching.
boulder.pr <- prcomp(boulder.clean1, scale. = TRUE)

# This plot shows the proportion of variance explained by each principal
# component. This is a way to examine the importance of each principal
# component. In follow-up analysis, you may not want to include all
# principal components, but only a few important ones.
# The row is selected by name so the code does not depend on row order.
variance.share <- summary(boulder.pr)$importance["Proportion of Variance", ]
plot(variance.share, xlab = "Principal Component",
     ylab = "Proportion of Variance")

# Perform linear regression on the principal components ----
# boulder.pr$x holds the component scores (one column per component); all of
# them are used as predictors here via the `.` in the formula.
boulder.clean2 <- cbind(
  LIST.PRICE = boulder.clean[["LIST.PRICE"]],
  data.frame(boulder.pr$x)
)
lm.fit <- lm(LIST.PRICE ~ ., data = boulder.clean2)
# Explicit print() so the summary is also shown when the script is source()d.
print(summary(lm.fit))