-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPrincipal Component Analysis Example.R
125 lines (98 loc) · 3.29 KB
/
Principal Component Analysis Example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Supervised vs. Unsupervised Learning ----------------------------------------

# PRINCIPAL COMPONENTS ANALYSIS ----
# PCA is unsupervised: there is no response variable; we summarize the
# structure of the USArrests data (ships with base R, one row per US state).
states <- row.names(USArrests)
states

# Inspect the variable names.
names(USArrests)

# Column means and variances differ by orders of magnitude, so the
# variables must be standardized before PCA (hence scale = TRUE below).
apply(USArrests, 2, mean)
apply(USArrests, 2, var)

# Run PCA on the standardized data.
pr.out <- prcomp(USArrests, scale = TRUE)
names(pr.out)
pr.out$center   # column means used for centering
pr.out$scale    # column standard deviations used for scaling
pr.out$rotation # loadings: one column per principal component
dim(pr.out$x)   # scores: 50 states x 4 components

# First biplot of the scores and loadings.
biplot(pr.out, scale = 0)

# Principal components are only defined up to sign; flip for a nicer
# orientation and re-draw.
pr.out$rotation <- -pr.out$rotation
pr.out$x <- -pr.out$x
biplot(pr.out, scale = 0)

# Variance explained by each component.
pr.out$sdev
pr.var <- pr.out$sdev^2
pr.var
pve <- pr.var / sum(pr.var) # proportion of variance explained
pve

# Scree plot and cumulative PVE plot, side by side.
par(mfrow = c(1, 2))
plot(pve, xlab = " Principal Component ",
     ylab = " Proportion of Variance Explained ", ylim = c(0, 1),
     type = "b")
plot(cumsum(pve), xlab = " Principal Component ",
     ylab = " Cumulative Proportion of Variance Explained ",
     ylim = c(0, 1), type = "b")

# cumsum() demo: running sums of a vector.
a <- c(1, 2, 8, -3)
cumsum(a)
# K-Means Clustering ----------------------------------------------------------

# Simulate data with two well-separated groups: shift the first 25 points.
set.seed(2)
x <- matrix(rnorm(50 * 2), ncol = 2)
x[1:25, 1] <- x[1:25, 1] + 3
x[1:25, 2] <- x[1:25, 2] - 4

# K-means with K = 2; nstart = 20 tries 20 random starts and keeps the
# solution with the lowest total within-cluster sum of squares.
km.out <- kmeans(x, 2, nstart = 20)
km.out$cluster # cluster assignment for each observation

# Plot the data colored by cluster assignment.
par(mfrow = c(1, 2))
plot(x, col = (km.out$cluster + 1),
     main = "K- Means Clustering Results with K = 2",
     xlab = "", ylab = "", pch = 20, cex = 2)

# With K = 3 the true two-group structure gets split further.
set.seed(4)
km.out <- kmeans(x, 3, nstart = 20)
km.out
plot(x, col = (km.out$cluster + 1),
     main = "K- Means Clustering Results with K = 3",
     xlab = "", ylab = "", pch = 20, cex = 2)

# Compare a single random start against many: multiple starts give an
# equal-or-lower (better) total within-cluster sum of squares.
set.seed(4)
km.out <- kmeans(x, 3, nstart = 1)
km.out$tot.withinss
km.out <- kmeans(x, 3, nstart = 20)
km.out$tot.withinss # ~97.98 (the pasted console output was removed)
# Hierarchical Clustering -----------------------------------------------------

# Cluster the simulated data `x` (from the K-means section above) using
# three linkage methods on Euclidean distance.
hc.complete <- hclust(dist(x), method = "complete")
hc.average <- hclust(dist(x), method = "average")
hc.single <- hclust(dist(x), method = "single")

# Dendrograms for the three linkages, side by side.
par(mfrow = c(1, 3))
plot(hc.complete, main = "Complete Linkage",
     xlab = "", sub = "", cex = .9)
plot(hc.average, main = "Average Linkage",
     xlab = "", sub = "", cex = .9)
plot(hc.single, main = "Single Linkage",
     xlab = "", sub = "", cex = .9)

# Cut each tree to obtain cluster labels.
cutree(hc.complete, 2) # complete linkage, 2 clusters
cutree(hc.average, 2)  # average linkage, 2 clusters
cutree(hc.single, 2)   # single linkage, 2 clusters
cutree(hc.single, 4)   # single linkage, 4 clusters

# Scaling the features first changes the distances, and hence the tree.
xsc <- scale(x)
plot(hclust(dist(xsc), method = "complete"),
     main = "Hierarchical Clustering with Scaled Features")

# Correlation-based distance needs at least 3 features, so simulate new
# 3-column data and use 1 - correlation between observations as the
# dissimilarity.
x <- matrix(rnorm(30 * 3), ncol = 3)
dd <- as.dist(1 - cor(t(x)))
plot(hclust(dd, method = "complete"),
     main = "Complete Linkage with Correlation - Based Distance",
     xlab = "", sub = "")