-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02-PCA.py
59 lines (51 loc) · 1.54 KB
/
02-PCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# -*- coding: utf-8 -*-
import numpy as npy
import pylab as pyl
import itertools as itl
def mean_center(X):
    """Subtract each column's mean from every entry of that column.

    Parameters
    ----------
    X : array_like
        Two-dimensional data with one observation per row and one variable
        per column.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``X`` in which every column
        averages to zero (up to rounding).  ``X`` itself is not modified.
    """
    X = npy.asarray(X, dtype=float)
    # Broadcasting subtracts the per-column averages from all rows at once,
    # which replaces the explicit Python-level loop over rows.
    return X - npy.average(X, 0)
def standardization(X):
    """Scale every column of ``X`` to unit standard deviation.

    Each column is divided by its standard deviation as computed by
    ``npy.std`` with its default settings.  The columns are not centred
    here.

    Parameters
    ----------
    X : array_like
        Two-dimensional data with one observation per row and one variable
        per column.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``X``.

    Raises
    ------
    ZeroDivisionError
        If any column has a standard deviation of exactly zero.
    """
    X = npy.asarray(X, dtype=float)
    column_stds = npy.std(X, 0)
    if npy.any(column_stds == 0):
        # The call form is valid on Python 2 and 3; the statement form
        # ``raise Exc, 'msg'`` is a SyntaxError on Python 3.
        raise ZeroDivisionError('division by zero, cannot proceed')
    # Broadcasting divides every row by the per-column deviations at once.
    return X / column_stds
def PCA_svd(X, standardize=False):
    """Principal component analysis of ``X`` via singular value decomposition.

    The columns of ``X`` are mean-centred with ``mean_center`` (and, if
    requested, scaled with ``standardization``) before the matrix is
    decomposed as ``U * diag(S) * V``.

    Parameters
    ----------
    X : array_like
        Two-dimensional data with one observation per row and one variable
        per column.
    standardize : bool, optional
        When true, the centred columns are additionally passed through
        ``standardization``.  Defaults to False.

    Returns
    -------
    Scores : numpy.ndarray
        ``U * S``; shape ``(rows, min(rows, cols))``.
    Loadings : numpy.ndarray
        ``V`` exactly as returned by the SVD (one component per row);
        shape ``(cols, cols)``.
    explained_var : numpy.ndarray
        ``S**2`` divided by its sum, one entry per singular value.  The
        entries are NaN when every singular value is zero.

    Raises
    ------
    ZeroDivisionError
        Propagated from ``standardization`` when ``standardize`` is true and
        a column has zero standard deviation.
    """
    X = mean_center(X)
    if standardize:
        X = standardization(X)
    rows, cols = npy.shape(X)
    # For rows >= cols the thin SVD yields U as (rows, cols) directly, so the
    # (rows, rows) matrix the default full SVD would build -- only to be cut
    # down to its first len(S) columns -- is never computed.  For rows < cols
    # the full SVD is kept so that V stays (cols, cols) as callers received
    # before; U is then already (rows, rows) == (rows, len(S)).
    U, S, V = npy.linalg.svd(X, full_matrices=rows < cols)
    # Broadcasting scales column j of U by S[j].
    Scores = U * S
    Loadings = V
    # Only ratios are returned, so any constant divisor applied to S**2
    # would cancel out; none is applied.
    squared = S ** 2
    explained_var = squared / squared.sum()
    return Scores, Loadings, explained_var
def _read_digit_bitmaps(path, digit):
    """Collect the flattened bitmaps labelled ``digit`` from the file at ``path``.

    Every non-blank line of at least five characters is treated as a bitmap
    row whose characters are each converted with ``int``.  Every shorter
    non-blank line is parsed as the integer label that closes the bitmap
    accumulated so far.  Blank lines are skipped.

    Returns a list with one flat list of ints per bitmap whose label equals
    ``digit``.  ``int`` raises ValueError for any character or label that is
    not numeric.
    """
    bitmaps = []
    pixels = []
    # The context manager closes the file even if parsing raises.
    with open(path) as handle:
        for line in handle:
            row = line.strip()
            if not row:
                continue
            if len(row) < 5:
                if int(row) == digit:
                    bitmaps.append(pixels)
                # NOTE(review): the accumulator is reset after every label,
                # not only after a matching one.  The indentation of the
                # original script was lost; resetting only on a match would
                # merge preceding non-matching bitmaps into the next match.
                pixels = []
            else:
                pixels.extend(int(char) for char in row)
    return bitmaps


FileName = 'optdigits-orig.wdep'
MAT = _read_digit_bitmaps(FileName, 3)
# Transposed: one row per pixel position, one column per bitmap.
data = npy.array(npy.matrix(MAT).T)
T, P, explained_var = PCA_svd(data)
# First score column against the second.
pyl.plot(T[:, 0], T[:, 1])