-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclustering.py
107 lines (98 loc) · 2.81 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.preprocessing import normalize
from sklearn import cluster
from sklearn import metrics
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from munkres import Munkres
import utilss
def best_map(L1, L2):
#L1 should be the labels and L2 should be the clustering number we got
Label1 = np.unique(L1)
nClass1 = len(Label1)
Label2 = np.unique(L2)
nClass2 = len(Label2)
nClass = np.maximum(nClass1,nClass2)
G = np.zeros((nClass,nClass))
for i in range(nClass1):
ind_cla1 = L1 == Label1[i]
ind_cla1 = ind_cla1.astype(float)
for j in range(nClass2):
ind_cla2 = L2 == Label2[j]
ind_cla2 = ind_cla2.astype(float)
G[i,j] = np.sum(ind_cla2 * ind_cla1)
m = Munkres()
index = m.compute(-G.T)
index = np.array(index)
c = index[:,1]
newL2 = np.zeros(L2.shape)
for i in range(nClass2):
newL2[L2 == Label2[i]] =c[i]
return newL2
def thrC(C, ro):
if ro < 1:
N = C.shape[1]
Cp = np.zeros((N,N))
S = np.abs(np.sort(-np.abs(C), axis=0))
Ind = np.argsort(-np.abs(C), axis=0)
for i in range(N):
cL1 = np.sum(S[:,i]).astype(float)
stop = False
csum = 0
t = 0
while(stop == False):
csum = csum + S[t,i]
if csum > ro*cL1:
stop = True
Cp[Ind[0:t+1, i], i] = C[Ind[0:t+1, i], i]
t = t + 1
else:
Cp = C
return Cp
def post_proC(C, K, d, alpha):
# C: coefficient matrix, K: number of clusters, d: dimension of each subspace
n = C.shape[0]
C = 0.5*(C + C.T)
C = C - np.diag(np.diag(C)) + np.eye(n,n) # for sparse C, this step will make the algorithm more numerically stable
r = d*K + 1
U, S, _ = svds(C,r,v0 = np.ones(n))
U = U[:,::-1]
S = np.sqrt(S[::-1])
S = np.diag(S)
U = U.dot(S)
U = normalize(U, norm='l2', axis = 1)
Z = U.dot(U.T)
Z = Z * (Z>0)
L = np.abs(Z ** alpha)
L = L/L.max()
L = 0.5 * (L + L.T)
spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed', assign_labels='discretize')
spectral.fit(L)
grp = spectral.fit_predict(L) + 1
return grp, L
def err_rate(gt_s, s):
c_x = best_map(gt_s,s)
err_x = np.sum(gt_s[:] != c_x[:])
missrate = err_x.astype(float) / (gt_s.shape[0])
return missrate
def accuracy(gt_s, s):
c_x = best_map(gt_s,s)
accuracy=accuracy_score(gt_s, c_x)
return accuracy
def precision(gt_s, s):
c_x = best_map(gt_s,s)
precision=precision_score(gt_s, c_x,average='weighted')
return precision
def recall(gt_s, s):
c_x = best_map(gt_s,s)
recall=recall_score(gt_s, c_x,average='weighted')
return recall
def f1(gt_s, s):
c_x = best_map(gt_s,s)
f1=f1_score(gt_s, c_x,average='weighted')
return f1
def auc(gt_s, s,clusters):
c_x = best_map(gt_s,s)
c_x_one_hot = utilss.dense_to_one_hot(c_x, clusters)
roc_auc = metrics.roc_auc_score(y_true=gt_s, y_score=c_x_one_hot, average='macro', multi_class='ovo')
return roc_auc