-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluation.py
152 lines (106 loc) · 4.45 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_rand_score
import igraph
import classes
def finalPopMetrics2(pop, mst_genotype, int_links_indices, relev_links_len):
    """Collect per-individual metrics for a population into a DataFrame.

    For every individual the full genotype is rebuilt from ``mst_genotype``
    by overwriting the entries at ``int_links_indices[:relev_links_len]``
    with the individual's values. The genotype is decoded into a graph in
    which vertex ``i`` is linked to ``genotype[i]``, and the weakly
    connected components of that graph are used as the predicted clusters.

    Args:
        pop: Iterable of individuals. Each one is assigned into a NumPy
            array slice and must expose ``fitness.values`` with at least
            two entries.
        mst_genotype: Array-like base genotype. It is copied before being
            modified, so the caller's object is never changed.
        int_links_indices: Sequence of genotype positions; only the first
            ``relev_links_len`` of them are overwritten.
        relev_links_len: Number of leading entries of ``int_links_indices``
            to use.

    Returns:
        pandas.DataFrame with one row per individual and the columns
        ``'Num Clusters'``, ``'ARI'`` (adjusted Rand index against
        ``classes.Datapoint.label_vals``), ``'VAR'``
        (``fitness.values[0]``) and ``'CNN'`` (``fitness.values[1]``).
    """
    num_examples = classes.Datapoint.num_examples
    # Loop-invariant pieces: the genotype positions being replaced and the
    # source vertex of every edge.
    relevant_links = int_links_indices[:relev_links_len]
    base_nodes = range(num_examples)

    sol_num_clusters = []
    ari_values = []
    var_values = []
    cnn_values = []

    for indiv in pop:
        # np.array always copies. np.asarray would hand back the caller's
        # own array when one is passed in, and the assignment below would
        # then silently overwrite the caller's mst_genotype.
        genotype = np.array(mst_genotype)
        genotype[relevant_links] = indiv

        # Decode the genotype: one vertex per example, one edge per entry.
        g = igraph.Graph()
        g.add_vertices(num_examples)
        g.add_edges(zip(base_nodes, genotype))

        conn_components = g.components(mode="WEAK")
        sol_num_clusters.append(len(conn_components))

        # Label every vertex with the index of its component. Starting
        # from NaN makes any vertex that was not labelled detectable.
        pred_labels = np.full(num_examples, np.nan)
        for label, component in enumerate(conn_components):
            pred_labels[component] = label
        if np.isnan(pred_labels).any():
            print("Label missing when trying to calculate ARI")

        ari_values.append(
            adjusted_rand_score(
                labels_true=classes.Datapoint.label_vals,
                labels_pred=pred_labels,
            )
        )
        var_values.append(indiv.fitness.values[0])
        cnn_values.append(indiv.fitness.values[1])

    # Built in one step; the result is not bound to a local called
    # `final_pop_metrics`, which would shadow the sibling function.
    return pd.DataFrame({
        'Num Clusters': sol_num_clusters,
        'ARI': ari_values,
        'VAR': var_values,
        'CNN': cnn_values,
    })
# @profile
def final_pop_metrics(pop, mst_genotype, int_links_indices, relev_links_len):
    """Return the cluster count and ARI of every individual in ``pop``.

    For every individual the full genotype is rebuilt from ``mst_genotype``
    by overwriting the entries at ``int_links_indices[:relev_links_len]``
    with the individual's values. The genotype is decoded into a graph in
    which vertex ``i`` is linked to ``genotype[i]``, and the weakly
    connected components of that graph are used as the predicted clusters.

    Args:
        pop: Iterable of individuals; each one is assigned into a NumPy
            array slice.
        mst_genotype: Array-like base genotype. It is copied before being
            modified, so the caller's object is never changed.
        int_links_indices: Sequence of genotype positions; only the first
            ``relev_links_len`` of them are overwritten.
        relev_links_len: Number of leading entries of ``int_links_indices``
            to use.

    Returns:
        A tuple ``(sol_num_clusters, ari_values)`` of two lists, each with
        one entry per individual: the number of connected components, and
        the adjusted Rand index of the component labelling against
        ``classes.Datapoint.label_vals``.

    Raises:
        AssertionError: If a vertex ended up without a component label.
    """
    num_examples = classes.Datapoint.num_examples
    # Loop-invariant pieces: the genotype positions being replaced and the
    # source vertex of every edge.
    relevant_links = int_links_indices[:relev_links_len]
    base_nodes = range(num_examples)

    sol_num_clusters = []
    ari_values = []

    for indiv in pop:
        # np.array always copies. np.asarray would hand back the caller's
        # own array when one is passed in, and the assignment below would
        # then silently overwrite the caller's mst_genotype.
        genotype = np.array(mst_genotype)
        genotype[relevant_links] = indiv

        # Decode the genotype: one vertex per example, one edge per entry.
        g = igraph.Graph()
        g.add_vertices(num_examples)
        g.add_edges(zip(base_nodes, genotype))

        conn_components = g.components(mode="WEAK")
        sol_num_clusters.append(len(conn_components))

        # Label every vertex with the index of its component. Starting
        # from NaN makes any vertex that was not labelled detectable.
        pred_labels = np.full(num_examples, np.nan)
        for label, component in enumerate(conn_components):
            pred_labels[component] = label
        assert not np.isnan(pred_labels).any(), \
            "Label missing when trying to calculate ARI"

        ari_values.append(
            adjusted_rand_score(
                labels_true=classes.Datapoint.label_vals,
                labels_pred=pred_labels,
            )
        )

    # NOTE(review): individuals may already carry their cluster count (an
    # earlier debugging check compared these counts with
    # `indiv.num_clusts`). The components are still needed here to derive
    # the labels for the ARI, so confirm before relying on that attribute.
    return sol_num_clusters, ari_values
def numClusters(pop, mst_genotype, int_links_indices, relev_links_len):
    """Return the number of clusters encoded by each individual in ``pop``.

    For every individual the full genotype is rebuilt from ``mst_genotype``
    by overwriting the entries at ``int_links_indices[:relev_links_len]``
    with the individual's values. The genotype is decoded into a graph in
    which vertex ``i`` is linked to ``genotype[i]``; the cluster count is
    the number of weakly connected components of that graph.

    Args:
        pop: Iterable of individuals; each one is assigned into a NumPy
            array slice.
        mst_genotype: Array-like base genotype. It is copied before being
            modified, so the caller's object is never changed.
        int_links_indices: Sequence of genotype positions; only the first
            ``relev_links_len`` of them are overwritten.
        relev_links_len: Number of leading entries of ``int_links_indices``
            to use.

    Returns:
        numpy.ndarray with one component count per individual, in the
        iteration order of ``pop``.
    """
    num_examples = classes.Datapoint.num_examples
    # Loop-invariant pieces: the genotype positions being replaced and the
    # source vertex of every edge.
    relevant_links = int_links_indices[:relev_links_len]
    base_nodes = range(num_examples)

    sol_num_clusters = []
    for indiv in pop:
        # np.array always copies. np.asarray would hand back the caller's
        # own array when one is passed in, and the assignment below would
        # then silently overwrite the caller's mst_genotype.
        genotype = np.array(mst_genotype)
        genotype[relevant_links] = indiv

        # Decode the genotype: one vertex per example, one edge per entry.
        g = igraph.Graph()
        g.add_vertices(num_examples)
        g.add_edges(zip(base_nodes, genotype))

        sol_num_clusters.append(len(g.components(mode="WEAK")))

    return np.asarray(sol_num_clusters)