-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCMAES.py
521 lines (441 loc) · 22.9 KB
/
CMAES.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
from deap import creator, base, tools, benchmarks, algorithms, cma
import numpy as np
'''implementation of a standard CMA-ES algorithm using the Deap library: https://deap.readthedocs.io/en/master/examples/cmaes.html
additional functionality and further explanation of the steps below can be found in the documentation
Installation: Deap Library: https://deap.readthedocs.io/en/master/installation.html
'''
class CMAES:
np.random.seed()
population = []
g_objective_fitnesses = []
g_novelty_scores = []
g_behavioural_descriptors = []
def __init__(self, individual_size, eval_function, pop_size = 5, sigma = 1.0, threshold = 0.95, maximise = True):
#set random seed as CMA module draws from this
self.solution_size = individual_size
self.pop_size = pop_size
self.maximise = maximise
self.eval_function = eval_function
self.sigma = sigma
self.solution_threshold = threshold
self.set_up()
#MAYBE USE THIS TO FILL OUT DATA AND PASS BACK MY MYEA
self.fullResults = []
def set_up(self):
try:
if self.maximise:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
else:
creator.create("FitnessMax", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
except:
pass
self.toolbox = base.Toolbox()
self.logbook = tools.Logbook()
self.toolbox.register("evaluate", self.eval_function)
#define strategy the algorithm will use, along with the population generation and update methods
self.strategy = cma.Strategy(centroid=[0]*self.solution_size, sigma=self.sigma, lambda_= self.pop_size)
self.toolbox.register("generate", self.strategy.generate, creator.Individual)
self.toolbox.register("update", self.strategy.update)
#establish stats to be recorded - default standard stats are set below
self.stats = tools.Statistics(lambda ind: ind.fitness.values)
self.stats.register("avg", np.mean)
self.stats.register("std", np.std)
self.stats.register("min", np.min)
self.stats.register("max", np.max)
self.hof = tools.HallOfFame(1)
def refresh(self):
self.set_up()
def run_algorithm(self, generations):
np.random.seed() # reset seed
self.refresh()
print("Running standard CMAES evaluation for " + str(generations) + " generations with a population size of " + str(self.pop_size))
hof = tools.HallOfFame(1)
gen = 0
solution = False
slnEval = -1 #default value
#pop, logbook = algorithms.eaGenerateUpdate(toolbox, ngen=5, stats=stats, halloffame=hof)
while gen < generations:
print("Running evals for gen: " + str(gen))
# Generate a new population
self.population[:] = self.pop_boundaries(self.toolbox.generate())
# Evaluate the individuals
fitnesses = []
for i in range(self.pop_size):
#get the fitness and endpoint of each run
fit, endpoint = self.toolbox.evaluate(self.population[i])
fitnesses.append((fit,))
#check if the run was successful (fitness > solution threshold)
if fit >= self.solution_threshold:
slnEval = (gen*self.pop_size) + i + 1
print("Solution found! On eval: " + str(slnEval))
print(self.population[i])
#if so, break
solution = True
break
#map fitnesses to solutions inside cma algorithm
for ind, fit in zip(self.population, fitnesses):
ind.fitness.values = fit
#if a solution was found break algorithm loop and exit
if solution:
print("break triggered")
break
self.update_records(gen)
gen += 1
return slnEval
def pop_boundaries(self, population):
'''function manually ensures population members are within expected bounds (-1, 1)
currently no dedicated way to ensure this with Deap library'''
for j in range(len(population)):
for i in range(len(population[j])):
if population[j][i] > 1.0:
population[j][i] = 1.0
elif population[j][i] < -1.0:
population[j][i] = -1.0
return population
def update_records(self, gen):
self.hof.update(self.population)
record = self.stats.compile(self.population)
self.toolbox.update(self.population)
self.logbook.record(gen=gen, evals=self.pop_size, **record)
print(self.logbook.stream)
'''CMA-ES algorithm augmented with novelty search
This implementation uses a ratio to determine what weight to give novelty in individual fitness assessment
the novelty_ratio variable determines this weight (between 0 - 1 weighting) - (0%-100% novelty)
Further reading on Novelty Search: https://www.cs.swarthmore.edu/~meeden/DevelopmentalRobotics/lehman_ecj11.pdf
'''
class NCMAES(CMAES):
np.random.seed()
def __init__(self, individual_size, eval_function, pop_size = 5, novelty_ratio = 1.0, sigma = 1.0, threshold = 0.95, maximise = True):
self.solution_size = individual_size
self.pop_size = pop_size
self.maximise = maximise
self.eval_function = eval_function
self.sigma = sigma
self.solution_threshold = threshold
#default novelty descriptor is set to K nearest neighbours
self.nearest_neighbours = True;
self.average_velocity = False;
self.distance_fs = False;
self.median_velocity = False;
#novelty archive that holds a list of past solution endpoints for using to help calculate novelty of future solutions
self.archive = []
#novelty is calculated using the K nearest neighbours in the current population and the archive
self.K = 15
#novelty archive is updated with solutions that achieve a novelty score above the threshold
self.add_threshold = 0.9
#novelty archive also employed stochastic selection to ensure random members are also added to archive
self.add_chance = 0.4
#ratio determines what weight to give novelty in combined fitness formula
self.novelty_ratio = novelty_ratio
self.set_up()
def switch_to_nearest_neighbours(self):
self.nearest_neighbours = True
self.average_velocity = False
self.distance_fs = False
#switch novelty behavioural descriptor to average velocity
def switch_to_average_velocity(self):
self.nearest_neighbours = False
self.average_velocity = True
self.distance_fs = False
#switch novelty behavioural descriptor to distance from start point
def switch_to_distance_FS(self):
self.nearest_neighbours = False
self.average_velocity = False
self.distance_fs = True
def switch_to_median_velocity(self):
self.nearest_neighbours = False
self.average_velocity = False
self.distance_fs = False
self.median_velocity = True
def get_n_fitnesses(self, fitnesses, endPoints):
#additional function to calculate the individual novelty scores of each population member
#store the list of endpoints in a class wide variable
self.temp_pop_endpoints = endPoints
novelty_scores = []
#get each individual novelty score
for i in range(len(endPoints)):
novelty_scores.append(self.calculate_novelty(endPoints[i], i))
#reset temp_pop_endpoints
self.temp_pop_endpoints[:] = []
nFits = []
#function that calculates the final novelty score based on the ratio
for fit, novelty in zip(fitnesses, novelty_scores):
#in case of 100% novelty ratio(1.0) final score = temp score
nFits.append((self.novelty_ratio * novelty) + (1 - self.novelty_ratio) * fit)
#print("nfits=")
#print(nFits)
#print("noveltyScores = ")
#print(novelty_scores)
return nFits, novelty_scores
def calculate_novelty(self, endpoint, currentIndex):
#calculate novelty for an individual in the population
temp_distances = []
#print("temp pop endpoints:")
#print(self.temp_pop_endpoints)
#check how close endpoint is to the others in its generation
for i in range(len(self.temp_pop_endpoints)):
if(i != currentIndex):
temp = np.linalg.norm(np.array(endpoint) - np.array(self.temp_pop_endpoints[i]))
temp_distances.append(temp)
#if archive has any members, check how close endpoint is to archive members also
if len(self.archive) > 0:
for i in range(len(self.archive)):
temp_distances.append(np.linalg.norm(np.array(endpoint) - np.array(self.archive[i])))
#find K closest ones
closest = np.sort(temp_distances)[:self.K]
#use to calculate the novelty score
novelty = np.sum(closest)/self.K
#add to archive check
if novelty > self.add_threshold or np.random.random() < self.add_chance:
self.archive.append(endpoint)
return novelty
def run_algorithm(self, generations):
np.random.seed() # reset seed
self.refresh()
self.archive[:] = []
behavioural_descriptor = ""
if self.nearest_neighbours:
behavioural_descriptor += "Nearest Neighbours(endpoints)"
elif self.average_velocity:
behavioural_descriptor += "Average Velocity"
elif self.distance_fs:
behavioural_descriptor += "Distance from start point"
elif self.median_velocity:
behavioural_descriptor += "Distance from start point"
print("Running Novelty CMAES evaluation for " + str(generations) + " generations with a population size of " + str(self.pop_size) + ", novelty behavioural descriptor is: " + behavioural_descriptor)
hof = tools.HallOfFame(1)
gen = 0
solution = False
slnEval = -1
#pop, logbook = algorithms.eaGenerateUpdate(toolbox, ngen=5, stats=stats, halloffame=hof)
while gen < generations:
print("Running evals for gen: " + str(gen))
# Generate a new population
self.population[:] = self.pop_boundaries(self.toolbox.generate())
# Evaluate the individuals
fitnesses = []
endpoints = []
for i in range(self.pop_size):
#fit, endpoint, averageV, distance_fs, median_velocity = self.toolbox.evaluate(self.population[i], map_elites = False, all_novelty = True) #potential issue here
fit, descriptors = self.toolbox.evaluate(self.population[i], map_elites = False, all_novelty = True) #potential issue here
#print("rfit = " + str(fit))
if self.nearest_neighbours:
endpoints.append(descriptors[0])
elif self.average_velocity:
endpoints.append(descriptors[1])
elif self.distance_fs:
endpoints.append(descriptors[2])
else:
print("Novelty behavioural description undefined")
fitnesses.append(fit)
if fit >= self.solution_threshold:
slnEval = (gen*self.pop_size) + i + 1
print("Solution found! On eval: " + str(slnEval)) #calculate current evaluation count
print(self.population[i])
solution = True
break
n_fitnesses, temp_g_novelty_scores = self.get_n_fitnesses(fitnesses, endpoints)
#print("Combined fitnesses for gen " + str(gen) + ":")
#print(n_fitnesses)
for ind, fit in zip(self.population, n_fitnesses):
fitness = []
fitness.append(fit)
ind.fitness.values = fitness
if solution:
print("break triggered")
break
self.update_records(gen)
gen +=1
return slnEval
class NIPES(NCMAES):
np.random.seed()
multi_dimensional_descriptor_indexes = []
multi_dimensional_descriptor = False
descriptor_strings = ["", "Endpoints", "Average Velocity", "Distance from start", "Median Velocity"]
def __init__(self, individual_size, eval_function, pop_size = 10, novelty_ratio = 1.0, sigma = 1.0, threshold = 0.95, maximise = True):
self.solution_size = individual_size
self.pop_size = pop_size
self.maximise = maximise
self.eval_function = eval_function
self.sigma = sigma
self.solution_threshold = threshold
#default novelty descriptor is set to K nearest neighbours
self.nearest_neighbours = True;
self.average_velocity = False;
self.distance_fs = False;
#novelty archive that holds a list of past solution endpoints for using to help calculate novelty of future solutions
self.archive = []
#novelty is calculated using the K nearest neighbours in the current population and the archive
self.K = 15
#novelty archive is updated with solutions that achieve a novelty score above the threshold
self.add_threshold = 0.9
#novelty archive also employed stochastic selection to ensure random members are also added to archive
self.add_chance = 0.4
#ratio determines what weight to give novelty in combined fitness formula
self.novelty_ratio = novelty_ratio
self.novelty_decrement = 0.05
self.pop_stag_threshold = 0.05
self.best_fits_stag_threshold = 0.05
self.maxEvals = 10000
self.set_up()
def restart(self):
print("RESTART TRIGGERED. Current eval count = " + str(self.slnEval))
print("Current novelty ratio: " + str(self.novelty_ratio))
self.pop_size = self.pop_size*2
print("New pop size: " + str(self.pop_size))
self.set_up()
self.novelty_ratio = 1.0
#self.archive[:] = []
self.best_fits[:] = []
print("New novelty ratio: " + str(self.novelty_ratio))
def switch_to_multi_dimensional_descriptor(self, descriptorIndexes):
if(len(self.multi_dimensional_descriptor_indexes)==0):
for i in range(len(descriptorIndexes)):
self.multi_dimensional_descriptor_indexes.append(descriptorIndexes[i])
self.nearest_neighbours = False
self.average_velocity = False
self.distance_fs = False
self.median_velocity = False
self.multi_dimensional_descriptor = True
def run_algorithm(self, maxEvals):
self.maxEvals = maxEvals
np.random.seed() # reset seed
self.refresh()
self.archive[:] = []
self.g_behavioural_descriptors[:] = []
self.g_novelty_scores[:] = []
self.g_objective_fitnesses[:] = []
behavioural_descriptor = ""
if self.nearest_neighbours:
behavioural_descriptor += "Endpoints"
elif self.average_velocity:
behavioural_descriptor += "Average Velocity"
elif self.distance_fs:
behavioural_descriptor += "Distance from start point"
elif self.median_velocity:
behavioural_descriptor += "Median Velocity"
elif self.multi_dimensional_descriptor:
behavioural_descriptor += "Multi-Dimensional Descriptor: "
for i in range(len(self.multi_dimensional_descriptor_indexes)):
behavioural_descriptor += self.descriptor_strings[self.multi_dimensional_descriptor_indexes[i]]
if(i == len(self.multi_dimensional_descriptor_indexes)-1):
pass
else:
behavioural_descriptor += ", "
print("Running NIPES evaluation for " + str(maxEvals) + " maximum evals with an initial population size of " + str(self.pop_size) + ", novelty behavioural descriptor is: " + behavioural_descriptor)
hof = tools.HallOfFame(1)
gen = 0
solution = False
self.slnEval = 0
self.best_fits = []
#pop, logbook = algorithms.eaGenerateUpdate(toolbox, ngen=5, stats=stats, halloffame=hof)
while self.slnEval < self.maxEvals:
print("Running evals for gen: " + str(gen))
#store the behavioural descriptors, objective fitnesses, and novelty scores of each memeber of current generation
temp_g_behavioural_descriptors = []
temp_g_objective_fitnesses = []
temp_g_novelty_scores = []
# Generate a new population
self.population[:] = self.pop_boundaries(self.toolbox.generate())
# Evaluate the individuals
fitnesses = []
endpoints = []
for i in range(self.pop_size):
#fit, endpoint, averageV, distance_fs, median_velocity = self.toolbox.evaluate(self.population[i], map_elites = False, all_novelty = True)
results = self.toolbox.evaluate(self.population[i], map_elites = False, all_novelty = True)
#print("rfit = " + str(fit))
if self.nearest_neighbours:
endpoints.append(results[1])
temp_g_behavioural_descriptors.append(results[1])
elif self.average_velocity:
endpoints.append(results[2])
temp_g_behavioural_descriptors.append(results[2])
elif self.distance_fs:
endpoints.append(results[3])
temp_g_behavioural_descriptors.append(results[3])
elif self.median_velocity:
endpoints.append(results[4])
temp_g_behavioural_descriptors.append(results[4])
elif self.multi_dimensional_descriptor:
temp_descriptor_results = []
for i in range(len(self.multi_dimensional_descriptor_indexes)):
if(self.multi_dimensional_descriptor_indexes[i] == 1):
#appending an endpoint, so must append individual dimensions
temp_descriptor_results.append(results[self.multi_dimensional_descriptor_indexes[i]][0])
temp_descriptor_results.append(results[self.multi_dimensional_descriptor_indexes[i]][1])
else:
temp_descriptor_results.append(results[self.multi_dimensional_descriptor_indexes[i]])
endpoints.append(temp_descriptor_results)
#print("temp_descriptor_results = ")
#print(temp_descriptor_results)
else:
print("Novelty behavioural descriptor undefined")
fit=results[0]
fitnesses.append(fit)
temp_g_objective_fitnesses.append(fit)
self.slnEval += 1
if fit >= self.solution_threshold:
print("Solution found! On eval: " + str(self.slnEval)) #calculate current evaluation count
print(self.population[i])
solution = True
break
if self.slnEval >= self.maxEvals:
print("No solution found at maximum evals")
#return self.slnEval
break
#print("endpoints: ")
#print(endpoints)
n_fitnesses, temp_g_novelty_scores = self.get_n_fitnesses(fitnesses, endpoints)
#print("Combined fitnesses for gen " + str(gen) + ":")
#print(n_fitnesses)
#print("n_fitnesses")
#print(n_fitnesses)
self.g_behavioural_descriptors.append(temp_g_behavioural_descriptors)
self.g_objective_fitnesses.append(temp_g_objective_fitnesses)
self.g_novelty_scores.append(temp_g_novelty_scores)
#for troubleshooting
print("Length of behavioural descriptors: " + str(len(self.g_behavioural_descriptors)))
print("Length of objective fitnesses: " + str(len(self.g_objective_fitnesses)))
print("Length of novelty scores: " + str(len(self.g_novelty_scores)))
if solution:
print("break triggered")
break
if self.slnEval >= self.maxEvals:
break
for ind, fit in zip(self.population, n_fitnesses):
fitness = []
fitness.append(fit)
ind.fitness.values = fitness
self.best_fits.append(np.max(fitnesses))
if self.check_best_fits_stag() or self.check_pop_stag(fitnesses):
self.restart()
else:
self.update_records(gen)
self.decrement_novelty_ratio()
gen+=1
return self.slnEval, self.g_behavioural_descriptors, self.g_objective_fitnesses, self.g_novelty_scores
def decrement_novelty_ratio(self):
if self.novelty_ratio > 0:
self.novelty_ratio = self.novelty_ratio - self.novelty_decrement
def check_best_fits_stag(self, gens = 20):
print("Checking best fits stag. Length of best_fits: " + str(len(self.best_fits)))
if len(self.best_fits) >= gens:
std_dev = np.std(self.best_fits)
del self.best_fits[0]
if std_dev <= self.best_fits_stag_threshold:
print("Best pop fit stag triggered")
return True
else:
return False
else:
return False
def check_pop_stag(self, fitnesses):
print("checking pop stag, length of fitnesses = " + str(len(fitnesses)))
std_dev = np.std(fitnesses)
if std_dev <= self.pop_stag_threshold:
print("Pop stag triggered")
return True
else:
return False
#