Skip to content

Commit

Permalink
move data-collection functions
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonBlanke committed Apr 20, 2024
1 parent 5f2931d commit b27cd9e
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 83 deletions.
5 changes: 5 additions & 0 deletions src/surfaces/data_collector/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


import os

here_path = os.path.dirname(os.path.realpath(__file__))
Expand Down
91 changes: 91 additions & 0 deletions src/surfaces/data_collector/surfaces_data_collector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# Author: Simon Blanke
# Email: simon.blanke@yahoo.com
# License: MIT License


import numpy as np
import pandas as pd
from functools import reduce

from search_data_collector import SqlSearchData
from hyperactive import Hyperactive
from hyperactive.optimizers import GridSearchOptimizer as HyperactiveGridSearchOptimizer
from gradient_free_optimizers import GridSearchOptimizer

from .config import default_search_data_path

Expand All @@ -8,3 +20,82 @@ def __init__(self, path=None) -> None:
if path is None:
path = default_search_data_path
super().__init__(path, func2str=True)

def _init_search_data(self, objective_function, search_space):
    """Reset the bookkeeping attributes used while collecting search data.

    Sets ``para_names``, ``search_data_length``, ``search_space_size`` and
    an empty ``search_data`` frame on the instance.

    Parameters
    ----------
    objective_function
        Not used here; accepted so the signature matches the other
        collection helpers.
    search_space
        Mapping of parameter name to a sized collection of values.
    """
    self.para_names = list(search_space)
    self.search_data_length = 0

    # One grid point exists per combination of values, so the grid size is
    # the product of the per-dimension lengths.
    self.search_space_size = reduce(
        lambda total, n_values: total * n_values,
        (len(values) for values in search_space.values()),
    )

    # Empty frame with one column per parameter plus the score column.
    self.search_data = pd.DataFrame([], columns=[*self.para_names, "score"])

def _array_search_space(self, objective_function, search_space):
    """Collect scores for the search space with ``GridSearchOptimizer``.

    Keeps running grid searches and merging their results into
    ``self.search_data`` until the number of unique parameter rows is no
    longer smaller than ``self.search_space_size``.

    Parameters
    ----------
    objective_function
        Callable handed to the optimizer's ``search`` method.
    search_space
        Search-space mapping handed to the optimizer.
    """
    while self.search_data_length < self.search_space_size:
        print("\n ------------ search_space_size", self.search_space_size)

        grid_search = GridSearchOptimizer(
            search_space,
            direction="orthogonal",
            initialize={},
        )
        grid_search.search(
            objective_function,
            n_iter=int(self.search_space_size * 1),
            verbosity=["progress_bar"],
        )

        # Merge the new run into what has been collected so far, keeping
        # one row per parameter combination.
        merged = pd.concat(
            [self.search_data, grid_search.search_data],
            ignore_index=True,
        )
        self.search_data = merged.drop_duplicates(subset=self.para_names)
        self.search_data_length = len(self.search_data)
        print(
            "\n ------------ self.search_data_length", self.search_data_length, "\n"
        )

def _list_search_space(self, objective_function, search_space):
    """Collect scores for the search space through ``Hyperactive``.

    Keeps running Hyperactive grid searches and merging their results into
    ``self.search_data`` until the number of unique parameter rows is no
    longer smaller than ``self.search_space_size``. The data collected so
    far is passed to each run as ``memory_warm_start``.

    Parameters
    ----------
    objective_function
        Callable registered with ``Hyperactive.add_search``.
    search_space
        Search-space mapping registered with ``Hyperactive.add_search``.
    """
    while self.search_data_length < self.search_space_size:
        print("\n ------------ search_space_size", self.search_space_size)

        hyper = Hyperactive(verbosity=["progress_bar"])
        grid_optimizer = HyperactiveGridSearchOptimizer(direction="orthogonal")
        hyper.add_search(
            objective_function,
            search_space,
            initialize={},
            n_iter=self.search_space_size,
            optimizer=grid_optimizer,
            memory_warm_start=self.search_data,
        )
        hyper.run()

        # Merge the new run into what has been collected so far, keeping
        # one row per parameter combination.
        merged = pd.concat(
            [self.search_data, hyper.search_data(objective_function)],
            ignore_index=True,
        )
        self.search_data = merged.drop_duplicates(subset=self.para_names)
        self.search_data_length = len(self.search_data)
        print(
            "\n ------------ self.search_data_length", self.search_data_length, "\n"
        )

def collect(
    self,
    objective_function,
    search_space,
    table=None,
    if_exists="append",
):
    """Evaluate the objective function over the search space and save it.

    Parameters
    ----------
    objective_function
        Callable to evaluate; its ``__name__`` is the default table name.
    search_space
        Mapping of parameter name to a collection of values.
    table
        Name passed to ``self.save``. Defaults to the function's name.
    if_exists
        Forwarded unchanged to ``self.save``.
    """
    table_name = objective_function.__name__ if table is None else table

    self._init_search_data(objective_function, search_space)

    # The type of the first dimension alone decides which collection
    # routine handles the whole search space.
    first_dimension = search_space[self.para_names[0]]
    if isinstance(first_dimension, np.ndarray):
        run_collection = self._array_search_space
    else:
        run_collection = self._list_search_space
    run_collection(objective_function, search_space)

    self.save(table_name, self.search_data, if_exists)
4 changes: 0 additions & 4 deletions src/surfaces/test_functions/_base_test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

import time

from ..data_collector import SurfacesDataCollector


class BaseTestFunction:
explanation = """ """
Expand All @@ -18,8 +16,6 @@ class BaseTestFunction:
pure_objective_function: callable

def __init__(self):
self.sql_data = SurfacesDataCollector()

self.create_objective_function()

def create_objective_function(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,42 +28,6 @@ def __init__(self, metric=None, sleep=0, load_search_data=False):

# self.objective_function.__func__.__name__ = self.__name__

def _collect_data(self, table=None, if_exists="append"):
    """Evaluate the objective function over its search space and save it.

    Runs Hyperactive grid searches until the number of unique parameter
    rows is no longer smaller than the search-space size, then stores the
    collected frame through ``self.sql_data.save``.

    Parameters
    ----------
    table
        Name passed to ``self.sql_data.save``. Defaults to
        ``self.__name__``.
    if_exists
        Forwarded unchanged to ``self.sql_data.save``.
    """
    if table is None:
        table = self.__name__

    search_space = self.search_space()
    search_data = pd.DataFrame([], columns=self.para_names + ["score"])
    search_data_length = 0

    # One grid point exists per combination of values, so the grid size is
    # the product of the per-dimension lengths.
    dim_sizes_list = [len(values) for values in search_space.values()]
    search_space_size = reduce(lambda x, y: x * y, dim_sizes_list)

    while search_data_length < search_space_size:
        print("\n ------------ search_space_size", search_space_size)

        hyper = Hyperactive(verbosity=["progress_bar"])
        hyper.add_search(
            self.objective_function,
            search_space,
            initialize={},
            n_iter=search_space_size,
            optimizer=GridSearchOptimizer(direction="orthogonal"),
            memory_warm_start=search_data,
        )
        hyper.run()

        search_data = pd.concat(
            [search_data, hyper.search_data(self.objective_function)],
            ignore_index=True,
        )

        # Keep one row per parameter combination.
        search_data = search_data.drop_duplicates(subset=self.para_names)
        search_data_length = len(search_data)
        print("\n ------------ search_data_length", search_data_length, "\n")

    # Save under the resolved table name; the previous code always passed
    # self.__name__ here, so an explicit ``table`` argument had no effect.
    self.sql_data.save(table, search_data, if_exists)

def objective_function_loaded(self, params):
try:
parameter_d = params.para_dict
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ def add_dim(search_space_: dict, dim: int, min, max):
values = list(values)
search_space_[dim_str] = values

if isinstance(min, list) and isinstance(max, list) and len(min) == len(max):
if isinstance(min, list) and isinstance(max, list):
if len(min) != len(max) or len(min) != self.n_dim:
raise ValueError

for dim, (min_, max_) in enumerate(zip(min, max)):
add_dim(search_space_, dim, min_, max_)
else:
Expand All @@ -63,45 +66,3 @@ def search_space_from_blank(search_space_blank, value_types):
dim_values = list(dim_values)
search_space[para_names] = dim_values
return search_space

def search_data(self):
    """Evaluate the pure objective function over the array search space.

    Runs ``GridSearchOptimizer`` searches until the number of unique
    parameter rows is no longer smaller than the search-space size.

    Returns
    -------
    pandas.DataFrame
        One column per parameter plus a ``"score"`` column.
    """
    # Keep the grid in a local variable. Assigning it to
    # ``self.search_space`` would replace the method with a dict, and any
    # later call to this method would then fail.
    search_space = self.search_space(value_types="array")

    para_names = list(search_space)
    collected = pd.DataFrame([], columns=para_names + ["score"])
    n_collected = 0

    # One grid point exists per combination of values, so the grid size is
    # the product of the per-dimension lengths.
    dim_sizes_list = [len(values) for values in search_space.values()]
    search_space_size = reduce(lambda x, y: x * y, dim_sizes_list)

    self.create_objective_function()

    while n_collected < search_space_size:
        print("\n ------------ search_space_size", search_space_size)
        opt = GridSearchOptimizer(
            search_space,
            direction="orthogonal",
            initialize={},
        )
        opt.search(
            self.pure_objective_function,
            n_iter=int(search_space_size * 1),
            verbosity=["progress_bar"],
        )

        collected = pd.concat(
            [collected, opt.search_data],
            ignore_index=True,
        )

        # Keep one row per parameter combination.
        collected = collected.drop_duplicates(subset=para_names)
        n_collected = len(collected)
        print("\n ------------ search_data_length", n_collected, "\n")

    return collected

def _collect_data(self, table=None, if_exists="append"):
    """Save the frame returned by ``self.search_data()``.

    Parameters
    ----------
    table
        Name passed to ``self.sql_data.save``. Defaults to
        ``self.__name__``.
    if_exists
        Forwarded unchanged to ``self.sql_data.save``.
    """
    target_table = self.__name__ if table is None else table
    self.sql_data.save(target_table, self.search_data(), if_exists)

0 comments on commit b27cd9e

Please sign in to comment.