Release 1.4.0
shukon authored Jul 1, 2020
2 parents 77bc0be + 1d96e2b commit 5403722
Showing 127 changed files with 3,257 additions and 929 deletions.
17 changes: 16 additions & 1 deletion README.md
@@ -1,9 +1,14 @@
# CAVE

**NOTE** *This repository is waiting for adoption and there might be longer waiting times for answers.
However, feel free to report bugs or ask questions in the issues.
Last known working versions of major dependencies can be found [here](https://github.com/automl/issues/).*

## Configuration Assessment, Visualization and Evaluation

| master ([docs](https://automl.github.io/CAVE/stable/)) | development ([docs](https://automl.github.io/CAVE/dev/)) |
| --- | --- |
| [![Build Status](https://travis-ci.org/automl/CAVE.svg?branch=master)](https://travis-ci.org/automl/CAVE) | [![Build Status](https://travis-ci.org/automl/CAVE.svg?branch=development)](https://travis-ci.org/automl/CAVE) |

CAVE is a versatile analysis tool for automatic algorithm configurators. It generates comprehensive reports to
give insights into the configured algorithm, the instance/feature set and also the configuration tool itself.
@@ -50,6 +55,10 @@ git clone https://github.com/automl/CAVE.git && cd CAVE
pip install -r requirements.txt
python3 setup.py install # (or: python3 setup.py develop)
```
In case you have trouble with your virtualenv+pip setup, try:
```
pip install -U setuptools
```
Optional: To have some `.png`s automagically available, you also need phantomjs.
```
npm install phantomjs-prebuilt
```
@@ -114,6 +123,7 @@ Some flags provide additional fine-tuning of the analysis methods:
- `--cfp_time_slider`: `on` will add a time-slider to the interactive configurator footprint, which results in longer loading times; `off` will generate static `.png`s at the desired quantiles
- `--cfp_number_quantiles`: determines how many time-steps to prerender in the configurator footprint
- `--cot_inc_traj`: how the incumbent trajectory for the cost-over-time plot will be generated if the optimizer is BOHB (from [`racing`, `minimum`, `prefer_higher_budget`])
- `--pimp_interactive`: whether to plot interactive bokeh-plots for parameter importance
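
For orientation, a hypothetical invocation combining some of these flags could look like the sketch below (wrapped in Python via `subprocess` purely for illustration; the paths and flag values are placeholders borrowed from the CSV example further down, not a verified configuration):
```python
import subprocess

# Hypothetical CAVE call combining the fine-tuning flags above.
# Paths and values are placeholders; check `cave --help` for the authoritative flag list.
cmd = [
    "cave", "examples/csv_allinone/run_1",
    "--ta_exec_dir", "examples/csv_allinone/",
    "--output", "output/tuned_example",
    "--cfp_time_slider", "off",        # static footprint snapshots instead of a time-slider
    "--cfp_number_quantiles", "10",    # prerender ten time-steps for the footprint
    "--pimp_interactive", "on",        # interactive bokeh plots for parameter importance
]
subprocess.run(cmd, check=True)
```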

For a full list and further information on how to use CAVE, see:
`cave --help`
@@ -142,6 +152,11 @@ All your favourite configurators can be processed using [this simple CSV-format]
cave examples/csv_allinone/run_* --ta_exec_dir examples/csv_allinone/ --output output/csv_example
```

#### Auto-PyTorch
While APT is still in alpha and a work in progress at the time of writing, *CAVE* strives to support it as closely as possible.
There is no unified output available right now, so we provide a [notebook](https://github.com/automl/CAVE/blob/master/examples/autopytorch/apt_notebook.ipynb)
to showcase some exemplary analysis.

#### SMAC2
The legacy format of SMAC2 is still supported, though not extensively tested.
2 changes: 1 addition & 1 deletion cave/__version__.py
@@ -1 +1 @@
__version__ = "1.3.3"
__version__ = "1.4.0"
File renamed without changes.
75 changes: 75 additions & 0 deletions cave/analyzer/apt/apt_overview.py
@@ -0,0 +1,75 @@
import os
from collections import OrderedDict

from pandas import DataFrame

from cave.analyzer.base_analyzer import BaseAnalyzer
from cave.utils.apt_helpers.apt_warning import apt_warning
from cave.utils.exceptions import Deactivated


class APTOverview(BaseAnalyzer):
"""
Overview of AutoPyTorch-Specific Configurations
"""
def __init__(self, runscontainer):
super().__init__(runscontainer)
self.output_dir = runscontainer.output_dir

if self.runscontainer.file_format != "APT":
raise Deactivated("{} deactivated, only designed for file-format APT (but detected {})".format(
self.get_name(), self.runscontainer.file_format
))

apt_warning(self.logger)

html_table = self.run()
self.result["General"] = {"table": html_table,
"tooltip": "AutoPyTorch configuration."}

def get_name(self):
return "Auto-PyTorch Overview"

def run(self):
""" Generate tables. """
# Run-specific / budget specific infos
runs = self.runscontainer.get_aggregated(keep_folders=True, keep_budgets=False)
apt_config_dict = self._runspec_dict_apt_config(runs)
results_fit_dict = self._runspec_dict_results_fit(runs)

for k, runspec_dict in [("Auto-PyTorch Configuration", apt_config_dict),
("Results of the fit()-call", results_fit_dict)]:
order_spec = list(list(runspec_dict.values())[0].keys()) # Get keys of any sub-dict for order
html_table_specific = DataFrame(runspec_dict)
html_table_specific = html_table_specific.reindex(order_spec)
html_table_specific = html_table_specific.to_html(escape=False, justify='left')

self.result[k] = {"table": html_table_specific}

def _runspec_dict_results_fit(self, runs):
runspec = OrderedDict()

for idx, run in enumerate(runs):
self.logger.debug("Path to folder for run no. {}: {}".format(idx, str(run.path_to_folder)))
name = os.path.basename(run.path_to_folder)
runspec[name] = OrderedDict()
for k, v in run.share_information['results_fit']['info'].items():
runspec[name]["Info: " + str(k)] = v
for k, v in run.share_information['results_fit']['optimized_hyperparameter_config'].items():
runspec[name]["Parameter: " + str(k)] = v
runspec[name]["Budget"] = run.share_information['results_fit']['budget']
runspec[name]["Loss"] = run.share_information['results_fit']['loss']

return runspec

def _runspec_dict_apt_config(self, runs):
runspec = OrderedDict()

for idx, run in enumerate(runs):
self.logger.debug("Path to folder for run no. {}: {}".format(idx, str(run.path_to_folder)))
name = os.path.basename(run.path_to_folder)
runspec[name] = OrderedDict()
for k, v in run.share_information['apt_config'].items():
runspec[name][k] = v

return runspec
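
The table-building pattern in `run()` can be reproduced in isolation. The sketch below uses made-up run names and keys purely to show how the nested `OrderedDict` is turned into the rendered HTML table; it only depends on pandas:
```python
from collections import OrderedDict

from pandas import DataFrame

# Hypothetical run-specific data, mirroring the structure built by _runspec_dict_apt_config().
runspec = OrderedDict([
    ("run_1", OrderedDict([("budget", 10), ("Loss", 0.31)])),
    ("run_2", OrderedDict([("budget", 50), ("Loss", 0.27)])),
])

order_spec = list(list(runspec.values())[0].keys())  # keys of any sub-dict define the row order
table = DataFrame(runspec)           # one column per run, one row per key
table = table.reindex(order_spec)    # pin the row order explicitly
html = table.to_html(escape=False, justify='left')
print(html)
```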
47 changes: 47 additions & 0 deletions cave/analyzer/apt/apt_tensorboard.py
@@ -0,0 +1,47 @@
import os

from cave.analyzer.base_analyzer import BaseAnalyzer
from cave.utils.apt_helpers.apt_warning import apt_warning
from cave.utils.exceptions import Deactivated


class APTTensorboard(BaseAnalyzer):
"""
Launch a local TensorBoard instance for the Auto-PyTorch run and embed it in the report
"""
def __init__(self, runscontainer):
super().__init__(runscontainer)
if self.runscontainer.file_format != "APT":
raise Deactivated("{} deactivated, only designed for file-format APT (but detected {})".format(
self.get_name(), self.runscontainer.file_format
))
apt_warning(self.logger)
self.run()

def get_name(self):
return "Auto-PyTorch Tensorboard"

def run(self):
try:
from tensorboard import program
except ModuleNotFoundError:
raise Deactivated("Please install tensorboard to perform this analysis!")

if len(self.runscontainer.get_folders()) != 1:
raise ValueError("Undefined behaviour for multiple APT-outputs...")
run = self.runscontainer.get_aggregated(keep_budgets=False, keep_folders=True)[0]

# This line will need to be adapted
single_tfevents_file = run.share_information['tfevents_paths'][0]
tfevents_dir = os.path.split(single_tfevents_file)[0]
self.logger.info("Tensorboard base dir: %s", tfevents_dir)
print(tfevents_dir)

#for config in traj['config']:
# self.runscontainer.get_tensorboard_result(config['config'])

tb = program.TensorBoard()
tb.configure(argv=[None, '--logdir', tfevents_dir])
url = tb.launch()

self.result["else"] = " <iframe src=" + url + " width=\"950\" height=\"700\"></iframe> "
73 changes: 73 additions & 0 deletions cave/analyzer/apt/loss_curves.py
@@ -0,0 +1,73 @@
import os
from collections import namedtuple

from bokeh.embed import components
from bokeh.io import output_notebook
from bokeh.plotting import show

from cave.analyzer.apt.base_apt import BaseAPT
from cave.reader.runs_container import RunsContainer
from cave.utils.hpbandster_helpers import format_budgets

Line = namedtuple('Line', ['name', 'time', 'mean', 'upper', 'lower', 'config'])

class LossCurves(BaseAPT):
"""
Only works with AutoPyTorch-instance.
Visualize loss-curves of multiple neural networks for comparison in interactive plot.
"""

def __init__(self,
runscontainer: RunsContainer,
incumbent_trajectory: str=None,
):
"""
"""
super().__init__(runscontainer,
incumbent_trajectory=incumbent_trajectory,
)

self.rng = self.runscontainer.get_rng()

self.scenario = self.runscontainer.scenario
self.output_dir = os.path.join(self.runscontainer.output_dir, "tensorboard")
self.rh = self.runscontainer.get_aggregated(False, False)[0].validated_runhistory
# Run-specific / budget specific infos
if len(self.runscontainer.get_budgets()) > 1:
self.runs = self.runscontainer.get_aggregated(keep_folders=False, keep_budgets=True)
else:
self.runs = self.runscontainer.get_aggregated(keep_folders=True, keep_budgets=False)

self.formatted_budgets = format_budgets(self.runscontainer.get_budgets())

# Will be set during execution:
self.plots = [] # List with paths to '.png's

def get_name(self):
return "Loss Curves"

def plot(self):
"""
Plot performance over time, using all trajectory entries.
max_time denotes max(wallclock_limit, highest recorded time).
"""
#TODO Read in Tensorboard information
#TODO interactive loss-plots
raise NotImplementedError()

def get_plots(self):
return self.plots


def get_html(self, d=None, tooltip=None):
script, div = components(self.plot())
if d is not None:
d[self.name] = {
"bokeh" : (script, div),
"tooltip" : self.__doc__,
}
return script, div

def get_jupyter(self):
output_notebook()
show(self.plot())
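
Since `plot()` is still a TODO, the following is only a rough sketch of the kind of bokeh figure the `Line` namedtuple suggests: one mean loss curve per network with a shaded band between `lower` and `upper`. All values are made up:
```python
from bokeh.plotting import figure, show

# Made-up loss curve matching the fields of the Line namedtuple above.
time = [0, 1, 2, 3, 4]
mean = [1.00, 0.70, 0.50, 0.42, 0.40]
upper = [1.10, 0.80, 0.60, 0.50, 0.47]
lower = [0.90, 0.60, 0.40, 0.35, 0.33]

p = figure(title="Loss over time (sketch)", x_axis_label="epoch", y_axis_label="loss")
p.varea(x=time, y1=lower, y2=upper, alpha=0.3)           # uncertainty band
p.line(time, mean, line_width=2, legend_label="net_0")   # mean loss curve
show(p)
```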
44 changes: 33 additions & 11 deletions cave/analyzer/base_analyzer.py
@@ -11,7 +11,18 @@


class BaseAnalyzer(object):

"""
The base class for analyzing methods. To create a new analyzer, inherit from this class and extend.
If you already have an analyzer, but need a wrapper to call it, also inherit it from this class.
You should overwrite the "get_name"-method.
Currently, the analysis is triggered during initialization. After the analyzer has run, the results should be saved
to the member self.result, which is a dictionary with a defined structure.
The docstring (this part) will be used to display a tooltip / help for the analyzer, so it should be a descriptive
and concise paragraph describing the analyzer and its results.
Remember to call super().__init__(runscontainer) in your analyzer's __init__-method. This will initialize the logger,
name and important attributes.
All configurator data is available via self.runscontainer.
"""
def __init__(self,
runscontainer: RunsContainer,
*args,
@@ -28,31 +39,38 @@ def __init__(self,
self.error = False

options = self.runscontainer.analyzing_options
if not self.name in options.sections():
raise ValueError("Please state in the analyzing options whether or not to run this Analyzer "
"(simply add a line to the .ini file containing [{}])".format(self.name))
if not options[self.name].getboolean('run'):
raise Deactivated("This method has been deactivated in the options. To enable, just set "
"[{}][run] = True in the .ini file.".format(self.name))
if self.name not in options.sections():
self.logger.warning("Please state in the analyzing options whether or not to run this Analyzer "
"(simply add a line to the .ini file containing [{}])".format(self.name))
elif not options[self.name].getboolean('run'):
raise Deactivated("{0} has been deactivated in the options. To enable, just set "
"[{0}][run] = True in the .ini file or pass the appropriate flags.".format(self.name))

self.options = options[self.name]
for k, v in kwargs.items():
if v is not None:
self.options[k] = v
self.logger.debug("{} initialized with options: {}".format(self.name, str(dict(self.options))))


def plot_bokeh(self):
""" This function needs to be called if bokeh-plots are to be displayed in notebook AND saved to webpage."""
"""
This function should recreate the bokeh-plot from scratch with as little overhead as possible. This is needed to
show the bokeh plot in jupyter AND save it to the webpage. The bokeh plot needs to be recreated to be displayed
in different outputs for reasons beyond our control. So save all analysis results in the class and simply redo
the plotting with this function.
This function needs to be called if bokeh-plots are to be displayed in notebook AND saved to html-result.
"""
raise NotImplementedError()

def get_html(self, d=None, tooltip=None) -> Tuple[str, str]:
"""General reports in html-format, to be easily integrated in html-code. ALSO FOR BOKEH-OUTPUT.
"""General reports in html-format, to be easily integrated in html-code. WORKS ALSO FOR BOKEH-OUTPUT.
Parameters
----------
d: Dictionary
a dictionary that will be later turned into a website
tooltip: string
tooltip to be displayed in report. optional, will overwrite the docstrings that are used by default.
Returns
-------
@@ -66,7 +84,7 @@ def get_html(self, d=None, tooltip=None) -> Tuple[str, str]:
d[self.name] = self.result
d[self.name]['tooltip'] = tooltip if tooltip is not None else self.__doc__
script, div = HTMLBuilder("", "", "").add_layer(None, self.result)
combine = "\n\n".join([script, div])
return combine

def get_jupyter(self):
@@ -84,6 +102,10 @@ def get_jupyter(self):

@classmethod
def check_for_bokeh(cls, d):
"""
Check if there are bokeh-plots in the output of this analyzer by checking the result-dictionary for the bokeh
keyword.
"""
result = [] # all bokeh models
for k, v in d.items():
if isinstance(v, dict):
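
To illustrate the contract described in the new class docstring, here is a hypothetical minimal analyzer; the class name, table content and the matching `.ini` section are made up, and it only relies on the `runscontainer` accessors already used elsewhere in this commit:
```python
from cave.analyzer.base_analyzer import BaseAnalyzer


class RunOverviewSketch(BaseAnalyzer):
    """
    Lists the folders of all aggregated runs in a small HTML table.
    """
    def __init__(self, runscontainer):
        super().__init__(runscontainer)  # sets up logger, name and options
        runs = self.runscontainer.get_aggregated(keep_folders=True, keep_budgets=False)
        rows = "".join("<tr><td>{}</td></tr>".format(r.path_to_folder) for r in runs)
        self.result["General"] = {
            "table": "<table>{}</table>".format(rows),
            "tooltip": self.__doc__,
        }

    def get_name(self):
        return "Run Overview Sketch"  # also the section name expected in the .ini options
```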
Empty file.
