Merge pull request #173 from Epistimio/release-v0.1.2rc2

Release v0.1.2rc2
Epistimio · Mar 29, 2019 · a0a77d6 · a0a77d6
2 parents ea78af6 + 3e58711
commit a0a77d6
Show file tree

Hide file tree

Showing 29 changed files with 460 additions and 113 deletions.
diff --git a/docs/src/install/core.rst b/docs/src/install/core.rst
@@ -26,7 +26,7 @@ Recommended for users
 
 .. code-block:: sh
 
-   pip install git+https://github.com/mila-udem/orion.git@develop
+   pip install git+https://github.com/epistimio/orion.git@develop
 
 Note that the bleeding-edge branch is develop. The master branch is the same as the latest version
 on PyPI.

diff --git a/docs/src/install/database.rst b/docs/src/install/database.rst
@@ -100,8 +100,8 @@ Oríon configuration files.
 
       database:
         type: 'mongodb'
-          name: 'orion_test'
-          host: 'mongodb://user:pass@localhost'
+        name: 'orion_test'
+        host: 'mongodb://user:pass@localhost'
 
 As it will be referenced with detail in configuration's documentation (TODO),
 the environmental variable definitions precede the ones within files in default

diff --git a/docs/src/user/pytorch.rst b/docs/src/user/pytorch.rst
@@ -148,10 +148,21 @@ Hunting Options
                             either here or in a config)
       -c path-to-config, --config path-to-config
                             user provided orion configuration file
-      --max-trials #        number of jobs/trials to be completed (default:
-                            inf/until preempted)
-      --pool-size #         number of concurrent workers to evaluate candidate
-                            samples (default: 10)
+      --max-trials #        number of trials to be completed for the experiment.
+                            This value will be saved within the experiment
+                            configuration and reused across all workers to
+                            determine experiment's completion. (default: inf/until
+                            preempted)
+      --worker-trials #     number of trials to be completed for this worker. If
+                            the experiment is completed, the worker will die even
+                            if it did not reach its maximum number of trials
+                            (default: inf/until preempted)
+      --pool-size #         number of simultaneous trials the algorithm should
+                            suggest. This is useful if many workers are executed
+                            in parallel and the algorithm has a strategy to sample
+                            non-independant trials simultaneously. Otherwise, it
+                            is better to leave `pool_size` to 1 and set a Strategy
+                            for Oríon's producer. (default: 1)
 
 ``name``
 
@@ -166,9 +177,16 @@ command hunt, including ``name``, ``pool-size`` and ``max-trials``.
 
 The maximum number of trials tried during an experiment.
 
+``worker-trials``
+
+The maximum number of trials to be executed by a worker (a single call to ``orion hunt [...]``).
+
 ``pool-size``
 
-The number of trials which are generated by the algorithm each time it is interrogated.
+The number of trials which are generated by the algorithm each time it is interrogated. This is
+useful if many workers are executed in parallel and the algorithm has a strategy to sample
+non-independent trials simultaneously. Otherwise, it is better to leave ``pool_size`` at its default
+value 1.
 
 
 Results

diff --git a/src/orion/algo/base.py b/src/orion/algo/base.py
@@ -112,9 +112,20 @@ def __init__(self, space, **kwargs):
                     param.lower() in OptimizationAlgorithm.typenames:
                 # pylint: disable=too-many-function-args
                 param = OptimizationAlgorithm(param, space)
+            elif varname == 'seed':
+                self.seed_rng(param)
 
             setattr(self, varname, param)
 
+    def seed_rng(self, seed):
+        """Seed the state of the random number generator.
+
+        :param seed: Integer seed for the random number generator.
+
+        .. note:: This method does nothing if the algorithm is deterministic.
+        """
+        pass
+
     @abstractmethod
     def suggest(self, num=1):
         """Suggest a `num` of new sets of parameters.

diff --git a/src/orion/algo/random.py b/src/orion/algo/random.py
@@ -8,18 +8,29 @@
    :synopsis: Draw and deliver samples from prior defined in problem's domain.
 
 """
+import numpy
 
 from orion.algo.base import BaseAlgorithm
 
 
 class Random(BaseAlgorithm):
     """Implement a algorithm that samples randomly from the problem's space."""
 
-    def __init__(self, space):
-        """Random sampler takes no other hyperparameter that the problem's space
+    def __init__(self, space, seed=None):
+        """Random sampler takes no other hyperparameter than the problem's space
         itself.
+
+        :param space: `orion.algo.space.Space` of optimization.
+        :param seed: Integer seed for the random number generator.
+        """
+        super(Random, self).__init__(space, seed=seed)
+
+    def seed_rng(self, seed):
+        """Seed the state of the random number generator.
+
+        :param seed: Integer seed for the random number generator.
         """
-        super(Random, self).__init__(space)
+        self.rng = numpy.random.RandomState(seed)
 
     def suggest(self, num=1):
         """Suggest a `num` of new sets of parameters. Randomly draw samples
@@ -30,7 +41,7 @@ def suggest(self, num=1):
         .. note:: New parameters must be compliant with the problem's domain
            `orion.algo.space.Space`.
         """
-        return self.space.sample(num)
+        return self.space.sample(num, seed=self.rng.randint(0, 10000))
 
     def observe(self, points, results):
         """Observe evaluation `results` corresponding to list of `points` in

diff --git a/src/orion/core/cli/base.py b/src/orion/core/cli/base.py
@@ -83,6 +83,12 @@ def get_basic_args_group(parser):
         help="experiment's unique name; "
              "(default: None - specified either here or in a config)")
 
+    basic_args_group.add_argument(
+        '-u', '--user',
+        type=str,
+        help="user associated to experiment's unique name; "
+             "(default: $USER - can be overriden either here or in a config)")
+
     basic_args_group.add_argument('-c', '--config', type=argparse.FileType('r'),
                                   metavar='path-to-config', help="user provided "
                                   "orion configuration file")

diff --git a/src/orion/core/cli/hunt.py b/src/orion/core/cli/hunt.py
@@ -44,7 +44,10 @@ def add_subparser(parser):
 
     orion_group.add_argument(
         "--pool-size", type=int, metavar='#',
-        help="number of concurrent workers to evaluate candidate samples "
+        help="number of simultaneous trials the algorithm should suggest. "
+             "This is useful if many workers are executed in parallel and the algorithm has a "
+             "strategy to sample non-independant trials simultaneously. Otherwise, it is better "
+             "to leave `pool_size` to 1 and set a Strategy for Oríon's producer. "
              "(default: %s)" % resolve_config.DEF_CMD_POOL_SIZE[1])
 
     evc_cli.get_branching_args_group(hunt_parser)

diff --git a/src/orion/core/io/experiment_builder.py b/src/orion/core/io/experiment_builder.py
@@ -178,6 +178,9 @@ def fetch_full_config(self, cmdargs, use_db=True):
         exp_config = resolve_config.merge_configs(
             default_options, env_vars, copy.deepcopy(config_from_db), cmdconfig, cmdargs, metadata)
 
+        if 'user' in exp_config:
+            exp_config['metadata']['user'] = exp_config['user']
+
         # TODO: Find a better solution
         if isinstance(exp_config['algorithms'], dict) and len(exp_config['algorithms']) > 1:
             for key in list(config_from_db['algorithms'].keys()):
@@ -218,7 +221,7 @@ def build_view_from(self, cmdargs):
                                "Please use either `name` cmd line arg or provide "
                                "one in orion's configuration file.")
 
-        return ExperimentView(local_config["name"])
+        return ExperimentView(local_config["name"], local_config.get('user', None))
 
     def build_from(self, cmdargs):
         """Build a fully configured (and writable) experiment based on full configuration.
@@ -264,7 +267,7 @@ def build_from_config(self, config):
         config.pop('database', None)
         config.pop('resources', None)
 
-        experiment = Experiment(config['name'])
+        experiment = Experiment(config['name'], config.get('user', None))
 
         # Finish experiment's configuration and write it to database.
         experiment.configure(config)

diff --git a/src/orion/core/io/resolve_config.py b/src/orion/core/io/resolve_config.py
@@ -59,7 +59,7 @@ def is_exe(path):
 # Default settings for command line arguments (option, description)
 DEF_CMD_MAX_TRIALS = (infinity, 'inf/until preempted')
 DEF_CMD_WORKER_TRIALS = (infinity, 'inf/until preempted')
-DEF_CMD_POOL_SIZE = (10, str(10))
+DEF_CMD_POOL_SIZE = (1, str(1))
 
 DEF_CONFIG_FILES_PATHS = [
     os.path.join(orion.core.DIRS.site_data_dir, 'orion_config.yaml.example'),
@@ -109,6 +109,7 @@ def fetch_default_options():
 
     # get some defaults
     default_config['name'] = None
+    default_config['user'] = getpass.getuser()
     default_config['max_trials'] = DEF_CMD_MAX_TRIALS[0]
     default_config['worker_trials'] = DEF_CMD_WORKER_TRIALS[0]
     default_config['pool_size'] = DEF_CMD_POOL_SIZE[0]
@@ -254,10 +255,10 @@ def fetch_user_repo(user_script):
     dir_path = os.path.dirname(os.path.abspath(user_script))
     try:
         git_repo = git.Repo(dir_path, search_parent_directories=True)
-    except git.exc.InvalidGitRepositoryError as e:
+    except git.exc.InvalidGitRepositoryError:
         git_repo = None
-        raise RuntimeError('Script {} should be in a git repository.'.format(
-            os.path.abspath(user_script))) from e
+        logging.warning('Script %s is not in a git repository. Code modification '
+                        'won\'t be detected.', os.path.abspath(user_script))
     return git_repo
 
 

diff --git a/src/orion/core/worker/__init__.py b/src/orion/core/worker/__init__.py
@@ -21,6 +21,24 @@
 log = logging.getLogger(__name__)
 
 
+def reserve_trial(experiment, producer):
+    """Reserve a new trial, or produce and reserve a trial if none are available."""
+    trial = experiment.reserve_trial(score_handle=producer.algorithm.score)
+
+    if trial is None:
+        log.debug("#### Failed to pull a new trial from database.")
+
+        log.debug("#### Fetch most recent completed trials and update algorithm.")
+        producer.update()
+
+        log.debug("#### Produce new trials.")
+        producer.produce()
+
+        return reserve_trial(experiment, producer)
+
+    return trial
+
+
 def workon(experiment, worker_trials=None):
     """Try to find solution to the search problem defined in `experiment`."""
     producer = Producer(experiment)
@@ -32,28 +50,24 @@ def workon(experiment, worker_trials=None):
     except (OverflowError, TypeError):
         # When worker_trials is inf
         iterator = itertools.count()
-    for _ in iterator:
-        log.debug("#### Try to reserve a new trial to evaluate.")
-        trial = experiment.reserve_trial(score_handle=producer.algorithm.score)
 
-        if trial is None:
-            log.debug("#### Failed to pull a new trial from database.")
+    for _ in iterator:
+        log.debug("#### Poll for experiment termination.")
+        if experiment.is_done:
+            break
 
-            log.debug("#### Fetch most recent completed trials and update algorithm.")
-            producer.update()
+        log.debug("#### Try to reserve a new trial to evaluate.")
+        trial = reserve_trial(experiment, producer)
 
-            log.debug("#### Poll for experiment termination.")
-            if experiment.is_done:
-                break
+        log.debug("#### Successfully reserved %s to evaluate. Consuming...", trial)
+        consumer.consume(trial)
 
-            log.debug("#### Produce new trials.")
-            producer.produce()
+    stats = experiment.stats
 
-        else:
-            log.debug("#### Successfully reserved %s to evaluate. Consuming...", trial)
-            consumer.consume(trial)
+    if not stats:
+        log.info("No trials completed.")
+        return
 
-    stats = experiment.stats
     best = Database().read('trials', {'_id': stats['best_trials_id']})[0]
 
     stats_stream = io.StringIO()

diff --git a/src/orion/core/worker/experiment.py b/src/orion/core/worker/experiment.py
@@ -89,7 +89,7 @@ class Experiment(object):
                  '_node', '_last_fetched')
     non_branching_attrs = ('pool_size', 'max_trials')
 
-    def __init__(self, name):
+    def __init__(self, name, user=None):
         """Initialize an Experiment object with primary key (:attr:`name`, :attr:`user`).
 
         Try to find an entry in `Database` with such a key and config this object
@@ -112,7 +112,8 @@ def __init__(self, name):
         self.name = name
         self._node = None
         self.refers = {}
-        user = getpass.getuser()
+        if user is None:
+            user = getpass.getuser()
         self.metadata = {'user': user}
         self.pool_size = None
         self.max_trials = None
@@ -548,6 +549,8 @@ def stats(self):
             'params': 1
             }
         completed_trials = self.fetch_trials(query, selection)
+        if not completed_trials:
+            return dict()
         stats = dict()
         stats['trials_completed'] = len(completed_trials)
         stats['best_trials_id'] = None
@@ -583,12 +586,12 @@ def _instantiate_config(self, config):
         # Just overwrite everything else given
         for section, value in config.items():
             if section not in self.__slots__:
-                log.warning("Found section '%s' in configuration. Experiments "
-                            "do not support this option. Ignoring.", section)
+                log.info("Found section '%s' in configuration. Experiments "
+                         "do not support this option. Ignoring.", section)
                 continue
             if section.startswith('_'):
-                log.warning("Found section '%s' in configuration. "
-                            "Cannot set private attributes. Ignoring.", section)
+                log.info("Found section '%s' in configuration. "
+                         "Cannot set private attributes. Ignoring.", section)
                 continue
 
             # Copy sub configuration to value confusing side-effects
@@ -619,9 +622,7 @@ def _instantiate_config(self, config):
             self.refers['adapter'] = Adapter.build(self.refers['adapter'])
 
         if not self.producer.get('strategy'):
-            log.warning('You have not set a producer strategy, the basic '
-                        'NoParallelStrategy will be used')
-            self.producer = {'strategy': Strategy(of_type="NoParallelStrategy")}
+            self.producer = {'strategy': Strategy(of_type="MaxParallelStrategy")}
         elif not isinstance(self.producer.get('strategy'), BaseParallelStrategy):
             self.producer = {'strategy': Strategy(of_type=self.producer['strategy'])}
 
@@ -633,8 +634,17 @@ def _branch_config(self, conflicts, branching_configuration):
         """
         experiment_brancher = ExperimentBranchBuilder(conflicts, branching_configuration)
 
-        if not experiment_brancher.is_resolved or experiment_brancher.auto_resolution:
+        needs_manual_resolution = (not experiment_brancher.is_resolved or
+                                   experiment_brancher.auto_resolution)
+
+        if needs_manual_resolution:
             branching_prompt = BranchingPrompt(experiment_brancher)
+
+            if not sys.__stdin__.isatty():
+                raise ValueError(
+                    "Configuration is different and generates a branching event:\n{}".format(
+                        branching_prompt.get_status()))
+
             branching_prompt.cmdloop()
 
             if branching_prompt.abort or not experiment_brancher.is_resolved:
@@ -691,7 +701,7 @@ class ExperimentView(object):
                         ["fetch_trials", "fetch_trials_tree", "fetch_completed_trials",
                          "connect_to_version_control_tree"])
 
-    def __init__(self, name):
+    def __init__(self, name, user=None):
         """Initialize viewed experiment object with primary key (:attr:`name`, :attr:`user`).
 
         Build an experiment from configuration found in `Database` with a key (name, user).
@@ -704,7 +714,7 @@ def __init__(self, name):
         :param name: Describe a configuration with a unique identifier per :attr:`user`.
         :type name: str
         """
-        self._experiment = Experiment(name)
+        self._experiment = Experiment(name, user)
 
         if self._experiment.id is None:
             raise ValueError("No experiment with given name '%s' for user '%s' inside database, "

diff --git a/src/orion/core/worker/primary_algo.py b/src/orion/core/worker/primary_algo.py
@@ -41,6 +41,10 @@ def __init__(self, space, algorithm_config):
         self.transformed_space = build_required_space(requirements, self.space)
         self.algorithm.space = self.transformed_space
 
+    def seed_rng(self, seed):
+        """Seed the state of the algorithm's random number generator."""
+        self.algorithm.seed_rng(seed)
+
     def suggest(self, num=1):
         """Suggest a `num` of new sets of parameters.
 

diff --git a/src/orion/core/worker/producer.py b/src/orion/core/worker/producer.py
@@ -63,14 +63,16 @@ def produce(self):
             log.debug("### Algorithm suggests new points.")
 
             new_points = self.naive_algorithm.suggest(self.pool_size)
+            # Dummy sample to keep the original algorithm's rng state incrementing.
+            self.algorithm.suggest(self.pool_size)
 
             for new_point in new_points:
                 log.debug("#### Convert point to `Trial` object.")
                 new_trial = format_trials.tuple_to_trial(new_point, self.space)
                 try:
                     new_trial.parents = self.naive_trials_history.children
-                    self.experiment.register_trial(new_trial)
                     log.debug("#### Register new trial to database: %s", new_trial)
+                    self.experiment.register_trial(new_trial)
                     sampled_points += 1
                 except DuplicateKeyError:
                     log.debug("#### Duplicate sample. Updating algo to produce new ones.")