Merge pull request #221 from mala-project/naswot_last_touches
NASWOT fixes
RandomDefaultUser authored Oct 8, 2021
2 parents 80f3f8f + 58a7011 commit be4ae70
Showing 2 changed files with 49 additions and 25 deletions.
mala/network/no_training_pruner.py (2 changes: 1 addition & 1 deletion)
@@ -60,7 +60,7 @@ def prune(self, study: "optuna.study.Study", trial:
         objective = ObjectiveNoTraining(self._params, self._data_handler,
                                         self._trial_type)
         surrogate_loss = objective(trial)
-        if surrogate_loss > self._params.hyperparameters.no_training_cutoff:
+        if surrogate_loss < self._params.hyperparameters.no_training_cutoff:
             return True
         else:
             return False
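
For illustration, here is the corrected decision in isolation: the pruner now stops a trial when its surrogate loss (the negated NASWOT score returned by ObjectiveNoTraining) falls below the configured cutoff. A minimal standalone sketch; the function name is hypothetical and not part of MALA:

    def should_prune(surrogate_loss: float, cutoff: float) -> bool:
        # Corrected comparison from this commit: prune when the surrogate
        # loss drops below no_training_cutoff. The surrogate loss is the
        # negated NASWOT score, so lower values correspond to higher scores.
        return surrogate_loss < cutoff

    # Example: with a cutoff of -10.0, a trial whose loss is -12.0 is
    # pruned, while one at -8.0 is allowed to continue.
    assert should_prune(-12.0, -10.0) is True
    assert should_prune(-8.0, -10.0) is False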
mala/network/objective_no_training.py (72 changes: 48 additions & 24 deletions)
@@ -53,30 +53,54 @@ def __call__(self, trial):
         super(ObjectiveNoTraining, self).parse_trial(trial)

         # Build the network.
-        net = Network(self.params)
-        device = "cuda" if self.params.use_gpu else "cpu"
-
-        # Load the batches and get the jacobian.
-        do_shuffle = self.params.running.use_shuffling_for_samplers
-        if self.data_handler.parameters.use_lazy_loading or \
-                self.params.use_horovod:
-            do_shuffle = False
-        if self.params.running.use_shuffling_for_samplers:
-            self.data_handler.mix_datasets()
-        loader = DataLoader(self.data_handler.training_data_set,
-                            batch_size=self.params.running.mini_batch_size,
-                            shuffle=do_shuffle)
-        jac = ObjectiveNoTraining.__get_batch_jacobian(net, loader, device)
-
-        # Loss = - score!
-        surrogate_loss = float('inf')
-        try:
-            surrogate_loss = - ObjectiveNoTraining.__calc_score(jac)
-            surrogate_loss = surrogate_loss.cpu().detach().numpy().astype(
-                np.float64)
-        except RuntimeError:
-            printout("Got a NaN, ignoring sample.")
-        return surrogate_loss
+        surrogate_losses = []
+        for i in range(0, self.params.hyperparameters.
+                       number_training_per_trial):
+            net = Network(self.params)
+            device = "cuda" if self.params.use_gpu else "cpu"
+
+            # Load the batches and get the jacobian.
+            do_shuffle = self.params.running.use_shuffling_for_samplers
+            if self.data_handler.parameters.use_lazy_loading or \
+                    self.params.use_horovod:
+                do_shuffle = False
+            if self.params.running.use_shuffling_for_samplers:
+                self.data_handler.mix_datasets()
+            loader = DataLoader(self.data_handler.training_data_set,
+                                batch_size=self.params.running.mini_batch_size,
+                                shuffle=do_shuffle)
+            jac = ObjectiveNoTraining.__get_batch_jacobian(net, loader, device)
+
+            # Loss = - score!
+            surrogate_loss = float('inf')
+            try:
+                surrogate_loss = - ObjectiveNoTraining.__calc_score(jac)
+                surrogate_loss = surrogate_loss.cpu().detach().numpy().astype(
+                    np.float64)
+            except RuntimeError:
+                printout("Got a NaN, ignoring sample.")
+            surrogate_losses.append(surrogate_loss)
+
+        if self.params.hyperparameters.number_training_per_trial > 1:
+            printout("Losses from multiple runs are: ")
+            printout(surrogate_losses)
+
+        if self.params.hyperparameters.trial_ensemble_evaluation == "mean":
+            return np.mean(surrogate_losses)
+
+        elif self.params.hyperparameters.trial_ensemble_evaluation == \
+                "mean_std":
+            mean = np.mean(surrogate_losses)
+
+            # Cannot calculate the standard deviation of a bunch of infinities.
+            if np.isinf(mean):
+                return mean
+            else:
+                return np.mean(surrogate_losses) + \
+                    np.std(surrogate_losses)
+        else:
+            raise Exception("No way to estimate the trial metric from ensemble"
+                            " training provided.")

     @staticmethod
     def __get_batch_jacobian(net: Network, loader: DataLoader, device) \
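
Two pieces of the new logic lend themselves to a standalone illustration. First, the ensemble evaluation: each trial is now scored number_training_per_trial times, and the per-run surrogate losses are reduced to a single metric. A minimal sketch of that reduction, assuming only NumPy; the function name is hypothetical:

    import numpy as np

    def reduce_ensemble_losses(losses, mode="mean"):
        # Collapse the per-run surrogate losses of one trial into a single
        # metric, mirroring the branches added in the diff above.
        if mode == "mean":
            return np.mean(losses)
        elif mode == "mean_std":
            mean = np.mean(losses)
            # The standard deviation of a list containing infinities
            # (failed runs) is meaningless, so return the infinite mean.
            if np.isinf(mean):
                return mean
            return mean + np.std(losses)
        raise ValueError("Unknown ensemble evaluation mode: " + mode)

    # Example: three NASWOT evaluations of the same trial.
    print(reduce_ensemble_losses([-11.2, -10.8, -11.0], "mean_std"))

Second, the batch Jacobian that feeds the score. MALA's __get_batch_jacobian is truncated in this diff; the following is a generic sketch in the spirit of the NASWOT reference code, not MALA's exact implementation:

    import torch

    def get_batch_jacobian(net, x):
        # One backward pass with a ones vector yields, per sample, the sum
        # of the output-to-input Jacobian rows; flattening then gives one
        # Jacobian row per sample for the subsequent scoring step.
        net.zero_grad()
        x = x.clone().requires_grad_(True)
        y = net(x)
        y.backward(torch.ones_like(y))
        return x.grad.reshape(x.size(0), -1)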
