
Commit

Merge pull request #131 from artefactory/dev-tests
ADD: Basic testing for baseline models
VincentAuriau authored Jul 19, 2024
2 parents dc103bc + f4e9cda commit 82152d3
Showing 11 changed files with 195 additions and 83 deletions.
12 changes: 6 additions & 6 deletions choice_learn/models/base_model.py
@@ -622,12 +622,12 @@ def evaluate(self, choice_dataset, sample_weight=None, batch_size=-1, mode="eval
        batch_loss = tf.reduce_mean(batch_losses)
        return batch_loss

-    def _lbfgs_train_step(self, dataset, sample_weight=None):
+    def _lbfgs_train_step(self, choice_dataset, sample_weight=None):
        """Create a function required by tfp.optimizer.lbfgs_minimize.

        Parameters
        ----------
-        dataset: ChoiceDataset
+        choice_dataset: ChoiceDataset
            Dataset on which to estimate the parameters.
        sample_weight: np.ndarray, optional
            Sample weights to apply, by default None
@@ -694,7 +694,7 @@ def f(params_1d):
                assign_new_model_parameters(params_1d)
                # calculate the loss
                loss_value = self.evaluate(
-                    dataset, sample_weight=sample_weight, batch_size=-1, mode="eval"
+                    choice_dataset, sample_weight=sample_weight, batch_size=-1, mode="eval"
                )
                if self.regularization is not None:
                    regularization = tf.reduce_sum(
@@ -722,14 +722,14 @@ def f(params_1d):
        f.history = []
        return f

-    def _fit_with_lbfgs(self, dataset, sample_weight=None, verbose=0):
+    def _fit_with_lbfgs(self, choice_dataset, sample_weight=None, verbose=0):
        """Fit function for L-BFGS optimizer.

        Replaces the .fit method when the optimizer is set to L-BFGS.

        Parameters
        ----------
-        dataset : ChoiceDataset
+        choice_dataset : ChoiceDataset
            Dataset to be used for coefficient estimation
        epochs : int
            Maximum number of epochs allowed to reach minimum
@@ -748,7 +748,7 @@ def _fit_with_lbfgs(self, dataset, sample_weight=None, verbose=0):
        import tensorflow_probability as tfp

        epochs = self.epochs
-        func = self._lbfgs_train_step(dataset, sample_weight=sample_weight)
+        func = self._lbfgs_train_step(choice_dataset=choice_dataset, sample_weight=sample_weight)

        # convert initial model parameters to a 1D tf.Tensor
        init_params = tf.dynamic_stitch(func.idx, self.trainable_weights)
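A note on the pattern being renamed here: _lbfgs_train_step returns a closure that maps one flat 1D parameter tensor to a (loss, gradient) pair, which is exactly the contract of tfp.optimizer.lbfgs_minimize; the model flattens its weights with tf.dynamic_stitch and scatters the optimized vector back inside the closure. A minimal self-contained sketch of that contract, with an illustrative quadratic objective standing in for the model's likelihood (none of these names are from the library):

import tensorflow as tf
import tensorflow_probability as tfp

target = tf.constant([1.0, -2.0, 3.0])

def value_and_gradients(params_1d):
    # lbfgs_minimize expects (loss, gradient), both computed
    # from a single flat 1D parameter tensor.
    with tf.GradientTape() as tape:
        tape.watch(params_1d)
        loss = tf.reduce_sum((params_1d - target) ** 2)
    return loss, tape.gradient(loss, params_1d)

results = tfp.optimizer.lbfgs_minimize(
    value_and_gradients_function=value_and_gradients,
    initial_position=tf.zeros(3),
    max_iterations=100,
)
print(results.position)  # converges to `target`
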
52 changes: 46 additions & 6 deletions choice_learn/models/baseline_models.py
@@ -1,4 +1,5 @@
"""Models to be used as baselines for choice modeling. Nothing smart here."""

import numpy as np
import tensorflow as tf

@@ -12,6 +13,11 @@ def __init__(self, **kwargs):
        """Initialize the model."""
        super().__init__(**kwargs)

+    @property
+    def trainable_weights(self):
+        """Return an empty list."""
+        return []
+
    def compute_batch_utility(
        self,
        shared_features_by_choice,
@@ -44,12 +50,19 @@ def compute_batch_utility(
        # In order to avoid unused arguments warnings
        _ = shared_features_by_choice, items_features_by_choice, choices
        return np.squeeze(
-            np.random.uniform(shape=(available_items_by_choice.shape), minval=0, maxval=1)
-        )
+            np.random.uniform(size=(available_items_by_choice.shape), low=0.0, high=1.0)
+        ).astype(np.float32)

-    def fit(**kwargs):
+    def fit(self, *args, **kwargs):
        """Make sure that nothing happens during .fit."""
        _ = kwargs
+        _ = args
+        return {}
+
+    def _fit_with_lbfgs(self, *args, **kwargs):
+        """Make sure that nothing happens during .fit."""
+        _ = kwargs
+        _ = args
        return {}


@@ -64,11 +77,38 @@ def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.weights = []

-    def fit(self, choice_dataset, **kwargs):
-        """Compute the choice frequency of each product and defines it as choice probabilities."""
+    @property
+    def trainable_weights(self):
+        """Return the weights."""
+        return self.weights
+
+    def fit(self, choice_dataset, *args, **kwargs):
+        """Compute the choice frequency of each product and define it as choice probabilities.
+
+        Parameters
+        ----------
+        choice_dataset : ChoiceDataset
+            Dataset to be used for fitting
+        """
+        _ = kwargs
+        _ = args
        choices = choice_dataset.choices
-        for i in range(choice_dataset.get_num_items()):
+        for i in range(choice_dataset.get_n_items()):
            self.weights.append(tf.reduce_sum(tf.cast(choices == i, tf.float32)))
        self.weights = tf.stack(self.weights) / len(choices)

+    def _fit_with_lbfgs(self, choice_dataset, *args, **kwargs):
+        """Compute the choice frequency of each product and define it as choice probabilities.
+
+        Parameters
+        ----------
+        choice_dataset : ChoiceDataset
+            Dataset to be used for fitting
+        """
+        _ = kwargs
+        _ = args
+        choices = choice_dataset.choices
+        for i in range(choice_dataset.get_n_items()):
+            self.weights.append(tf.reduce_sum(tf.cast(choices == i, tf.float32)))
+        self.weights = tf.stack(self.weights) / len(choices)

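For orientation, the two baselines touched in this file are a random-utility model and a frequency-mimicking model. A usage sketch, assuming the public names RandomChoiceModel and DistribMimickingModel and the minimal ChoiceDataset construction below (the class statements sit outside the hunks shown, so both names and the constructor call are assumptions):

import numpy as np
from choice_learn.data import ChoiceDataset
from choice_learn.models.baseline_models import DistribMimickingModel, RandomChoiceModel

# Toy data: four choice situations over three items (0, 1, 2).
dataset = ChoiceDataset(choices=np.array([0, 1, 1, 2]))

random_baseline = RandomChoiceModel()
random_baseline.fit(dataset)  # deliberately a no-op that returns {}

frequency_baseline = DistribMimickingModel()
frequency_baseline.fit(dataset)
print(frequency_baseline.trainable_weights)  # empirical frequencies, here [0.25, 0.5, 0.25]

Overriding _fit_with_lbfgs next to fit matters because, per the base class docstring above, _fit_with_lbfgs replaces .fit when the optimizer is set to L-BFGS; without the override, the baselines would be sent through an optimization loop over weights they do not train.
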
22 changes: 11 additions & 11 deletions choice_learn/models/conditional_logit.py
@@ -387,16 +387,16 @@ def _build_coefficients_from_dict(self, n_items):
        self.coefficients = coefficients

-    def _store_dataset_features_names(self, dataset):
+    def _store_dataset_features_names(self, choice_dataset):
        """Register the names of the features in the dataset, for later use in utility computation.

        Parameters
        ----------
        dataset : ChoiceDataset
            ChoiceDataset used to fit the model.
        """
-        self._shared_features_by_choice_names = dataset.shared_features_by_choice_names
-        self._items_features_by_choice_names = dataset.items_features_by_choice_names
+        self._shared_features_by_choice_names = choice_dataset.shared_features_by_choice_names
+        self._items_features_by_choice_names = choice_dataset.items_features_by_choice_names

    def compute_batch_utility(
        self,
@@ -620,20 +620,20 @@ def _fit_with_lbfgs(
        self.instantiate(choice_dataset)

        fit = super()._fit_with_lbfgs(
-            dataset=choice_dataset,
+            choice_dataset=choice_dataset,
            sample_weight=sample_weight,
            **kwargs,
        )
        if get_report:
            self.report = self.compute_report(choice_dataset)
        return fit

-    def compute_report(self, dataset):
+    def compute_report(self, choice_dataset):
        """Compute a report of the estimated weights.

        Parameters
        ----------
-        dataset : ChoiceDataset
+        choice_dataset : ChoiceDataset
            ChoiceDataset used for the estimation of the weights that will be
            used to compute the Std Err of this estimation.
@@ -644,7 +644,7 @@ def compute_report(self, dataset):
        """
        import tensorflow_probability as tfp

-        weights_std = self.get_weights_std(dataset)
+        weights_std = self.get_weights_std(choice_dataset)
        dist = tfp.distributions.Normal(loc=0.0, scale=1.0)

        names = []
@@ -673,12 +673,12 @@
            },
        )

-    def get_weights_std(self, dataset):
+    def get_weights_std(self, choice_dataset):
        """Approximate the Std Err of the weights with the Hessian matrix.

        Parameters
        ----------
-        dataset : ChoiceDataset
+        choice_dataset : ChoiceDataset
            ChoiceDataset used for the estimation of the weights that will be
            used to compute the Std Err of this estimation.
@@ -700,12 +700,12 @@
                mw.append(w[:, index : index + _w.shape[1]])
                index += _w.shape[1]
            model.trainable_weights = mw
-            batch = next(dataset.iter_batch(batch_size=-1))
+            batch = next(choice_dataset.iter_batch(batch_size=-1))
            utilities = model.compute_batch_utility(*batch)
            probabilities = tf.nn.softmax(utilities, axis=-1)
            loss = tf.keras.losses.CategoricalCrossentropy(reduction="sum")(
                y_pred=probabilities,
-                y_true=tf.one_hot(dataset.choices, depth=probabilities.shape[1]),
+                y_true=tf.one_hot(choice_dataset.choices, depth=probabilities.shape[1]),
            )
            # Compute the Jacobian
            jacobian = tape_2.jacobian(loss, w)
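Background for get_weights_std: under standard maximum-likelihood asymptotics, the covariance of the estimated weights is approximated by the inverse Hessian of the negative log-likelihood at the optimum, so each Std Err is the square root of a diagonal entry of that inverse. A sketch of that final step only (the function name is illustrative; the library derives the Hessian from the nested gradient tapes shown above):

import tensorflow as tf

def std_errors_from_hessian(hessian):
    # Cov(theta_hat) ~ H^{-1}; Std Errs are the sqrt of its diagonal.
    covariance = tf.linalg.inv(hessian)
    return tf.sqrt(tf.linalg.diag_part(covariance))

print(std_errors_from_hessian(tf.constant([[4.0, 0.0], [0.0, 1.0]])))  # [0.5, 1.0]
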
55 changes: 32 additions & 23 deletions choice_learn/models/latent_class_base_model.py
@@ -1,4 +1,5 @@
"""Base class for latent class choice models."""

import numpy as np
import tensorflow as tf
import tqdm
@@ -201,12 +202,12 @@ def compute_batch_utility(
        utilities.append(model_utilities)
        return utilities

-    def fit(self, dataset, sample_weight=None, verbose=0):
+    def fit(self, choice_dataset, sample_weight=None, verbose=0):
        """Fit the model on a ChoiceDataset.

        Parameters
        ----------
-        dataset : ChoiceDataset
+        choice_dataset : ChoiceDataset
            Dataset to be used for coefficient estimation
        sample_weight : np.ndarray, optional
            sample weights to apply, by default None
@@ -221,15 +222,19 @@ def fit(self, dataset, sample_weight=None, verbose=0):
        if self.fit_method.lower() == "em":
            self.minf = np.log(1e-3)
            print("Expectation-Maximization estimation algorithm not well implemented yet.")
-            return self._em_fit(dataset=dataset, sample_weight=sample_weight, verbose=verbose)
+            return self._em_fit(
+                choice_dataset=choice_dataset, sample_weight=sample_weight, verbose=verbose
+            )

        if self.fit_method.lower() == "mle":
            if self.optimizer.lower() == "lbfgs" or self.optimizer.lower() == "l-bfgs":
                return self._fit_with_lbfgs(
-                    dataset=dataset, sample_weight=sample_weight, verbose=verbose
+                    choice_dataset=choice_dataset, sample_weight=sample_weight, verbose=verbose
                )

-            return self._fit_normal(dataset=dataset, sample_weight=sample_weight, verbose=verbose)
+            return self._fit_normal(
+                choice_dataset=choice_dataset, sample_weight=sample_weight, verbose=verbose
+            )

        raise ValueError(f"Fit method not implemented: {self.fit_method}")

@@ -278,12 +283,12 @@ def evaluate(self, choice_dataset, sample_weight=None, batch_size=-1, mode="eval
        batch_loss = tf.reduce_mean(batch_losses)
        return batch_loss

-    def _lbfgs_train_step(self, dataset, sample_weight=None):
+    def _lbfgs_train_step(self, choice_dataset, sample_weight=None):
        """Create a function required by tfp.optimizer.lbfgs_minimize.

        Parameters
        ----------
-        dataset: ChoiceDataset
+        choice_dataset: ChoiceDataset
            Dataset on which to estimate the parameters.
        sample_weight: np.ndarray, optional
            Sample weights to apply, by default None
@@ -366,7 +371,7 @@ def f(params_1d):
                assign_new_model_parameters(params_1d)
                # calculate the loss
                loss_value = self.evaluate(
-                    dataset, sample_weight=sample_weight, batch_size=-1, mode="optim"
+                    choice_dataset, sample_weight=sample_weight, batch_size=-1, mode="optim"
                )
                # calculate gradients and convert to 1D tf.Tensor
                grads = tape.gradient(loss_value, trainable_weights)
@@ -389,14 +394,14 @@ def f(params_1d):
        f.history = []
        return f

-    def _fit_with_lbfgs(self, dataset, sample_weight=None, verbose=0):
+    def _fit_with_lbfgs(self, choice_dataset, sample_weight=None, verbose=0):
        """Fit function for L-BFGS optimizer.

        Replaces the .fit method when the optimizer is set to L-BFGS.

        Parameters
        ----------
-        dataset : ChoiceDataset
+        choice_dataset : ChoiceDataset
            Dataset to be used for coefficient estimation
        epochs : int
            Maximum number of epochs allowed to reach minimum
@@ -415,7 +420,7 @@ def _fit_with_lbfgs(self, dataset, sample_weight=None, verbose=0):
        import tensorflow_probability as tfp

        epochs = self.epochs
-        func = self._lbfgs_train_step(dataset, sample_weight=sample_weight)
+        func = self._lbfgs_train_step(choice_dataset, sample_weight=sample_weight)

        # convert initial model parameters to a 1D tf.Tensor
        init = []
@@ -446,7 +451,7 @@ def _fit_with_lbfgs(self, dataset, sample_weight=None, verbose=0):
            print("Algorithm converged before reaching max iterations:", results[0].numpy())
        return func.history

-    def _gd_train_step(self, dataset, sample_weight=None):
+    def _gd_train_step(self, choice_dataset, sample_weight=None):
        pass

    def _nothing(self, inputs):
@@ -488,15 +493,17 @@ def _nothing(self, inputs):
        proba_final = tf.keras.layers.Concatenate(axis=2)(proba_list)
        return tf.math.reduce_sum(proba_final, axis=2, keepdims=False)

-    def _expectation(self, dataset):
-        predicted_probas = [model.predict_probas(dataset) for model in self.models]
+    def _expectation(self, choice_dataset):
+        predicted_probas = [model.predict_probas(choice_dataset) for model in self.models]
        if np.sum(np.isnan(predicted_probas)) > 0:
            print("Nan in probas")
        predicted_probas = [
            latent
            * tf.gather_nd(
                params=proba,
-                indices=tf.stack([tf.range(0, len(dataset), 1), dataset.choices], axis=1),
+                indices=tf.stack(
+                    [tf.range(0, len(choice_dataset), 1), choice_dataset.choices], axis=1
+                ),
            )
            for latent, proba in zip(self.latent_logits, predicted_probas)
        ]
@@ -508,12 +515,12 @@ def _expectation(self, dataset):

        return predicted_probas / np.sum(predicted_probas, axis=1, keepdims=True), loss

-    def _maximization(self, dataset, verbose=0):
+    def _maximization(self, choice_dataset, verbose=0):
        """Maximization step.

        Parameters
        ----------
-        dataset : ChoiceDataset
+        choice_dataset : ChoiceDataset
            dataset to be fitted
        verbose : int, optional
            print level, for debugging, by default 0
@@ -526,19 +533,19 @@ def _maximization(self, dataset, verbose=0):
        self.models = [self.model_class(**mp) for mp in self.model_parameters]
        # M-step: MNL estimation
        for q in range(self.n_latent_classes):
-            self.models[q].fit(dataset, sample_weight=self.weights[:, q], verbose=verbose)
+            self.models[q].fit(choice_dataset, sample_weight=self.weights[:, q], verbose=verbose)

        # M-step: latent probability estimation
        latent_probas = np.sum(self.weights, axis=0)

        return latent_probas / np.sum(latent_probas)

-    def _em_fit(self, dataset, verbose=0):
+    def _em_fit(self, choice_dataset, verbose=0):
        """Fit with the Expectation-Maximization algorithm.

        Parameters
        ----------
-        dataset: ChoiceDataset
+        choice_dataset: ChoiceDataset
            Dataset to be used for coefficient estimation
        verbose : int, optional
            print level, for debugging, by default 0
@@ -556,10 +563,12 @@ def _em_fit(self, dataset, verbose=0):
        # Initialization
        for model in self.models:
            # model.instantiate()
-            model.fit(dataset, sample_weight=np.random.rand(len(dataset)), verbose=verbose)
+            model.fit(
+                choice_dataset, sample_weight=np.random.rand(len(choice_dataset)), verbose=verbose
+            )
        for i in tqdm.trange(self.epochs):
-            self.weights, loss = self._expectation(dataset)
-            self.latent_logits = self._maximization(dataset, verbose=verbose)
+            self.weights, loss = self._expectation(choice_dataset)
+            self.latent_logits = self._maximization(choice_dataset, verbose=verbose)
            hist_logits.append(self.latent_logits)
            hist_loss.append(loss)
            if np.sum(np.isnan(self.latent_logits)) > 0:
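The _expectation/_maximization pair renamed above is the textbook EM loop for a latent class mixture: the E-step turns per-class likelihoods and latent class probabilities into posterior class weights for every observation, and the M-step refits each class model with those weights before re-estimating the class probabilities. A self-contained sketch of the same loop on a toy two-component Bernoulli mixture (all names and data are illustrative, nothing below is library code):

import numpy as np

rng = np.random.default_rng(0)
x = np.concatenate([rng.binomial(1, 0.2, 200), rng.binomial(1, 0.8, 200)])

pis = np.array([0.5, 0.5])      # latent class probabilities
thetas = np.array([0.3, 0.6])   # per-class Bernoulli parameters

for _ in range(50):
    # E-step: posterior probability of each class for every observation.
    lik = np.stack([t ** x * (1 - t) ** (1 - x) for t in thetas])  # shape (2, n)
    weights = pis[:, None] * lik
    weights = weights / weights.sum(axis=0, keepdims=True)
    # M-step: refit each class on the weighted data, then update class probas.
    thetas = (weights * x).sum(axis=1) / weights.sum(axis=1)
    pis = weights.mean(axis=1)

print(pis, thetas)  # converges near [0.5, 0.5] and [0.2, 0.8]
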
