diff --git a/README.md b/README.md
index 005b18f6..da2f585e 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,10 @@ If you are new to choice modelling, you can check this [resource](https://www.pu
- Ready-To-Use datasets:
- [SwissMetro](./choice_learn/datasets/data/swissmetro.csv.gz) from Bierlaire et al. (2001) [[2]](#citation)
- [ModeCanada](./choice_learn/datasets/data/ModeCanada.csv.gz) from Koppelman et al. (1993) [[3]](#citation)
+ - The Train dataset from Ben-Akiva et al. (1993) [[5]](#citation)
+ - The Heating & Electricity datasets from Kenneth Train described [here](https://rdrr.io/cran/mlogit/man/Electricity.html) and [here](https://rdrr.io/cran/mlogit/man/Heating.html)
+ - The TaFeng dataset from [Kaggle](https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset)
+
### Models
- Ready-to-use models:
@@ -91,10 +95,12 @@ Choice-Learn requires the following:
- Python (>=3.8)
- NumPy (>=1.24)
- pandas (>=1.5)
+
For modelling you need:
- TensorFlow (>=2.13)
-Finally, an optional requirement used for report and LBFG-s use is:
-- tensorflow_probability (>=0.20.1)
+
+Finally, an optional requirement used for reports and L-BFGS optimization is:
+- TensorFlow Probability (>=0.20.1)
## Usage
```python
@@ -151,6 +157,7 @@ A detailed documentation of this project is available [here](https://artefactory
[2][The Acceptance of Model Innovation: The Case of Swissmetro](https://www.researchgate.net/publication/37456549_The_acceptance_of_modal_innovation_The_case_of_Swissmetro), Bierlaire, M.; Axhausen, K., W.; Abay, G. (2001)\
[3][Applications and Interpretation of Nested Logit Models of Intercity Mode Choice](https://trid.trb.org/view/385097), Forinash, C., V.; Koppelman, F., S. (1993)\
[4][The Demand for Local Telephone Service: A Fully Discrete Model of Residential Calling Patterns and Service Choices](https://www.jstor.org/stable/2555538), Train K., E.; McFadden, D., L.; Moshe, B. (1987)\
+[5] [Estimation of Travel Choice Models with Randomly Distributed Values of Time](https://ideas.repec.org/p/fth/lavaen/9303.html), Ben-Akiva, M.; Bolduc, D.; Bradley, M. (1993)
### Code and Repositories
- [1][RUMnet](https://github.com/antoinedesir/rumnet)
diff --git a/choice_learn/data/choice_dataset.py b/choice_learn/data/choice_dataset.py
index 56448da1..9984a215 100644
--- a/choice_learn/data/choice_dataset.py
+++ b/choice_learn/data/choice_dataset.py
@@ -614,7 +614,7 @@ def __len__(self):
"""
return len(self.choices)
- def get_num_items(self):
+ def get_n_items(self):
"""Method to access the total number of different items.
Returns:
@@ -624,7 +624,7 @@ def get_num_items(self):
"""
return self.base_num_items
- def get_num_choices(self):
+ def get_n_choices(self):
"""Method to access the total number of different choices.
Redundant with __len__ method.
@@ -689,7 +689,7 @@ def _contexts_items_features_df_to_np(
sess_df.columns = sess_df.loc[items_id_column]
if features is not None:
contexts_items_features.append(sess_df[items_index].loc[features].T.values)
- contexts_items_availabilities.append(np.ones(len(items_index)))
+ contexts_items_availabilities.append(np.ones(len(items_index)).astype("float32"))
else:
sess_feats = []
sess_av = []
@@ -806,9 +806,15 @@ def from_single_wide_df(
else:
contexts_items_availabilities = None
- choices = df[choices_column]
+ choices = df[choices_column].to_numpy()
+ print("choice", choices)
if choice_mode == "items_id":
+ if items_id is None:
+ raise ValueError("items_id must be given to use choice_mode 'items_id'")
+ items_id = np.array(items_id)
choices = np.squeeze([np.where(items_id == c)[0] for c in choices])
+ if choices.shape[0] == 0:
+ raise ValueError("No choice found in the items_id list")
return ChoiceDataset(
fixed_items_features=fixed_items_features,
@@ -940,7 +946,7 @@ def summary(self):
print("%=====================================================================%")
print("%%% Summary of the dataset:")
print("%=====================================================================%")
- print("Number of items:", self.get_num_items())
+ print("Number of items:", self.get_n_items())
print(
"Number of choices:",
len(self),
@@ -1038,7 +1044,9 @@ def get_choices_batch(self, choices_indexes, features=None):
)
if self.contexts_items_availabilities is None:
- contexts_items_availabilities = np.ones((len(choices_indexes), self.base_num_items))
+ contexts_items_availabilities = np.ones(
+ (len(choices_indexes), self.base_num_items)
+ ).astype("float32")
else:
contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes]
# .astype(self._return_types[3])
@@ -1179,7 +1187,7 @@ def get_choices_batch(self, choices_indexes, features=None):
)
if self.contexts_items_availabilities is None:
- contexts_items_availabilities = np.ones((self.base_num_items))
+ contexts_items_availabilities = np.ones((self.base_num_items)).astype("float32")
else:
contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes]
@@ -1299,41 +1307,67 @@ def __getitem__(self, choices_indexes):
elif isinstance(choices_indexes, slice):
return self.__getitem__(list(range(*choices_indexes.indices(len(self.choices)))))
- if self.fixed_items_features[0] is None:
- fixed_items_features = None
- else:
+ try:
+ if self.fixed_items_features[0] is None:
+ fixed_items_features = None
+ else:
+ fixed_items_features = self.fixed_items_features
+ except TypeError:
fixed_items_features = self.fixed_items_features
- if self.contexts_features[0] is None:
+
+ try:
+ if self.contexts_features[0] is None:
+ contexts_features = None
+ else:
+ contexts_features = tuple(
+ self.contexts_features[i][choices_indexes]
+ for i in range(len(self.contexts_features))
+ )
+ except TypeError:
contexts_features = None
- else:
- contexts_features = tuple(
- self.contexts_features[i][choices_indexes]
- for i in range(len(self.contexts_features))
- )
- if self.contexts_items_features[0] is None:
+
+ try:
+ if self.contexts_items_features[0] is None:
+ contexts_items_features = None
+ else:
+ contexts_items_features = tuple(
+ self.contexts_items_features[i][choices_indexes]
+ for i in range(len(self.contexts_items_features))
+ )
+ except TypeError:
contexts_items_features = None
- else:
- contexts_items_features = tuple(
- self.contexts_items_features[i][choices_indexes]
- for i in range(len(self.contexts_items_features))
- )
- if self.fixed_items_features_names[0] is None:
+
+ try:
+ if self.fixed_items_features_names[0] is None:
+ fixed_items_features_names = None
+ else:
+ fixed_items_features_names = self.fixed_items_features_names
+ except TypeError:
fixed_items_features_names = None
- else:
- fixed_items_features_names = self.fixed_items_features_names
- if self.contexts_features_names[0] is None:
+ try:
+ if self.contexts_features_names[0] is None:
+ contexts_features_names = None
+ else:
+ contexts_features_names = self.contexts_features_names
+ except TypeError:
contexts_features_names = None
- else:
- contexts_features_names = self.contexts_features_names
- if self.contexts_items_features_names[0] is None:
+ try:
+ if self.contexts_items_features_names[0] is None:
+ contexts_items_features_names = None
+ else:
+ contexts_items_features_names = self.contexts_items_features_names
+ except TypeError:
contexts_items_features_names = None
- else:
- contexts_items_features_names = self.contexts_items_features_names
+
+ try:
+ contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes]
+ except TypeError:
+ contexts_items_availabilities = None
return ChoiceDataset(
fixed_items_features=fixed_items_features,
contexts_features=contexts_features,
contexts_items_features=contexts_items_features,
- contexts_items_availabilities=self.contexts_items_availabilities[choices_indexes],
+ contexts_items_availabilities=contexts_items_availabilities,
choices=[self.choices[i] for i in choices_indexes],
fixed_items_features_names=fixed_items_features_names,
contexts_features_names=contexts_features_names,
@@ -1391,8 +1425,53 @@ def filter(self, bool_list):
Parameters
----------
bool_list : list of boolean
- list of booleans of length self.get_num_sessions() to filter sessions.
+ list of booleans of length self.get_n_contexts() to filter contexts.
True to keep, False to discard.
"""
indexes = [i for i, keep in enumerate(bool_list) if keep]
return self[indexes]
+
+ def get_n_fixed_items_features(self):
+ """Method to access the number of fixed items features.
+
+ Returns:
+ -------
+ int
+ number of fixed items features
+ """
+ if self.fixed_items_features is not None:
+ n_features = 0
+ for fixed_features in self.fixed_items_features:
+ n_features += fixed_features.shape[1]
+ return n_features
+ return 0
+
+ def get_n_contexts_features(self):
+ """Method to access the number of contexts features.
+
+ Returns:
+ -------
+ int
+ number of contexts features
+ """
+ if self.contexts_features is not None:
+ n_features = 0
+ for context_features in self.contexts_features:
+ n_features += context_features.shape[1]
+ return n_features
+ return 0
+
+ def get_n_contexts_items_features(self):
+ """Method to access the number of context items features.
+
+ Returns:
+ -------
+ int
+ number of contexts items features
+ """
+ if self.contexts_items_features is not None:
+ n_features = 0
+ for contexts_items_features in self.contexts_items_features:
+ n_features += contexts_items_features.shape[2]
+ return n_features
+ return 0
diff --git a/choice_learn/data/indexer.py b/choice_learn/data/indexer.py
index ddba5383..29ed0e2d 100644
--- a/choice_learn/data/indexer.py
+++ b/choice_learn/data/indexer.py
@@ -295,7 +295,7 @@ def __getitem__(self, choices_indexes):
if self.choice_dataset.contexts_items_availabilities is None:
contexts_items_availabilities = np.ones(
(len(choices_indexes), self.choice_dataset.base_num_items)
- )
+ ).astype("float32")
else:
if hasattr(self.choice_dataset.contexts_items_availabilities, "batch"):
contexts_items_availabilities = (
@@ -440,7 +440,9 @@ def __getitem__(self, choices_indexes):
choice = self.choice_dataset.choices[choices_indexes]
if self.choice_dataset.contexts_items_availabilities is None:
- contexts_items_availabilities = np.ones((self.choice_dataset.base_num_items))
+ contexts_items_availabilities = np.ones(
+ (self.choice_dataset.base_num_items)
+ ).astype("float32")
else:
contexts_items_availabilities = self.choice_dataset.contexts_items_availabilities[
choices_indexes
diff --git a/choice_learn/datasets/__init__.py b/choice_learn/datasets/__init__.py
index a16bf199..8a7ec3f4 100644
--- a/choice_learn/datasets/__init__.py
+++ b/choice_learn/datasets/__init__.py
@@ -1,8 +1,5 @@
"""Init file for datasets module."""
-from .base import load_modecanada, load_swissmetro
+from .base import load_electricity, load_heating, load_modecanada, load_swissmetro
-__all__ = [
- "load_modecanada",
- "load_swissmetro",
-]
+__all__ = ["load_modecanada", "load_swissmetro", "load_electricity", "load_heating"]
diff --git a/choice_learn/datasets/base.py b/choice_learn/datasets/base.py
index 92b65eab..6c977f55 100644
--- a/choice_learn/datasets/base.py
+++ b/choice_learn/datasets/base.py
@@ -367,3 +367,163 @@ def load_modecanada(
choices_column=choice_column,
choice_mode="one_zero",
)
+
+
+def load_heating(
+ as_frame=False,
+ to_wide=False,
+):
+ """Load and return the Heating dataset from Kenneth Train.
+
+ Parameters
+ ----------
+ as_frame : bool, optional
+ Whether to return the dataset as pd.DataFrame. If not, returned as ChoiceDataset,
+ by default False.
+ return_desc : bool, optional
+ Whether to return the description, by default False.
+ to_wide : bool, optional
+ Whether to return the dataset in wide format,
+ by default False (and thus returned in long format).
+
+ Returns:
+ --------
+ ChoiceDataset
+ Loaded Heating dataset
+ """
+ _ = to_wide
+ data_file_name = "heating_data.csv.gz"
+ names, data = load_gzip(data_file_name)
+
+ heating_df = pd.read_csv(resources.files(DATA_MODULE) / "heating_data.csv.gz")
+
+ if as_frame:
+ return heating_df
+
+ contexts_features = ["income", "agehed", "rooms", "region"]
+ choice = ["depvar"]
+ contexts_items_features = ["ic.", "oc."]
+ items = ["gc", "gr", "ec", "er", "hp"]
+
+ choices = np.array([items.index(val) for val in heating_df[choice].to_numpy().ravel()])
+ contexts = heating_df[contexts_features].to_numpy()
+ contexts_items = np.stack(
+ [
+ heating_df[[feat + item for feat in contexts_items_features]].to_numpy()
+ for item in items
+ ],
+ axis=1,
+ )
+ return ChoiceDataset(
+ contexts_features=contexts, contexts_items_features=contexts_items, choices=choices
+ )
+
+
+def load_electricity(
+ as_frame=False,
+ to_wide=False,
+):
+ """Load and return the Electricity dataset from Kenneth Train.
+
+ Parameters
+ ----------
+ as_frame : bool, optional
+ Whether to return the dataset as pd.DataFrame. If not, returned as ChoiceDataset,
+ by default False.
+ to_wide : bool, optional
+ Whether to return the dataset in wide format,
+ by default False (and thus returned in long format).
+
+ Returns:
+ --------
+ ChoiceDataset
+ Loaded Electricity dataset
+ """
+ _ = to_wide
+ data_file_name = "electricity.csv.gz"
+ names, data = load_gzip(data_file_name)
+
+ elec_df = pd.read_csv(resources.files(DATA_MODULE) / data_file_name)
+ elec_df.choice = elec_df.choice.astype(int)
+ elec_df[["pf", "cl", "loc", "wk", "tod", "seas"]] = elec_df[
+ ["pf", "cl", "loc", "wk", "tod", "seas"]
+ ].astype(float)
+
+ if as_frame:
+ return elec_df
+
+ return ChoiceDataset.from_single_long_df(
+ df=elec_df,
+ contexts_items_features_columns=["pf", "cl", "loc", "wk", "tod", "seas"],
+ items_id_column="alt",
+ contexts_id_column="chid",
+ choice_mode="one_zero",
+ )
+
+
+def load_train(
+ as_frame=False,
+ to_wide=False,
+ return_desc=False,
+):
+ """Load and return the Train dataset from Ben-Akiva et al. (1993).
+
+ Parameters
+ ----------
+ as_frame : bool, optional
+ Whether to return the dataset as pd.DataFrame. If not, returned as ChoiceDataset,
+ by default False.
+ to_wide : bool, optional
+ Whether to return the dataset in wide format,
+ by default False (and thus returned in long format).
+ return_desc : bool, optional
+ Whether to return the description, by default False.
+
+ Returns:
+ --------
+ ChoiceDataset
+ Loaded Train dataset
+ """
+ desc = "A sample of 235 Dutchindividuals facing 2929 choice situations."
+ desc += """Ben-Akiva M, Bolduc D, Bradley M(1993).
+ “Estimation of Travel Choice Models with Randomly Distributed Values of Time.
+ ”Papers 9303, Laval-Recherche en Energie. https://ideas.repec.org/p/fth/lavaen/9303.html."""
+ _ = to_wide
+ data_file_name = "train_data.csv.gz"
+ names, data = load_gzip(data_file_name)
+
+ train_df = pd.read_csv(resources.files(DATA_MODULE) / data_file_name)
+
+ if return_desc:
+ return desc
+
+ if as_frame:
+ return train_df
+ train_df["choice"] = train_df.apply(lambda row: row.choice[-1], axis=1)
+ train_df = train_df.rename(
+ columns={
+ "price1": "1_price",
+ "time1": "1_time",
+ "change1": "1_change",
+ "comfort1": "1_comfort",
+ }
+ )
+ train_df = train_df.rename(
+ columns={
+ "price2": "2_price",
+ "time2": "2_time",
+ "change2": "2_change",
+ "comfort2": "2_comfort",
+ }
+ )
+ print(train_df.head())
+ return ChoiceDataset.from_single_wide_df(
+ df=train_df,
+ items_id=["1", "2"],
+ fixed_items_suffixes=None,
+ contexts_features_columns=["id"],
+ contexts_items_features_suffixes=["price", "time", "change", "comfort"],
+ contexts_items_availabilities_suffix=None,
+ choices_column="choice",
+ choice_mode="items_id",
+ )
diff --git a/choice_learn/datasets/data/electricity.csv.gz b/choice_learn/datasets/data/electricity.csv.gz
new file mode 100644
index 00000000..da0a7ee1
Binary files /dev/null and b/choice_learn/datasets/data/electricity.csv.gz differ
diff --git a/choice_learn/datasets/data/heating_data.csv.gz b/choice_learn/datasets/data/heating_data.csv.gz
new file mode 100644
index 00000000..5fd53831
Binary files /dev/null and b/choice_learn/datasets/data/heating_data.csv.gz differ
diff --git a/choice_learn/datasets/data/train_data.csv.gz b/choice_learn/datasets/data/train_data.csv.gz
new file mode 100644
index 00000000..540f7d32
Binary files /dev/null and b/choice_learn/datasets/data/train_data.csv.gz differ
diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py
index 4808324e..c25162cc 100644
--- a/choice_learn/models/base_model.py
+++ b/choice_learn/models/base_model.py
@@ -20,6 +20,7 @@ def __init__(
label_smoothing=0.0,
normalize_non_buy=False,
optimizer="Adam",
+ tolerance=1e-8,
callbacks=None,
lr=0.001,
epochs=1,
@@ -38,6 +39,15 @@ def __init__(
normalization,by default True
callbacks : list of tf.kera callbacks, optional
List of callbacks to add to model.fit, by default None and only add History
+ optimizer : str, optional
+ Name of the tf.keras.optimizers to be used, by default "Adam"
+ tolerance : float, optional
+ Tolerance for the L-BFGS optimizer if applied, by default 1e-8
+ lr: float, optional
+ Learning rate for the optimizer if applied, by default 0.001
+ epochs: int, optional
+ (Max) Number of epochs to train the model, by default 1
+ batch_size: int, optional
"""
self.is_fitted = False
self.normalize_non_buy = normalize_non_buy
@@ -69,6 +79,7 @@ def __init__(
self.epochs = epochs
self.batch_size = batch_size
+ self.tolerance = tolerance
@abstractmethod
def compute_batch_utility(
@@ -346,7 +357,7 @@ def fit(
contexts_items_batch,
availabilities_batch,
choices_batch,
- )[0]
+ )[0]["optimized_loss"]
)
val_logs["val_loss"].append(test_losses[-1])
temps_logs = {k: tf.reduce_mean(v) for k, v in val_logs.items()}
@@ -432,11 +443,18 @@ def batch_predict(
# Compute loss from probabilities & actual choices
# batch_loss = self.loss(probabilities, c_batch, sample_weight=sample_weight)
- batch_loss = self.loss(
- y_pred=probabilities,
- y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
- sample_weight=sample_weight,
- )
+ batch_loss = {
+ "optimized_loss": self.loss(
+ y_pred=probabilities,
+ y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+ sample_weight=sample_weight,
+ ),
+ "NegativeLogLikelihood": tf.keras.losses.CategoricalCrossentropy()(
+ y_pred=probabilities,
+ y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+ sample_weight=sample_weight,
+ ),
+ }
return batch_loss, probabilities
def save_model(self, path):
@@ -524,7 +542,7 @@ def predict_probas(self, choice_dataset, batch_size=-1):
return tf.concat(stacked_probabilities, axis=0)
- def evaluate(self, choice_dataset, batch_size=-1):
+ def evaluate(self, choice_dataset, sample_weight=None, batch_size=-1, mode="eval"):
"""Evaluates the model for each context and each product of a ChoiceDataset.
Predicts the probabilities according to the model and computes the Negative-Log-Likelihood
@@ -554,8 +572,12 @@ def evaluate(self, choice_dataset, batch_size=-1):
contexts_items_features=contexts_items_features,
contexts_items_availabilities=contexts_items_availabilities,
choices=choices,
+ sample_weight=sample_weight,
)
- batch_losses.append(loss)
+ if mode == "eval":
+ batch_losses.append(loss["NegativeLogLikelihood"])
+ elif mode == "optim":
+ batch_losses.append(loss["optimized_loss"])
if batch_size != -1:
last_batch_size = contexts_items_availabilities.shape[0]
coefficients = tf.concat(
@@ -567,13 +589,15 @@ def evaluate(self, choice_dataset, batch_size=-1):
batch_loss = tf.reduce_mean(batch_losses)
return batch_loss
- def _lbfgs_train_step(self, dataset):
+ def _lbfgs_train_step(self, dataset, sample_weight=None):
"""A factory to create a function required by tfp.optimizer.lbfgs_minimize.
Parameters
----------
dataset: ChoiceDataset
Dataset on which to estimate the paramters.
+ sample_weight: np.ndarray, optional
+ Sample weights to apply, by default None
Returns:
--------
@@ -636,7 +660,9 @@ def f(params_1d):
# update the parameters in the model
assign_new_model_parameters(params_1d)
# calculate the loss
- loss_value = self.evaluate(dataset, batch_size=-1)
+ loss_value = self.evaluate(
+ dataset, sample_weight=sample_weight, batch_size=-1, mode="optim"
+ )
# calculate gradients and convert to 1D tf.Tensor
grads = tape.gradient(loss_value, self.weights)
@@ -659,7 +685,7 @@ def f(params_1d):
f.history = []
return f
- def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8):
+ def _fit_with_lbfgs(self, dataset, epochs=None, sample_weight=None, verbose=0):
"""Fit function for L-BFGS optimizer.
Replaces the .fit method when the optimizer is set to L-BFGS.
@@ -668,10 +694,12 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8):
----------
dataset : ChoiceDataset
Dataset to be used for coefficients estimations
- n_epochs : int
+ epochs : int
Maximum number of epochs allowed to reach minimum
- tolerance : float, optional
- Maximum tolerance accepted, by default 1e-8
+ sample_weight : np.ndarray, optional
+ Sample weights to apply, by default None
+ verbose : int, optional
+ print level, for debugging, by default 0
Returns:
--------
@@ -684,7 +712,7 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8):
if epochs is None:
epochs = self.epochs
- func = self._lbfgs_train_step(dataset)
+ func = self._lbfgs_train_step(dataset, sample_weight=sample_weight)
# convert initial model parameters to a 1D tf.Tensor
init_params = tf.dynamic_stitch(func.idx, self.weights)
@@ -694,7 +722,7 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8):
value_and_gradients_function=func,
initial_position=init_params,
max_iterations=epochs,
- tolerance=tolerance,
+ tolerance=self.tolerance,
f_absolute_tolerance=-1,
f_relative_tolerance=-1,
)
@@ -702,29 +730,97 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8):
# after training, the final optimized parameters are still in results.position
# so we have to manually put them back to the model
func.assign_new_model_parameters(results.position)
- print("L-BFGS Opimization finished:")
- print("---------------------------------------------------------------")
- print("Number of iterations:", results[2].numpy())
- print("Algorithm converged before reaching max iterations:", results[0].numpy())
+ if verbose > 0:
+ print("L-BFGS Opimization finished:")
+ print("---------------------------------------------------------------")
+ print("Number of iterations:", results[2].numpy())
+ print("Algorithm converged before reaching max iterations:", results[0].numpy())
return func.history
-class RandomChoiceModel(ChoiceModel):
- """Dumb model that randomly attributes utilities to products."""
+class BaseLatentClassModel(object): # TODO: should inherit ChoiceModel ?
+ """Base Class to work with Mixtures of models."""
- def __init__(self, **kwargs):
- """Initialization of the model."""
- super().__init__(**kwargs)
+ def __init__(
+ self,
+ n_latent_classes,
+ model_class,
+ model_parameters,
+ fit_method,
+ epochs,
+ optimizer=None,
+ add_exit_choice=False,
+ tolerance=1e-6,
+ lr=0.001,
+ ):
+ """Instantiation of the model mixture.
- def compute_batch_utility(
+ Parameters
+ ----------
+ n_latent_classes : int
+ Number of latent classes
+ model_class : BaseModel
+ class of models to get a mixture of
+ model_parameters : dict
+ hyper-parameters of the models
+ fit_method : str
+ Method to estimate the parameters: "EM", "MLE".
+ epochs : int
+ Number of epochs to train the model.
+ optimizer: str, optional
+ Name of the tf.keras.optimizers to be used if one is used, by default None
+ add_exit_choice : bool, optional
+ Whether or not to add an exit choice, by default False
+ tolerance: float, optional
+ Tolerance for the L-BFGS optimizer if applied, by default 1e-6
+ lr: float, optional
+ Learning rate for the optimizer if applied, by default 0.001
+ """
+ self.n_latent_classes = n_latent_classes
+ if isinstance(model_parameters, list):
+ if not len(model_parameters) == n_latent_classes:
+ raise ValueError(
+ """If you specify a list of hyper-parameters, it means that you want to use\
+ different hyper-parameters for each latent class. In this case, the length\
+ of the list must be equal to the number of latent classes."""
+ )
+ self.model_parameters = model_parameters
+ else:
+ self.model_parameters = [model_parameters] * n_latent_classes
+ self.model_class = model_class
+ self.fit_method = fit_method
+
+ self.epochs = epochs
+ self.add_exit_choice = add_exit_choice
+ self.tolerance = tolerance
+ self.optimizer = optimizer
+ self.lr = lr
+
+ self.loss = tf_ops.CustomCategoricalCrossEntropy(from_logits=False, label_smoothing=0)
+ self.instantiated = False
+
+ def instantiate(self, **kwargs):
+ """Instantiation."""
+ init_logit = tf.Variable(
+ tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(self.n_latent_classes - 1,)),
+ name="Latent-Logits",
+ )
+ self.latent_logits = init_logit
+ self.models = [self.model_class(**mp) for mp in self.model_parameters]
+ for model in self.models:
+ model.instantiate(**kwargs)
+
+ # @tf.function
+ def batch_predict(
self,
fixed_items_features,
contexts_features,
contexts_items_features,
contexts_items_availabilities,
choices,
+ sample_weight=None,
):
- """Computes the random utility for each product of each context.
+ """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset.
Parameters
----------
@@ -744,42 +840,58 @@ def compute_batch_utility(
choices_batch : np.ndarray
Choices
Shape must be (n_contexts, )
+ sample_weight : np.ndarray, optional
+ List samples weights to apply during the gradient descent to the batch elements,
+ by default None
Returns:
--------
- tf.Tensor
- (n_contexts, n_items) matrix of random utilities
+ dict
+ Batch losses under the keys "optimized_loss" and "NegativeLogLikelihood"
+ tf.Tensor (batch_size, n_items)
+ Probabilities for each product to be chosen for each context
"""
- # In order to avoid unused arguments warnings
- _ = fixed_items_features, contexts_features, contexts_items_availabilities, choices
- return np.squeeze(
- np.random.uniform(shape=(contexts_items_features.shape), minval=0, maxval=1)
+ # Compute utilities from features
+ utilities = self.compute_batch_utility(
+ fixed_items_features,
+ contexts_features,
+ contexts_items_features,
+ contexts_items_availabilities,
+ choices,
)
- def fit(**kwargs):
- """Make sure that nothing happens during .fit."""
- _ = kwargs
- return {}
-
-
-class DistribMimickingModel(ChoiceModel):
- """Dumb class model that mimicks the probabilities.
-
- It stores the encountered in the train datasets and always returns them
- """
-
- def __init__(self, **kwargs):
- """Initialization of the model."""
- super().__init__(**kwargs)
- self.weights = []
+ latent_probabilities = tf.concat(
+ [[tf.constant(1.0)], tf.math.exp(self.latent_logits)], axis=0
+ )
+ latent_probabilities = latent_probabilities / tf.reduce_sum(latent_probabilities)
+ # Compute probabilities from utilities & availabilties
+ probabilities = []
+ for i, class_utilities in enumerate(utilities):
+ class_probabilities = tf_ops.softmax_with_availabilities(
+ contexts_items_logits=class_utilities,
+ contexts_items_availabilities=contexts_items_availabilities,
+ normalize_exit=self.add_exit_choice,
+ axis=-1,
+ )
+ probabilities.append(class_probabilities * latent_probabilities[i])
+ # Summing over the latent classes
+ probabilities = tf.reduce_sum(probabilities, axis=0)
- def fit(self, choice_dataset, **kwargs):
- """Computes the choice frequency of each product and defines it as choice probabilities."""
- _ = kwargs
- choices = choice_dataset.choices
- for i in range(choice_dataset.get_num_items()):
- self.weights.append(tf.reduce_sum(tf.cast(choices == i, tf.float32)))
- self.weights = tf.stack(self.weights) / len(choices)
+ # Compute loss from probabilities & actual choices
+ # batch_loss = self.loss(probabilities, c_batch, sample_weight=sample_weight)
+ batch_loss = {
+ "optimized_loss": self.loss(
+ y_pred=probabilities,
+ y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+ sample_weight=sample_weight,
+ ),
+ "NegativeLogLikelihood": tf.keras.losses.CategoricalCrossentropy()(
+ y_pred=probabilities,
+ y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+ sample_weight=sample_weight,
+ ),
+ }
+ return batch_loss, probabilities
def compute_batch_utility(
self,
@@ -789,7 +901,9 @@ def compute_batch_utility(
contexts_items_availabilities,
choices,
):
- """Returns utility that is fixed. U = log(P).
+ """Latent class computation of utility.
+
+ It computes the utility for each of the latent models and stores them in a list.
Parameters
----------
@@ -810,19 +924,425 @@ def compute_batch_utility(
Choices
Shape must be (n_contexts, )
+ Returns:
+ --------
+ list of np.ndarray
+ List of:
+ Utility of each product for each context.
+ Shape must be (n_contexts, n_items)
+ for each of the latent models.
+ """
+ utilities = []
+ # Iterates over latent models
+ for model in self.models:
+ model_utilities = model.compute_batch_utility(
+ fixed_items_features=fixed_items_features,
+ contexts_features=contexts_features,
+ contexts_items_features=contexts_items_features,
+ contexts_items_availabilities=contexts_items_availabilities,
+ choices=choices,
+ )
+ utilities.append(model_utilities)
+ return utilities
+
+    def fit(self, dataset, sample_weight=None, verbose=0):
+        """Fit the model on a ChoiceDataset with the method set in `fit_method`.
+
+        Parameters
+        ----------
+        dataset : ChoiceDataset
+            Dataset to be used for coefficients estimations
+        sample_weight : np.ndarray, optional
+            sample weights to apply, not used by the "EM" fit method, by default None
+        verbose : int, optional
+            print level, for debugging, by default 0
+
+        Returns:
+        --------
+        dict
+            Fit history
+        """
+        if self.fit_method.lower() == "em":
+            self.minf = np.log(1e-3)
+            print("Expectation-Maximization estimation algorithm not well implemented yet.")
+            # _em_fit takes no sample_weight argument: forwarding it raised a TypeError
+            return self._em_fit(dataset=dataset, verbose=verbose)
+
+        if self.fit_method.lower() == "mle":
+            if self.optimizer.lower() in ("lbfgs", "l-bfgs"):
+                return self._fit_with_lbfgs(
+                    dataset=dataset, sample_weight=sample_weight, verbose=verbose
+                )
+            return self._fit_normal(dataset=dataset, sample_weight=sample_weight, verbose=verbose)
+
+        raise ValueError(f"Fit method not implemented: {self.fit_method}")
+
+    def evaluate(self, choice_dataset, sample_weight=None, batch_size=-1, mode="eval"):
+        """Evaluates the model for each context and each product of a ChoiceDataset.
+
+        Predicts the probabilities with the model and averages, over the dataset, either the
+        Negative-Log-Likelihood (mode="eval") or the optimized loss (mode="optim").
+
+        Parameters
+        ----------
+        choice_dataset : ChoiceDataset
+            Dataset on which to evaluate the model
+
Returns:
--------
np.ndarray (n_contexts, n_items)
-            Utilities
+            NOTE(review): the returned value is the dataset-averaged loss, not probabilities
+        """
+        batch_losses = []
+        for (
+            fixed_items_features,
+            contexts_features,
+            contexts_items_features,
+            contexts_items_availabilities,
+            choices,
+        ) in choice_dataset.iter_batch(batch_size=batch_size):
+            loss, _ = self.batch_predict(
+                fixed_items_features=fixed_items_features,
+                contexts_features=contexts_features,
+                contexts_items_features=contexts_items_features,
+                contexts_items_availabilities=contexts_items_availabilities,
+                choices=choices,
+                sample_weight=sample_weight,
+            )
+            if mode == "eval":  # any other value than "eval"/"optim" appends nothing
+                batch_losses.append(loss["NegativeLogLikelihood"])
+            elif mode == "optim":
+                batch_losses.append(loss["optimized_loss"])
+        if batch_size != -1:  # weight each batch loss by its size before averaging
+            last_batch_size = contexts_items_availabilities.shape[0]
+            coefficients = tf.concat(
+                [tf.ones(len(batch_losses) - 1) * batch_size, [last_batch_size]], axis=0
+            )
+            batch_losses = tf.multiply(batch_losses, coefficients)
+            batch_loss = tf.reduce_sum(batch_losses) / len(choice_dataset)
+        else:
+            batch_loss = tf.reduce_mean(batch_losses)
+        return batch_loss
+
+    def _lbfgs_train_step(self, dataset, sample_weight=None):
+        """A factory to create a function required by tfp.optimizer.lbfgs_minimize.
+
+        Parameters
+        ----------
+        dataset: ChoiceDataset
+            Dataset on which to estimate the parameters.
+        sample_weight: np.ndarray, optional
+            Sample weights to apply, by default None
+
+        Returns:
+        --------
+        function
+            with the signature:
+                loss_value, gradients = f(model_parameters).
+        """
+        # collect every latent model weight, then the latent logits, and remember
+        # which model (and which position in it) each collected tensor comes from
+        weights = []
+        w_to_model = []
+        w_to_model_indexes = []
+        for i, model in enumerate(self.models):
+            for j, w in enumerate(model.weights):
+                weights.append(w)
+                w_to_model.append(i)
+                w_to_model_indexes.append(j)
+        weights.append(self.latent_logits)
+        w_to_model.append(-1)
+        w_to_model_indexes.append(-1)
+        shapes = tf.shape_n(weights)
+        n_tensors = len(shapes)
+
+        # we'll use tf.dynamic_stitch and tf.dynamic_partition later, so we need to
+        # prepare required information first
+        count = 0
+        idx = []  # stitch indices
+        part = []  # partition indices
+
+        for i, shape in enumerate(shapes):
+            n = np.prod(shape)  # np.product was a deprecated alias, removed in NumPy 2.0
+            idx.append(tf.reshape(tf.range(count, count + n, dtype=tf.int32), shape))
+            part.extend([i] * n)
+            count += n
+
+        part = tf.constant(part)
+
+        @tf.function
+        def assign_new_model_parameters(params_1d):
+            """A function updating the model's parameters with a 1D tf.Tensor.
-        Raises:
-        -------
-        ValueError
-            If the model has not been fitted cannot evaluate the utility
+            Parameters
+            ----------
+            params_1d: tf.Tensor
+                a 1D tf.Tensor representing the model's trainable parameters.
+            """
+            params = tf.dynamic_partition(params_1d, part, n_tensors)
+            for i, (shape, param) in enumerate(zip(shapes, params)):
+                if w_to_model[i] != -1:
+                    self.models[w_to_model[i]].weights[w_to_model_indexes[i]].assign(
+                        tf.reshape(param, shape)
+                    )
+                else:
+                    self.latent_logits.assign(tf.reshape(param, shape))
+
+        # now create a function that will be returned by this factory
+        @tf.function
+        def f(params_1d):
+            """A function that can be used by tfp.optimizer.lbfgs_minimize.
+
+            This function is created by _lbfgs_train_step.
+
+            Parameters
+            ----------
+            params_1d: tf.Tensor
+                a 1D tf.Tensor.
+
+            Returns:
+            --------
+            tf.Tensor
+                A scalar loss computed with the parameters `params_1d`.
+            tf.Tensor
+                A 1D tf.Tensor representing the gradients w.r.t. the `params_1d`.
+            """
+            # use GradientTape so that we can calculate the gradient of loss w.r.t. parameters
+            with tf.GradientTape() as tape:
+                # update the parameters in the model
+                assign_new_model_parameters(params_1d)
+                # calculate the loss
+                loss_value = self.evaluate(
+                    dataset, sample_weight=sample_weight, batch_size=-1, mode="optim"
+                )
+            # calculate gradients and convert to 1D tf.Tensor
+            grads = tape.gradient(loss_value, weights)
+            grads = tf.dynamic_stitch(idx, grads)
+
+            # count the evaluations of the objective function
+            f.iter.assign_add(1)
+
+            # store loss value so we can retrieve later
+            tf.py_function(f.history.append, inp=[loss_value], Tout=[])
+
+            return loss_value, grads
+
+        # store these information as members so we can use them outside the scope
+        f.iter = tf.Variable(0)
+        f.idx = idx
+        f.part = part
+        f.shapes = shapes
+        f.assign_new_model_parameters = assign_new_model_parameters
+        f.history = []
+        return f
+
+    def _fit_with_lbfgs(self, dataset, epochs=None, sample_weight=None, verbose=0):
+        """Estimate all the parameters with the L-BFGS algorithm.
+
+        Used in place of the standard fit when the optimizer is set to L-BFGS.
+
+        Parameters
+        ----------
+        dataset : ChoiceDataset
+            Dataset to be used for coefficients estimations
+        epochs : int
+            Maximum number of L-BFGS iterations, self.epochs is used if None
+        sample_weight : np.ndarray, optional
+            Sample weights to apply, by default None
+        verbose : int, optional
+            print level, for debugging, by default 0
+
+        Returns:
+        --------
+        list
+            Loss values stored by the objective function during the optimization
+        """
+        # Imported lazily so that tensorflow_probability is only required when the
+        # L-BFGS optimizer is actually used
+        import tensorflow_probability as tfp
+
+        max_iterations = self.epochs if epochs is None else epochs
+        objective = self._lbfgs_train_step(dataset, sample_weight=sample_weight)
+
+        # flatten the weights of every latent model, followed by the latent logits,
+        # into the single 1D tf.Tensor expected by the solver
+        variables = [w for model in self.models for w in model.weights]
+        variables.append(self.latent_logits)
+        start_position = tf.dynamic_stitch(objective.idx, variables)
+
+        # train the model with the L-BFGS solver
+        optim_results = tfp.optimizer.lbfgs_minimize(
+            value_and_gradients_function=objective,
+            initial_position=start_position,
+            max_iterations=max_iterations,
+            tolerance=-1,
+            f_absolute_tolerance=self.tolerance,
+            f_relative_tolerance=-1,
+            x_tolerance=-1,
+        )
+
+        # the optimized parameters only live in optim_results.position:
+        # they have to be written back into the model variables
+        objective.assign_new_model_parameters(optim_results.position)
+        if verbose > 0:
+            print("L-BFGS Opimization finished:")
+            print("---------------------------------------------------------------")
+            print("Number of iterations:", optim_results.num_iterations.numpy())
+            print(
+                "Algorithm converged before reaching max iterations:",
+                optim_results.converged.numpy(),
+            )
+        return objective.history
+
+    def _gd_train_step(self, dataset, sample_weight=None):
+        """Placeholder for a gradient-descent train step: does nothing and returns None."""
+
+    def _nothing(self, inputs):
+        """Compute choice probabilities as a mixture over the latent classes.
+
+        Parameters
+        ----------
+        inputs : tuple
+            Positional arguments of compute_batch_utility, in the order of its signature
+
+        Returns:
+        --------
+        tf.Tensor
+            Latent-class weighted sum of the availability-normalized softmax probabilities
+        """
+        latent_probas = tf.clip_by_value(
+            self.latent_logits - tf.reduce_max(self.latent_logits), self.minf, 0
+        )
+        latent_probas = tf.math.exp(latent_probas)
+        # NOTE(review): indexed by q < n_latent_classes below - confirm latent_logits is that long
+        latent_probas = latent_probas / tf.reduce_sum(latent_probas)
+        proba_list = []
+        avail = inputs[3]  # 4th argument of compute_batch_utility; index 4 is `choices`
+        for q in range(self.n_latent_classes):
+            combined = self.models[q].compute_batch_utility(*inputs)
+            combined = tf.clip_by_value(
+                combined - tf.reduce_max(combined, axis=1, keepdims=True), self.minf, 0
+            )
+            combined = tf.keras.layers.Activation(activation=tf.nn.softmax)(combined)
+            # the softmax ignores availabilities: mask the items, then renormalize each row
+            combined = combined * avail
+            combined = latent_probas[q] * tf.math.divide(
+                combined, tf.reduce_sum(combined, axis=1, keepdims=True)
+            )
+            # add a trailing axis so that the classes can be concatenated along it
+            combined = tf.expand_dims(combined, -1)
+            proba_list.append(combined)
+
+        proba_final = tf.keras.layers.Concatenate(axis=2)(proba_list)
+        return tf.math.reduce_sum(proba_final, axis=2, keepdims=False)
+
+    def _expectation(self, dataset):
+        """E-step: return the normalized per-class weights of each choice and a summed log."""
+        predicted_probas = [model.predict_probas(dataset) for model in self.models]
+        if np.sum(np.isnan(predicted_probas)) > 0:
+            print("Nan in probas")
+        # For each latent model: probability given to the observed choice, times the latent term
+        predicted_probas = [
+            latent
+            * tf.gather_nd(
+                params=proba,
+                indices=tf.stack([tf.range(0, len(dataset), 1), dataset.choices], axis=1),
+            )
+            for latent, proba in zip(self.latent_logits, predicted_probas)
+        ]
+        # NOTE(review): zip truncates to the shortest input - confirm one latent logit per model
+        predicted_probas = np.stack(predicted_probas, axis=1) + 1e-10
+        loss = np.sum(np.log(np.sum(predicted_probas, axis=1)))
+        # Rows are normalized over the latent models (axis 1) before being returned
+        return predicted_probas / np.sum(predicted_probas, axis=1, keepdims=True), loss
+
+    def _maximization(self, dataset, verbose=0):
+        """M-step: re-create and refit every latent model, then update the class shares.
+
+        Parameters
+        ----------
+        dataset : ChoiceDataset
+            Dataset on which each latent model is fitted, weighted by self.weights[:, q]
+        verbose : int, optional
+            print level, for debugging, by default 0
+
+        Returns:
+        --------
+        np.ndarray
+            self.weights summed over its first axis and normalized to sum to one
+        """
+        self.models = [self.model_class(**mp) for mp in self.model_parameters]
+        # M-step: MNL estimation
+        for q in range(self.n_latent_classes):
+            self.models[q].fit(dataset, sample_weight=self.weights[:, q], verbose=verbose)
+
+        # M-step: latent probability estimation
+        latent_probas = np.sum(self.weights, axis=0)
+
+        return latent_probas / np.sum(latent_probas)
+
+    def _em_fit(self, dataset, verbose=0):
+        """Estimate the model with the Expectation-Maximization algorithm.
+
+        Parameters
+        ----------
+        dataset: ChoiceDataset
+            Dataset to be used for coefficients estimations
+        verbose : int, optional
+            print level, for debugging, by default 0
+
+        Returns:
+        --------
+        list
+            Latent logits obtained at the end of each epoch
+        list
+            Loss returned by the expectation step of each epoch
+        """
+        logits_history, loss_history = [], []
+        # Initialization: a first fit of each latent model with random sample weights
+        for latent_model in self.models:
+            random_weights = np.random.rand(len(dataset))
+            latent_model.fit(dataset, sample_weight=random_weights, verbose=verbose)
+        for _ in tqdm.trange(self.epochs):
+            # one expectation step followed by one maximization step
+            self.weights, epoch_loss = self._expectation(dataset)
+            self.latent_logits = self._maximization(dataset, verbose=verbose)
+            logits_history.append(self.latent_logits)
+            loss_history.append(epoch_loss)
+            if np.isnan(self.latent_logits).any():
+                print("Nan in logits")
+                break
+        return logits_history, loss_history
+
+    def predict_probas(self, choice_dataset, batch_size=-1):
+        """Compute the choice probabilities of every item for every context of a ChoiceDataset.
+
+        Parameters
+        ----------
+        choice_dataset : ChoiceDataset
+            Dataset on which the probabilities are predicted
+        batch_size : int, optional
+            Size of the batches iterated over for the prediction, by default -1
+
Returns:
--------
np.ndarray (n_contexts, n_items)
Choice probabilties for each context and each product
"""
-        # In order to avoid unused arguments warnings
-        _ = fixed_items_features, contexts_features, contexts_items_availabilities
-        _ = contexts_items_features
-        if self.weights is None:
-            raise ValueError("Model not fitted")
-        return np.stack([np.log(self.weights.numpy())] * len(choices), axis=0)
+        batches_probabilities = []
+        for batch in choice_dataset.iter_batch(batch_size=batch_size):
+            (
+                fixed_items_features,
+                contexts_features,
+                contexts_items_features,
+                contexts_items_availabilities,
+                choices,
+            ) = batch
+            # batch_predict returns (loss, probabilities): only the latter is kept
+            batches_probabilities.append(self.batch_predict(
+                fixed_items_features=fixed_items_features,
+                contexts_features=contexts_features,
+                contexts_items_features=contexts_items_features,
+                contexts_items_availabilities=contexts_items_availabilities,
+                choices=choices,
+            )[1])
+        return tf.concat(batches_probabilities, axis=0)
diff --git a/choice_learn/models/baseline_models.py b/choice_learn/models/baseline_models.py
new file mode 100644
index 00000000..4a93ebbf
--- /dev/null
+++ b/choice_learn/models/baseline_models.py
@@ -0,0 +1,124 @@
+"""Models to be used as baselines for choice modeling. Nothing smart here."""
+import numpy as np
+import tensorflow as tf
+
+from .base_model import ChoiceModel
+
+
+class RandomChoiceModel(ChoiceModel):
+    """Dumb model that randomly attributes utilities to products."""
+
+    def __init__(self, **kwargs):
+        """Initialization of the model."""
+        super().__init__(**kwargs)
+
+    def compute_batch_utility(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+    ):
+        """Computes the random utility for each product of each context.
+
+        Parameters
+        ----------
+        fixed_items_features : tuple of np.ndarray
+            Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products
+            constant/fixed features.
+            Shape must be (n_items, n_items_features)
+        contexts_features : tuple of np.ndarray (contexts_features)
+            a batch of contexts features
+            Shape must be (n_contexts, n_contexts_features)
+        contexts_items_features : tuple of np.ndarray (contexts_items_features)
+            a batch of contexts items features
+            Shape must be (n_contexts, n_contexts_items_features)
+        contexts_items_availabilities : np.ndarray
+            A batch of contexts items availabilities
+            Shape must be (n_contexts, n_items)
+        choices : np.ndarray
+            Choices
+            Shape must be (n_contexts, )
+
+        Returns:
+        --------
+        np.ndarray
+            Random utilities drawn uniformly in [0, 1), squeezed of their size-1 dimensions
+        """
+        # Unused arguments. NOTE(review): .shape assumes an array, not the documented tuple
+        _ = fixed_items_features, contexts_features, contexts_items_availabilities, choices
+        return np.squeeze(
+            np.random.uniform(low=0, high=1, size=contexts_items_features.shape)
+        )
+
+    def fit(self, choice_dataset=None, **kwargs):
+        """Make sure that nothing happens during .fit, whatever the arguments."""
+        _ = choice_dataset, kwargs
+        return {}
+
+
+class DistribMimickingModel(ChoiceModel):
+    """Dumb class model that mimicks the probabilities.
+
+    It stores the choice frequencies seen in the train dataset and always returns them
+    """
+
+    def __init__(self, **kwargs):
+        """Initialization of the model."""
+        super().__init__(**kwargs)
+        self.weights = []
+
+    def fit(self, choice_dataset, **kwargs):
+        """Computes the choice frequency of each product and defines it as choice probabilities."""
+        _ = kwargs
+        choices = choice_dataset.choices
+        n_items = choice_dataset.get_num_items()  # counts stay local so that refitting works
+        counts = [tf.reduce_sum(tf.cast(choices == i, tf.float32)) for i in range(n_items)]
+        self.weights = tf.stack(counts) / len(choices)
+
+    def compute_batch_utility(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+    ):
+        """Returns utility that is fixed. U = log(P).
+
+        Parameters
+        ----------
+        fixed_items_features : tuple of np.ndarray
+            Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products
+            constant/fixed features.
+            Shape must be (n_items, n_items_features)
+        contexts_features : tuple of np.ndarray (contexts_features)
+            a batch of contexts features
+            Shape must be (n_contexts, n_contexts_features)
+        contexts_items_features : tuple of np.ndarray (contexts_items_features)
+            a batch of contexts items features
+            Shape must be (n_contexts, n_contexts_items_features)
+        contexts_items_availabilities : np.ndarray
+            A batch of contexts items availabilities
+            Shape must be (n_contexts, n_items)
+        choices : np.ndarray
+            Choices
+            Shape must be (n_contexts, )
+
+        Returns:
+        --------
+        np.ndarray (n_contexts, n_items)
+            Utilities
+
+        Raises:
+        -------
+        ValueError
+            If the model has not been fitted cannot evaluate the utility
+        """
+        # In order to avoid unused arguments warnings
+        _ = fixed_items_features, contexts_features, contexts_items_availabilities
+        _ = contexts_items_features
+        if self.weights is None or len(self.weights) == 0:  # still the empty list of __init__
+            raise ValueError("Model not fitted")
+        return np.stack([np.log(self.weights.numpy())] * len(choices), axis=0)
diff --git a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py
index 5a95b60c..27f8f54d 100644
--- a/choice_learn/models/conditional_mnl.py
+++ b/choice_learn/models/conditional_mnl.py
@@ -324,6 +324,7 @@ def instantiate_from_specifications(self):
## Fill items_indexes here
# Better organize feat_to_weight and specifications
+ self.weights = weights
return weights
def _store_dataset_features_names(self, dataset):
@@ -629,16 +630,18 @@ def instantiate(
"""
# Possibility to stack weights to be faster ????
if items_features_names is None:
- items_features_names = []
+ items_features_names = [()]
if contexts_features_names is None:
- contexts_features_names = []
+ contexts_features_names = [()]
if contexts_items_features_names is None:
- contexts_items_features_names = []
+ contexts_items_features_names = [()]
weights = []
weights_count = 0
self._items_features_names = []
for feat_tuple in items_features_names:
tuple_names = []
+ if feat_tuple is None:
+ feat_tuple = ()
for feat in feat_tuple:
if feat in self.params.keys():
if self.params[feat] == "constant":
@@ -671,6 +674,8 @@ def instantiate(
self._contexts_features_names = []
for feat_tuple in contexts_features_names:
+ if feat_tuple is None:
+ feat_tuple = ()
tuple_names = []
for feat in feat_tuple:
if feat in self.params.keys():
@@ -706,6 +711,8 @@ def instantiate(
self._contexts_items_features_names = []
for feat_tuple in contexts_items_features_names:
+ if feat_tuple is None:
+ feat_tuple = ()
tuple_names = []
for feat in feat_tuple:
if feat in self.params.keys():
@@ -783,6 +790,7 @@ def instantiate(
self.instantiated = True
else:
raise ValueError("No weights instantiated")
+ self.weights = weights
return weights
def compute_batch_utility(
@@ -820,6 +828,7 @@ def compute_batch_utility(
Computed utilities of shape (n_choices, n_items).
"""
if isinstance(self.params, ModelSpecification):
+ print("Model in instantiated using manual specification")
return self.compute_batch_utility_from_specification(
fixed_items_features=fixed_items_features,
contexts_features=contexts_features,
@@ -1001,7 +1010,14 @@ def fit(self, choice_dataset, get_report=False, **kwargs):
self.report = self.compute_report(choice_dataset)
return fit
- def _fit_with_lbfgs(self, choice_dataset, epochs=None, tolerance=1e-8, get_report=False):
+ def _fit_with_lbfgs(
+ self,
+ choice_dataset,
+ epochs=None,
+ sample_weight=None,
+ get_report=False,
+ **kwargs,
+ ):
"""Specific fit function to estimate the paramters with LBFGS.
Parameters
@@ -1034,7 +1050,12 @@ def _fit_with_lbfgs(self, choice_dataset, epochs=None, tolerance=1e-8, get_repor
self.instantiated = True
if epochs is None:
epochs = self.epochs
- fit = super()._fit_with_lbfgs(choice_dataset, epochs, tolerance)
+ fit = super()._fit_with_lbfgs(
+ dataset=choice_dataset,
+ epochs=epochs,
+ sample_weight=sample_weight,
+ **kwargs,
+ )
if get_report:
self.report = self.compute_report(choice_dataset)
return fit
@@ -1113,7 +1134,7 @@ def get_weights_std(self, dataset):
probabilities = tf.nn.softmax(utilities, axis=-1)
loss = tf.keras.losses.CategoricalCrossentropy(reduction="sum")(
y_pred=probabilities,
- y_true=tf.one_hot(dataset.choices, depth=4),
+ y_true=tf.one_hot(dataset.choices, depth=probabilities.shape[1]),
)
# Compute the Jacobian
jacobian = tape_2.jacobian(loss, w)
diff --git a/choice_learn/models/latent_class_mnl.py b/choice_learn/models/latent_class_mnl.py
new file mode 100644
index 00000000..94561ff6
--- /dev/null
+++ b/choice_learn/models/latent_class_mnl.py
@@ -0,0 +1,349 @@
+"""Latent Class MNL models."""
+import copy
+
+import tensorflow as tf
+
+from .base_model import BaseLatentClassModel
+from .conditional_mnl import ConditionalMNL, ModelSpecification
+from .simple_mnl import SimpleMNL
+
+
+class LatentClassSimpleMNL(BaseLatentClassModel):
+    """Latent Class model whose latent models are SimpleMNL."""
+
+    def __init__(
+        self,
+        n_latent_classes,
+        fit_method,
+        epochs,
+        add_exit_choice=False,
+        tolerance=1e-6,
+        intercept=None,
+        optimizer="Adam",
+        lr=0.001,
+        **kwargs,
+    ):
+        """Store the settings and build the parameters given to each latent SimpleMNL.
+
+        Parameters
+        ----------
+        n_latent_classes : int
+            Number of latent classes.
+        fit_method : str
+            Method to be used to estimate the model.
+        epochs : int
+            Number of epochs
+        add_exit_choice : bool, optional
+            Whether to normalize probabilities with exit choice, by default False
+        tolerance : float, optional
+            LBFG-S tolerance, by default 1e-6
+        intercept : str, optional
+            Type of intercept to include in the SimpleMNL.
+            Must be in (None, 'item', 'item-full', 'constant'), by default None
+        optimizer : str, optional
+            tf.keras.optimizers to be used, by default "Adam"
+        lr : float, optional
+            Learning rate to use for optimizer if relevant, by default 0.001
+        """
+        self.n_latent_classes = n_latent_classes
+        self.intercept = intercept
+        latent_model_kwargs = dict(
+            add_exit_choice=add_exit_choice,
+            intercept=intercept,
+            optimizer=optimizer,
+            tolerance=tolerance,
+            lr=lr,
+            epochs=epochs,
+        )
+
+        super().__init__(
+            model_class=SimpleMNL,
+            model_parameters=latent_model_kwargs,
+            n_latent_classes=n_latent_classes,
+            fit_method=fit_method,
+            epochs=epochs,
+            add_exit_choice=add_exit_choice,
+            tolerance=tolerance,
+            optimizer=optimizer,
+            lr=lr,
+            **kwargs,
+        )
+
+    def instantiate_latent_models(
+        self, n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features
+    ):
+        """Instantiate the indexes and weights of every latent SimpleMNL.
+
+        Parameters
+        ----------
+        n_items : int
+            Number of items/alternatives to consider.
+        n_fixed_items_features : int
+            Number of fixed items features.
+        n_contexts_features : int
+            Number of contexts features
+        n_contexts_items_features : int
+            Number of contexts items features
+        """
+        for latent_model in self.models:
+            instantiation = latent_model.instantiate(
+                n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features
+            )
+            latent_model.indexes, latent_model.weights = instantiation
+            latent_model.instantiated = True
+
+    def instantiate(
+        self, n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features
+    ):
+        """Create the latent logits and the latent models, then instantiate the latter."""
+        logits_initializer = tf.random_normal_initializer(0.0, 0.02, seed=42)
+        self.latent_logits = tf.Variable(
+            logits_initializer(shape=(self.n_latent_classes - 1,)), name="Latent-Logits"
+        )
+
+        self.models = [self.model_class(**params) for params in self.model_parameters]
+
+        self.instantiate_latent_models(
+            n_items=n_items,
+            n_fixed_items_features=n_fixed_items_features,
+            n_contexts_features=n_contexts_features,
+            n_contexts_items_features=n_contexts_items_features,
+        )
+
+    def fit(self, dataset, **kwargs):
+        """Instantiate the model from the dataset if needed, then estimate it.
+
+        Parameters
+        ----------
+        dataset : ChoiceDataset
+            Dataset to fit the model to.
+        """
+        if not self.instantiated:
+            n_items = dataset.get_n_items()
+            n_fixed = dataset.get_n_fixed_items_features()
+            n_contexts = dataset.get_n_contexts_features()
+            n_contexts_items = dataset.get_n_contexts_items_features()
+            self.instantiate(n_items, n_fixed, n_contexts, n_contexts_items)
+        return super().fit(dataset, **kwargs)
+
+
+class LatentClassConditionalMNL(BaseLatentClassModel):
+    """Latent Class for ConditionalMNL."""
+
+    def __init__(
+        self,
+        n_latent_classes,
+        fit_method,
+        parameters=None,
+        epochs=1,
+        add_exit_choice=False,
+        tolerance=1e-6,
+        optimizer="Adam",
+        lr=0.001,
+        **kwargs,
+    ):
+        """Initialization.
+
+        Parameters
+        ----------
+        n_latent_classes : int
+            Number of latent classes.
+        fit_method : str
+            Method to be used to estimate the model.
+        parameters : dict or ModelSpecification
+            Dictionary containing the parametrization of the model.
+            The dictionary must have the following structure:
+            {feature_name_1: mode_1, feature_name_2: mode_2, ...}
+            mode must be among "constant", "item", "item-full" for now
+            (same specifications as torch-choice).
+        epochs : int
+            Number of epochs
+        add_exit_choice : bool, optional
+            Whether to normalize probabilities with exit choice, by default False
+        tolerance : float, optional
+            LBFG-S tolerance, by default 1e-6
+        optimizer : str, optional
+            tf.keras.optimizers to be used, by default "Adam"
+        lr : float, optional
+            Learning rate to use for optimizer if relevant, by default 0.001
+        """
+        self.n_latent_classes = n_latent_classes
+        self.fit_method = fit_method
+        self.params = parameters
+        self.epochs = epochs
+        self.add_exit_choice = add_exit_choice
+        self.tolerance = tolerance
+        self.optimizer = optimizer
+        self.lr = lr
+
+        model_params = {
+            "parameters": self.params,
+            "add_exit_choice": self.add_exit_choice,
+            "optimizer": self.optimizer,
+            "tolerance": self.tolerance,
+            "lr": self.lr,
+            "epochs": self.epochs,
+        }
+
+        super().__init__(
+            model_class=ConditionalMNL,
+            model_parameters=model_params,
+            n_latent_classes=n_latent_classes,
+            fit_method=fit_method,
+            epochs=epochs,
+            add_exit_choice=add_exit_choice,
+            tolerance=tolerance,
+            optimizer=optimizer,
+            lr=lr,
+            **kwargs,
+        )
+
+    def instantiate_latent_models(
+        self,
+        n_items,
+        items_features_names,
+        contexts_features_names,
+        contexts_items_features_names,
+    ):
+        """Instantiation of the Latent Models that are ConditionalMNLs.
+
+        Parameters
+        ----------
+        n_items : int
+            Number of items/alternatives to consider.
+        items_features_names: str,
+            Names of fixed_items_features
+        contexts_features_names: str,
+            Names of contexts features
+        contexts_items_features_names: str,
+            Names of contexts items features
+        """
+        if isinstance(self.params, ModelSpecification):
+            for model in self.models:
+                model.params = copy.deepcopy(self.params)
+                model.weights = model.instantiate_from_specifications()
+
+                model._items_features_names = items_features_names
+                model._contexts_features_names = contexts_features_names
+                model._contexts_items_features_names = contexts_items_features_names
+        else:
+            for model in self.models:
+                model.params = self.params
+                model.weights = model.instantiate(  # returns the weights only, no indexes
+                    num_items=n_items,
+                    items_features_names=items_features_names,
+                    contexts_features_names=contexts_features_names,
+                    contexts_items_features_names=contexts_items_features_names,
+                )
+                model.instantiated = True
+
+    def instantiate(
+        self,
+        n_items,
+        items_features_names,
+        contexts_features_names,
+        contexts_items_features_names,
+    ):
+        """Instantiation of the Latent Class MNL model."""
+        self.latent_logits = tf.Variable(
+            tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(self.n_latent_classes - 1,)),
+            name="Latent-Logits",
+        )
+
+        self.models = [self.model_class(**mp) for mp in self.model_parameters]
+
+        self.instantiate_latent_models(
+            n_items=n_items,
+            items_features_names=items_features_names,
+            contexts_features_names=contexts_features_names,
+            contexts_items_features_names=contexts_items_features_names,
+        )
+
+    def add_coefficients(
+        self, coefficient_name, feature_name, items_indexes=None, items_names=None
+    ):
+        """Adds a coefficient to the model through the specification of the utility.
+
+        Parameters
+        ----------
+        coefficient_name : str
+            Name given to the coefficient.
+        feature_name : str
+            features name to which the coefficient is associated. It should work with
+            the names given.
+            in the ChoiceDataset that will be used for parameters estimation.
+        items_indexes : list of int, optional
+            list of items indexes (in the ChoiceDataset) for which we need to add a coefficient,
+            by default None
+        items_names : list of str, optional
+            list of items names (in the ChoiceDataset) for which we need to add a coefficient,
+            by default None
+
+        Raises:
+        -------
+        ValueError
+            When the model parameters were given as a dict instead of a ModelSpecification.
+        """
+        if self.params is None:
+            self.params = ModelSpecification()
+        elif not isinstance(self.params, ModelSpecification):
+            raise ValueError("Cannot add coefficient on top of a dict instantiation.")
+        self.params.add_coefficients(
+            coefficient_name=coefficient_name,
+            feature_name=feature_name,
+            items_indexes=items_indexes,
+            items_names=items_names,
+        )
+
+    def add_shared_coefficient(
+        self, coefficient_name, feature_name, items_indexes=None, items_names=None
+    ):
+        """Adds a single, shared coefficient to the model through the specification of the utility.
+
+        Parameters
+        ----------
+        coefficient_name : str
+            Name given to the coefficient.
+        feature_name : str
+            features name to which the coefficient is associated. It should work with
+            the names given.
+            in the ChoiceDataset that will be used for parameters estimation.
+        items_indexes : list of int, optional
+            list of items indexes (in the ChoiceDataset) for which the coefficient will be used,
+            by default None
+        items_names : list of str, optional
+            list of items names (in the ChoiceDataset) for which the coefficient will be used,
+            by default None
+
+        Raises:
+        -------
+        ValueError
+            When the model parameters were given as a dict instead of a ModelSpecification.
+        """
+        if self.params is None:
+            self.params = ModelSpecification()
+        elif not isinstance(self.params, ModelSpecification):
+            raise ValueError("Cannot add shared coefficient on top of a dict instantiation.")
+        self.params.add_shared_coefficient(
+            coefficient_name=coefficient_name,
+            feature_name=feature_name,
+            items_indexes=items_indexes,
+            items_names=items_names,
+        )
+
+    def fit(self, dataset, **kwargs):
+        """Fit the model to the dataset.
+
+        Parameters
+        ----------
+        dataset : ChoiceDataset
+            Dataset to fit the model to.
+        """
+        if not self.instantiated:
+            self.instantiate(
+                n_items=dataset.get_n_items(),
+                items_features_names=dataset.fixed_items_features_names,
+                contexts_features_names=dataset.contexts_features_names,
+                contexts_items_features_names=dataset.contexts_items_features_names,
+            )
+        return super().fit(dataset, **kwargs)
diff --git a/choice_learn/models/rumnet.py b/choice_learn/models/rumnet.py
index 09f08fa9..2872bce7 100644
--- a/choice_learn/models/rumnet.py
+++ b/choice_learn/models/rumnet.py
@@ -1133,9 +1133,17 @@ def batch_predict(
probabilities = tf.divide(
probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
)
- batch_loss = self.loss(
- y_pred=probabilities,
- y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
- sample_weight=sample_weight,
- )
+
+ batch_loss = {
+ "optimized_loss": self.loss(
+ y_pred=probabilities,
+ y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+ sample_weight=sample_weight,
+ ),
+ "NegativeLogLikelihood": tf.keras.losses.CategoricalCrossentropy()(
+ y_pred=probabilities,
+ y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+ sample_weight=sample_weight,
+ ),
+ }
return batch_loss, probabilities
diff --git a/choice_learn/models/simple_mnl.py b/choice_learn/models/simple_mnl.py
new file mode 100644
index 00000000..b23bf9f2
--- /dev/null
+++ b/choice_learn/models/simple_mnl.py
@@ -0,0 +1,368 @@
+"""Implementation of the simple linear multinomial logit model.
+
+It is a multi output logistic regression.
+"""
+
+import pandas as pd
+import tensorflow as tf
+
+from .base_model import ChoiceModel
+
+
+class SimpleMNL(ChoiceModel):
+ """Simple MNL with one linear coefficient to estimate by feature."""
+
+    def __init__(
+        self,
+        add_exit_choice=False,
+        intercept=None,
+        optimizer="Adam",
+        lr=0.001,
+        **kwargs,
+    ):
+        """Set up a Simple-MNL; its weights are created later by `instantiate`.
+
+        Parameters
+        ----------
+        add_exit_choice : bool, optional
+            Forwarded to the parent class as `normalize_non_buy`, by default False
+        intercept : str or None, optional
+            Kind of intercept to estimate. `instantiate` creates no intercept for None,
+            n_items - 1 values for "item", n_items values for "item-full" and a single
+            value for any other value, by default None
+        optimizer : str
+            Name of the optimizer, forwarded to the parent class, by default "Adam"
+        lr : float
+            Learning rate, forwarded to the parent class, by default 0.001
+        **kwargs
+            Any other keyword argument is forwarded to the parent class.
+        """
+        super().__init__(
+            normalize_non_buy=add_exit_choice,
+            optimizer=optimizer,
+            lr=lr,
+            **kwargs,
+        )
+        self.intercept = intercept
+        # No weight exists until `instantiate` has been called.
+        self.instantiated = False
+
+    def instantiate(
+        self, n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features
+    ):
+        """Create the trainable weights of the model.
+
+        One weight vector is created for each kind of features that has at least one
+        feature, followed by the intercept when one is requested.
+
+        Parameters
+        ----------
+        n_items : int
+            Number of items/alternatives to consider.
+        n_fixed_items_features : int
+            Number of fixed items features.
+        n_contexts_features : int
+            Number of contexts features
+        n_contexts_items_features : int
+            Number of contexts items features
+
+        Returns
+        -------
+        dict
+            Position in the list of weights of each variable created, keyed by
+            "items", "contexts", "contexts_items" and "intercept".
+        list of tf.Variable
+            List of the weights created.
+        """
+        weights = []
+        indexes = {}
+        for n_feat, feat_name in zip(
+            [n_fixed_items_features, n_contexts_features, n_contexts_items_features],
+            ["items", "contexts", "contexts_items"],
+        ):
+            if n_feat > 0:
+                # Append to the list: rebinding it here would drop the variables of the
+                # previous kinds of features and make every index point to position 0.
+                weights.append(
+                    tf.Variable(
+                        tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(n_feat,)),
+                        name=f"Weights_{feat_name}",
+                    )
+                )
+                indexes[feat_name] = len(weights) - 1
+
+        if self.intercept is None:
+            print("No intercept in the model")
+        else:
+            if self.intercept == "item":
+                # One value per item but one.
+                n_intercepts = n_items - 1
+            elif self.intercept == "item-full":
+                print("Are you sure you do not want to normalize an intercept to 0?")
+                n_intercepts = n_items
+            else:
+                # Any other value: a single intercept value.
+                n_intercepts = 1
+            weights.append(
+                tf.Variable(
+                    tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(n_intercepts,)),
+                    name="Intercept",
+                )
+            )
+            indexes["intercept"] = len(weights) - 1
+
+        self.instantiated = True
+        self.indexes = indexes
+        self.weights = weights
+        return indexes, weights
+
+    def compute_batch_utility(
+        self,
+        fixed_items_features,
+        contexts_features,
+        contexts_items_features,
+        contexts_items_availabilities,
+        choices,
+    ):
+        """Compute the utilities of a batch as a sum of linear terms.
+
+        The utility is the sum of one linear term per kind of features for which a
+        weight exists in `self.indexes`, plus the intercept if there is one.
+
+        Parameters
+        ----------
+        fixed_items_features : tuple of np.ndarray
+            Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products
+            constant/fixed features.
+            Shape must be (n_items, n_items_features)
+        contexts_features : tuple of np.ndarray (contexts_features)
+            a batch of contexts features
+            Shape must be (n_contexts, n_contexts_features)
+        contexts_items_features : tuple of np.ndarray (contexts_items_features)
+            a batch of contexts items features
+            Shape must be (n_contexts, n_items, n_contexts_items_features)
+        contexts_items_availabilities : np.ndarray
+            A batch of contexts items availabilities
+            Shape must be (n_contexts, n_items)
+            Only its shape is read, and only when the model has no contexts items features.
+        choices : np.ndarray
+            Choices, not used in the computation.
+            Shape must be (n_contexts, )
+
+        Returns
+        -------
+        tf.Tensor
+            Computed utilities of shape (n_contexts, n_items).
+        """
+        _ = choices
+        if "items" in self.indexes:
+            if isinstance(fixed_items_features, tuple):
+                # tf.concat expects the list of tensors as its first argument.
+                fixed_items_features = tf.concat([*fixed_items_features], axis=1)
+            fixed_items_utilities = tf.tensordot(
+                fixed_items_features, self.weights[self.indexes["items"]], axes=1
+            )
+        else:
+            fixed_items_utilities = 0
+
+        if "contexts" in self.indexes:
+            if isinstance(contexts_features, tuple):
+                contexts_features = tf.concat([*contexts_features], axis=1)
+            contexts_utilities = tf.tensordot(
+                contexts_features, self.weights[self.indexes["contexts"]], axes=1
+            )
+            # (n_contexts,) -> (n_contexts, 1) so that it broadcasts over the items axis.
+            contexts_utilities = tf.expand_dims(contexts_utilities, axis=-1)
+        else:
+            contexts_utilities = 0
+
+        if "contexts_items" in self.indexes:
+            if isinstance(contexts_items_features, tuple):
+                contexts_items_features = tf.concat([*contexts_items_features], axis=2)
+            contexts_items_utilities = tf.tensordot(
+                contexts_items_features, self.weights[self.indexes["contexts_items"]], axes=1
+            )
+        else:
+            # Zeros of the output shape so that the sum below always has shape
+            # (n_contexts, n_items), whatever the other terms are.
+            # NOTE(review): relies on availabilities being (n_contexts, n_items) as
+            # documented above - confirm against ChoiceDataset batches.
+            contexts_items_utilities = tf.zeros(tf.shape(contexts_items_availabilities))
+
+        if "intercept" in self.indexes:
+            intercept = self.weights[self.indexes["intercept"]]
+            if self.intercept == "item":
+                # The intercept of the first item is fixed to 0.
+                intercept = tf.concat([tf.constant([0.0]), intercept], axis=0)
+            if self.intercept in ["item", "item-full"]:
+                intercept = tf.expand_dims(intercept, axis=0)
+        else:
+            intercept = 0
+
+        return fixed_items_utilities + contexts_utilities + contexts_items_utilities + intercept
+
+    def fit(self, choice_dataset, get_report=False, **kwargs):
+        """Estimate the parameters, creating the weights first if needed.
+
+        Parameters
+        ----------
+        choice_dataset : ChoiceDataset
+            Choice dataset to use for the estimation.
+        get_report: bool, optional
+            Whether or not to compute a report of the estimation, by default False
+        **kwargs
+            Forwarded unchanged to the parent class `fit`.
+
+        Returns
+        -------
+        object
+            Whatever the parent class `fit` returns.
+        """
+        if not self.instantiated:
+            # Lazy Instantiation
+            print("Instantiation")
+            dataset_sizes = {
+                "n_items": choice_dataset.get_n_items(),
+                "n_fixed_items_features": choice_dataset.get_n_fixed_items_features(),
+                "n_contexts_features": choice_dataset.get_n_contexts_features(),
+                "n_contexts_items_features": choice_dataset.get_n_contexts_items_features(),
+            }
+            self.indexes, self.weights = self.instantiate(**dataset_sizes)
+            self.instantiated = True
+
+        estimation = super().fit(choice_dataset=choice_dataset, **kwargs)
+        if get_report:
+            # The report is stored on the model, it is not returned.
+            self.report = self.compute_report(choice_dataset)
+        return estimation
+
+    def _fit_with_lbfgs(
+        self, choice_dataset, epochs=None, sample_weight=None, get_report=False, **kwargs
+    ):
+        """Estimate the parameters with LBFGS, creating the weights first if needed.
+
+        Parameters
+        ----------
+        choice_dataset : ChoiceDataset
+            Choice dataset to use for the estimation.
+        epochs : int, optional
+            Number of epochs to run, by default None meaning that self.epochs is used.
+        sample_weight: Iterable, optional
+            Weight of each sample, forwarded to the parent class, by default None
+        get_report: bool, optional
+            Whether or not to compute a report of the estimation, by default False.
+        **kwargs
+            Forwarded unchanged to the parent class `_fit_with_lbfgs`.
+
+        Returns
+        -------
+        object
+            Whatever the parent class `_fit_with_lbfgs` returns.
+        """
+        if not self.instantiated:
+            # Lazy Instantiation
+            print("Instantiation")
+            dataset_sizes = {
+                "n_items": choice_dataset.get_n_items(),
+                "n_fixed_items_features": choice_dataset.get_n_fixed_items_features(),
+                "n_contexts_features": choice_dataset.get_n_contexts_features(),
+                "n_contexts_items_features": choice_dataset.get_n_contexts_items_features(),
+            }
+            self.indexes, self.weights = self.instantiate(**dataset_sizes)
+            self.instantiated = True
+
+        epochs = self.epochs if epochs is None else epochs
+        estimation = super()._fit_with_lbfgs(
+            dataset=choice_dataset, epochs=epochs, sample_weight=sample_weight, **kwargs
+        )
+        if get_report:
+            # The report is stored on the model, it is not returned.
+            self.report = self.compute_report(choice_dataset)
+        return estimation
+
+    def compute_report(self, dataset):
+        """Summarize the estimation of every coefficient in a DataFrame.
+
+        Parameters
+        ----------
+        dataset : ChoiceDataset
+            ChoiceDataset used for the estimation of the weights; it is handed to
+            `get_weights_std` to get the Std Err of each coefficient.
+
+        Returns
+        -------
+        pandas.DataFrame
+            One row per coefficient with its name, estimation, Std Err, z_value and p_value.
+        """
+        import tensorflow_probability as tfp
+
+        weights_std = self.get_weights_std(dataset)
+        standard_normal = tfp.distributions.Normal(loc=0.0, scale=1.0)
+
+        coefficient_names = []
+        coefficient_values = []
+        z_scores = []
+        p_values = []
+        # Position of the current coefficient in weights_std, which is indexed over the
+        # coefficients of all the weights one after the other.
+        flat_position = 0
+        for weight in self.weights:
+            values = weight.numpy()
+            for position in range(weight.shape[0]):
+                z_score = values[position] / weights_std[flat_position].numpy()
+                coefficient_names.append(f"{weight.name}_{position}")
+                coefficient_values.append(values[position])
+                z_scores.append(z_score)
+                # Two-sided p-value under a standard normal distribution.
+                p_values.append(2 * (1 - standard_normal.cdf(tf.math.abs(z_score)).numpy()))
+                flat_position += 1
+
+        report_columns = {
+            "Coefficient Name": coefficient_names,
+            "Coefficient Estimation": coefficient_values,
+            "Std. Err": weights_std.numpy(),
+            "z_value": z_scores,
+            "P(.>z)": p_values,
+        }
+        return pd.DataFrame(report_columns)
+
+ def get_weights_std(self, dataset):
+ """Approximates Std Err with Hessian matrix.
+
+ The Hessian of the summed categorical cross-entropy with regard to all the
+ weights (concatenated in a single vector) is computed with two nested
+ gradient tapes. The Std Err returned are the square roots of the diagonal
+ of the inverse of this Hessian.
+
+ Parameters
+ ----------
+ dataset : ChoiceDataset
+ ChoiceDataset used for the estimation of the weights that will be
+ used to compute the Std Err of this estimation.
+
+ Returns:
+ --------
+ tf.Tensor
+ Estimation of the Std Err for the weights, one value per coefficient,
+ in the order of the concatenation of self.weights.
+ """
+ # Loops of differentiation
+ # tape_1 (outer) differentiates the jacobian computed by tape_2 (inner).
+ with tf.GradientTape() as tape_1:
+ with tf.GradientTape(persistent=True) as tape_2:
+ # Work on a clone: its weights attribute is rebound below, self is left untouched.
+ model = self.clone()
+ # All the weights in a single vector so that one Hessian covers them all.
+ w = tf.concat(self.weights, axis=0)
+ tape_2.watch(w)
+ tape_1.watch(w)
+ # Rebuild each weight as a slice of w so that the utilities depend on w.
+ mw = []
+ index = 0
+ for _w in self.weights:
+ mw.append(w[index : index + _w.shape[0]])
+ index += _w.shape[0]
+ model.weights = mw
+ # NOTE(review): batch_size=-1 presumably yields the whole dataset as one batch,
+ # y_true below uses dataset.choices (all the choices) - TODO confirm.
+ for batch in dataset.iter_batch(batch_size=-1):
+ utilities = model.compute_batch_utility(*batch)
+ probabilities = tf.nn.softmax(utilities, axis=-1)
+ loss = tf.keras.losses.CategoricalCrossentropy(reduction="sum")(
+ y_pred=probabilities,
+ y_true=tf.one_hot(dataset.choices, depth=probabilities.shape[-1]),
+ )
+ # Compute the Jacobian
+ jacobian = tape_2.jacobian(loss, w)
+ # Compute the Hessian from the Jacobian
+ hessian = tape_1.jacobian(jacobian, w)
+ # From here on, `hessian` holds the inverse of the Hessian.
+ hessian = tf.linalg.inv(tf.squeeze(hessian))
+ # Square roots of the diagonal values of the inverse.
+ return tf.sqrt([hessian[i][i] for i in range(len(tf.squeeze(hessian)))])
+
+    def clone(self):
+        """Build a new SimpleMNL carrying the state of this model.
+
+        The attributes are assigned as they are, not copied: the clone references
+        the same objects (weights, indexes, history, ...) as this model.
+
+        Returns
+        -------
+        SimpleMNL
+            The new model.
+        """
+        duplicate = SimpleMNL(
+            add_exit_choice=self.normalize_non_buy,
+            optimizer=self.optimizer_name,
+        )
+        for attribute in ("history", "is_fitted", "instantiated"):
+            if hasattr(self, attribute):
+                setattr(duplicate, attribute, getattr(self, attribute))
+
+        # Unlike the other attributes, these two are transferred unconditionally.
+        duplicate.loss = self.loss
+        duplicate.label_smoothing = self.label_smoothing
+
+        for attribute in (
+            "report",
+            "weights",
+            "indexes",
+            "intercept",
+            "lr",
+            "_items_features_names",
+            "_contexts_features_names",
+            "_contexts_items_features_names",
+        ):
+            if hasattr(self, attribute):
+                setattr(duplicate, attribute, getattr(self, attribute))
+        return duplicate
diff --git a/notebooks/choice_learn_introduction_clogit.ipynb b/notebooks/choice_learn_introduction_clogit.ipynb
index bd20a3e0..406de3e3 100644
--- a/notebooks/choice_learn_introduction_clogit.ipynb
+++ b/notebooks/choice_learn_introduction_clogit.ipynb
@@ -211,8 +211,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "The average neg-loglikelihood is: 0.67447394\n",
- "The total neg-loglikelihood is: 1874.3630829453468\n"
+ "The average neg-loglikelihood is: 0.6744666\n",
+ "The total neg-loglikelihood is: 1874.3427090644836\n"
]
}
],
@@ -267,25 +267,25 @@
"
\n",
" 0 | \n",
" beta_inter:0_0 | \n",
- " 0.698367 | \n",
- " 1.280208 | \n",
- " 0.545511 | \n",
- " 5.854024e-01 | \n",
+ " 0.698380 | \n",
+ " 1.280237 | \n",
+ " 0.545508 | \n",
+ " 5.854039e-01 | \n",
"
\n",
" \n",
" 1 | \n",
" beta_inter:0_1 | \n",
- " 1.844104 | \n",
- " 0.708454 | \n",
- " 2.602998 | \n",
- " 9.241223e-03 | \n",
+ " 1.844129 | \n",
+ " 0.708489 | \n",
+ " 2.602904 | \n",
+ " 9.243727e-03 | \n",
"
\n",
" \n",
" 2 | \n",
" beta_inter:0_2 | \n",
- " 3.274187 | \n",
- " 0.624366 | \n",
- " 5.244018 | \n",
+ " 3.274206 | \n",
+ " 0.624402 | \n",
+ " 5.243744 | \n",
" 1.192093e-07 | \n",
"
\n",
" \n",
@@ -293,7 +293,7 @@
" beta_income:0_0 | \n",
" -0.089087 | \n",
" 0.018347 | \n",
- " -4.855643 | \n",
+ " -4.855632 | \n",
" 1.192093e-06 | \n",
"
\n",
" \n",
@@ -301,7 +301,7 @@
" beta_income:0_1 | \n",
" -0.027993 | \n",
" 0.003873 | \n",
- " -7.228673 | \n",
+ " -7.228651 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
@@ -309,15 +309,15 @@
" beta_income:0_2 | \n",
" -0.038147 | \n",
" 0.004083 | \n",
- " -9.342690 | \n",
+ " -9.342653 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
" 6 | \n",
" beta_ivt:0_0 | \n",
- " 0.059509 | \n",
+ " 0.059510 | \n",
" 0.010073 | \n",
- " 5.907992 | \n",
+ " 5.908023 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
@@ -325,39 +325,39 @@
" beta_ivt:0_1 | \n",
" -0.006784 | \n",
" 0.004433 | \n",
- " -1.530137 | \n",
- " 1.259828e-01 | \n",
+ " -1.530130 | \n",
+ " 1.259845e-01 | \n",
"
\n",
" \n",
" 8 | \n",
" beta_ivt:0_2 | \n",
" -0.006460 | \n",
" 0.001898 | \n",
- " -3.403037 | \n",
- " 6.663799e-04 | \n",
+ " -3.402944 | \n",
+ " 6.666183e-04 | \n",
"
\n",
" \n",
" 9 | \n",
" beta_ivt:0_3 | \n",
" -0.001450 | \n",
" 0.001187 | \n",
- " -1.221401 | \n",
- " 2.219341e-01 | \n",
+ " -1.221381 | \n",
+ " 2.219417e-01 | \n",
"
\n",
" \n",
" 10 | \n",
" beta_cost:0_0 | \n",
" -0.033339 | \n",
" 0.007095 | \n",
- " -4.698925 | \n",
+ " -4.698679 | \n",
" 2.622604e-06 | \n",
"
\n",
" \n",
" 11 | \n",
" beta_freq:0_0 | \n",
- " 0.092529 | \n",
+ " 0.092530 | \n",
" 0.005098 | \n",
- " 18.151833 | \n",
+ " 18.151777 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
@@ -365,7 +365,7 @@
" beta_ovt:0_0 | \n",
" -0.043004 | \n",
" 0.003225 | \n",
- " -13.335643 | \n",
+ " -13.335551 | \n",
" 0.000000e+00 | \n",
"
\n",
" \n",
@@ -374,19 +374,19 @@
],
"text/plain": [
" Coefficient Name Coefficient Estimation Std. Err z_value P(.>z)\n",
- "0 beta_inter:0_0 0.698367 1.280208 0.545511 5.854024e-01\n",
- "1 beta_inter:0_1 1.844104 0.708454 2.602998 9.241223e-03\n",
- "2 beta_inter:0_2 3.274187 0.624366 5.244018 1.192093e-07\n",
- "3 beta_income:0_0 -0.089087 0.018347 -4.855643 1.192093e-06\n",
- "4 beta_income:0_1 -0.027993 0.003873 -7.228673 0.000000e+00\n",
- "5 beta_income:0_2 -0.038147 0.004083 -9.342690 0.000000e+00\n",
- "6 beta_ivt:0_0 0.059509 0.010073 5.907992 0.000000e+00\n",
- "7 beta_ivt:0_1 -0.006784 0.004433 -1.530137 1.259828e-01\n",
- "8 beta_ivt:0_2 -0.006460 0.001898 -3.403037 6.663799e-04\n",
- "9 beta_ivt:0_3 -0.001450 0.001187 -1.221401 2.219341e-01\n",
- "10 beta_cost:0_0 -0.033339 0.007095 -4.698925 2.622604e-06\n",
- "11 beta_freq:0_0 0.092529 0.005098 18.151833 0.000000e+00\n",
- "12 beta_ovt:0_0 -0.043004 0.003225 -13.335643 0.000000e+00"
+ "0 beta_inter:0_0 0.698380 1.280237 0.545508 5.854039e-01\n",
+ "1 beta_inter:0_1 1.844129 0.708489 2.602904 9.243727e-03\n",
+ "2 beta_inter:0_2 3.274206 0.624402 5.243744 1.192093e-07\n",
+ "3 beta_income:0_0 -0.089087 0.018347 -4.855632 1.192093e-06\n",
+ "4 beta_income:0_1 -0.027993 0.003873 -7.228651 0.000000e+00\n",
+ "5 beta_income:0_2 -0.038147 0.004083 -9.342653 0.000000e+00\n",
+ "6 beta_ivt:0_0 0.059510 0.010073 5.908023 0.000000e+00\n",
+ "7 beta_ivt:0_1 -0.006784 0.004433 -1.530130 1.259845e-01\n",
+ "8 beta_ivt:0_2 -0.006460 0.001898 -3.402944 6.666183e-04\n",
+ "9 beta_ivt:0_3 -0.001450 0.001187 -1.221381 2.219417e-01\n",
+ "10 beta_cost:0_0 -0.033339 0.007095 -4.698679 2.622604e-06\n",
+ "11 beta_freq:0_0 0.092530 0.005098 18.151777 0.000000e+00\n",
+ "12 beta_ovt:0_0 -0.043004 0.003225 -13.335551 0.000000e+00"
]
},
"execution_count": null,
@@ -469,14 +469,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "100%|██████████| 1/1 [00:01<00:00, 2.00s/it]"
+ "100%|██████████| 1/1 [00:01<00:00, 1.73s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "'Ground Truth' Negative LogLikelihood: tf.Tensor(1874.3633, shape=(), dtype=float32)\n"
+ "'Ground Truth' Negative LogLikelihood: tf.Tensor(1874.3427, shape=(), dtype=float32)\n"
]
},
{
@@ -526,11 +526,11 @@
"output_type": "stream",
"text": [
"Purchase probability of each item for the first 5 sessions: tf.Tensor(\n",
- "[[0.1906135 0.00353266 0.4053667 0.4004831 ]\n",
- " [0.34869286 0.00069682 0.36830992 0.28229675]\n",
- " [0.14418365 0.00651285 0.40567666 0.44362238]\n",
- " [0.34869286 0.00069682 0.36830992 0.28229675]\n",
- " [0.34869286 0.00069682 0.36830992 0.28229675]], shape=(5, 4), dtype=float32)\n"
+ "[[0.19061361 0.00353295 0.4053689 0.4004805 ]\n",
+ " [0.3486952 0.00069691 0.36830923 0.28229502]\n",
+ " [0.14418328 0.00651326 0.40567988 0.44361907]\n",
+ " [0.3486952 0.00069691 0.36830923 0.28229502]\n",
+ " [0.3486952 0.00069691 0.36830923 0.28229502]], shape=(5, 4), dtype=float32)\n"
]
}
],
@@ -582,14 +582,14 @@
{
"data": {
"text/plain": [
- "[,\n",
- " ,\n",
- " ,\n",
- " ,\n",
+ "[,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
" ,\n",
- " ]"
+ " ]"
]
},
"execution_count": null,
@@ -611,7 +611,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": null,
@@ -669,11 +669,11 @@
"text": [
"L-BFGS Opimization finished:\n",
"---------------------------------------------------------------\n",
- "Number of iterations: 170\n",
+ "Number of iterations: 190\n",
"Algorithm converged before reaching max iterations: True\n",
- "[, , , , , ]\n"
+ "[, , , , , ]\n"
]
}
],
@@ -834,16 +834,16 @@
{
"data": {
"text/plain": [
- "[,\n",
- " ,\n",
- " ,\n",
- " ,\n",
- " ,\n",
- " ,\n",
- " ,\n",
- " ,\n",
- " ,\n",
- " 1:0' shape=(1, 1) dtype=float32, numpy=array([[1.413982]], dtype=float32)>]"
+ "[,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " 1:0' shape=(1, 1) dtype=float32, numpy=array([[1.4228866]], dtype=float32)>]"
]
},
"execution_count": null,
@@ -865,7 +865,7 @@
{
"data": {
"text/plain": [
- ""
+ ""
]
},
"execution_count": null,
diff --git a/notebooks/choice_learn_introduction_data.ipynb b/notebooks/choice_learn_introduction_data.ipynb
index 05c0f692..cfb6ca8f 100644
--- a/notebooks/choice_learn_introduction_data.ipynb
+++ b/notebooks/choice_learn_introduction_data.ipynb
@@ -320,9 +320,11 @@
"source": [
"The ChoiceDataset is ready !\n",
"\n",
+ "If your DataFrame is in the wide format, you can use the equivalent method *from_single_wide_df*. An example can be found [here](https://github.com/artefactory/choice-learn-private/blob/main/notebooks/dataset_creation.ipynb) on the SwissMetro dataset.\n",
+ "\n",
"You now have three possibilities to continue discovering the choice-learn package:\n",
"- You can directly go [here]() to the modelling tutorial if you want to understand how a first simple ConditionMNl would be implementd.\n",
- "- You can go [here]() if your dataset is organized differently to see all the different ways to instantiate a ChoiceDataset. In particular it helps if you DataFrame is in the wide format or if it is splitted into several DataFrames.\n",
+ "- You can go [here]() if your dataset is organized differently to see all the different ways to instantiate a ChoiceDataset. In particular, it helps if your data is split into several DataFrames or if you have another format of data.\n",
"- Or you can continue this current tutorial to better understand the ChoiceDataset machinery and everything there is to know about it.\n",
"\n",
"Whatever your choice, you can also check [here](#ready-to-use-datasets) the list of open source datasets available directly with the package."
diff --git a/notebooks/latent_class_model.ipynb b/notebooks/latent_class_model.ipynb
new file mode 100644
index 00000000..88cd8f10
--- /dev/null
+++ b/notebooks/latent_class_model.ipynb
@@ -0,0 +1,287 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Example of use of Latent Class MNL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n",
+ "\n",
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "sys.path.append(\"../\")\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "import tensorflow as tf"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's use the Electricity Dataset used in this [tutorial](https://rpubs.com/msarrias1986/335556)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from choice_learn.datasets import load_electricity\n",
+ "\n",
+ "elec_dataset = load_electricity(as_frame=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from choice_learn.models.simple_mnl import SimpleMNL\n",
+ "from choice_learn.models.latent_class_mnl import LatentClassSimpleMNL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lc_model = LatentClassSimpleMNL(n_latent_classes=3, fit_method=\"mle\", optimizer=\"lbfgs\", epochs=1000, tolerance=1e-10)\n",
+ "hist = lc_model.fit(elec_dataset, verbose=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Latent Class Model weights:\")\n",
+ "print(\"Classes Logits:\", lc_model.latent_logits)\n",
+ "for i in range(3):\n",
+ " print(\"\\n\")\n",
+ " print(f\"Model Nb {i}, weights:\", lc_model.models[i].weights)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Negative Log-Likelihood:\")\n",
+ "lc_model.evaluate(elec_dataset) * len(elec_dataset)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Latent Conditional MNL\n",
+ "We used a very simple MNL. Here we simulate the same MNL, by using the Conditional-MNL formulation.\\\n",
+ "Don't hesitate to read the conditional-MNL tutorial to better understand how to use this formulation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from choice_learn.models.latent_class_mnl import LatentClassConditionalMNL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lc_model_2 = LatentClassConditionalMNL(n_latent_classes=3,\n",
+ " fit_method=\"mle\",\n",
+ " optimizer=\"lbfgs\",\n",
+ " epochs=1000,\n",
+ " tolerance=1e-12)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For each feature, let's add a coefficient that is shared by all items:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lc_model_2.add_shared_coefficient(coefficient_name=\"pf\",\n",
+ " feature_name=\"pf\",\n",
+ " items_indexes=[0, 1, 2, 3])\n",
+ "lc_model_2.add_shared_coefficient(coefficient_name=\"cl\",\n",
+ " feature_name=\"cl\",\n",
+ " items_indexes=[0, 1, 2, 3])\n",
+ "lc_model_2.add_shared_coefficient(coefficient_name=\"loc\",\n",
+ " feature_name=\"loc\",\n",
+ " items_indexes=[0, 1, 2, 3])\n",
+ "lc_model_2.add_shared_coefficient(coefficient_name=\"wk\",\n",
+ " feature_name=\"wk\",\n",
+ " items_indexes=[0, 1, 2, 3])\n",
+ "lc_model_2.add_shared_coefficient(coefficient_name=\"tod\",\n",
+ " feature_name=\"tod\",\n",
+ " items_indexes=[0, 1, 2, 3])\n",
+ "lc_model_2.add_shared_coefficient(coefficient_name=\"seas\",\n",
+ " feature_name=\"seas\",\n",
+ " items_indexes=[0, 1, 2, 3])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fit\n",
+ "hist2 = lc_model_2.fit(elec_dataset, verbose=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Negative Log-Likelihood:\", lc_model_2.evaluate(elec_dataset)*len(elec_dataset))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Latent Class Model weights:\")\n",
+ "print(\"Classes Logits:\", lc_model_2.latent_logits)\n",
+ "for i in range(3):\n",
+ " print(\"\\n\")\n",
+ " print(f\"Model Nb {i}, weights:\", lc_model_2.models[i].weights)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Just like any ChoiceModel you can get the probabilities:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lc_model.predict_probas(elec_dataset[:4])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you want to use more complex formulations of Latent Class models, you can directly use the *BaseLatentClassModel* from *choice_learn.models.base_model*:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from choice_learn.models.base_model import BaseLatentClassModel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "manual_lc = BaseLatentClassModel(\n",
+ " model_class=SimpleMNL,\n",
+ " model_parameters={\"add_exit_choice\": False},\n",
+ " n_latent_classes=3,\n",
+ " fit_method=\"mle\",\n",
+ " epochs=1000,\n",
+ " optimizer=\"lbfgs\"\n",
+ " )\n",
+ "manual_lc.instantiate(n_items=4,\n",
+ " n_fixed_items_features=0,\n",
+ " n_contexts_features=0,\n",
+ " n_contexts_items_features=6)\n",
+ "manual_hist = manual_lc.fit(elec_dataset, verbose=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "manual_lc.evaluate(elec_dataset) * len(elec_dataset)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you need to go deeper, you can look in *choice_learn/models/latent_class_mnl* to see different implementations that could help you."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/logistic_regression.ipynb b/notebooks/logistic_regression.ipynb
new file mode 100644
index 00000000..45252ec3
--- /dev/null
+++ b/notebooks/logistic_regression.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Logistic Regression: 3-class Classifier\n",
+ "\n",
+ "The Conditional MNL is a generalization of the multi-class Logistic Regression.\n",
+ "Here, we recreate the scikit-learn tutorial that can be found [here](https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html#sphx-glr-auto-examples-linear-model-plot-iris-logistic-py)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "# Remove GPU use\n",
+ "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n",
+ "\n",
+ "import sys\n",
+ "\n",
+ "sys.path.append(\"../\")\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "\n",
+ "from sklearn import datasets\n",
+ "from sklearn.inspection import DecisionBoundaryDisplay\n",
+ "\n",
+ "from choice_learn.models import ConditionalMNL\n",
+ "from choice_learn.data import ChoiceDataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import some data to play with\n",
+ "iris = datasets.load_iris()\n",
+ "X = iris.data[:, :2] # we only take the first two features.\n",
+ "Y = iris.target"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We need to create a ChoiceDataset object. Features are contexts_features as they are shared by the three outcomes. The class labels are ''choices''."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset = ChoiceDataset(contexts_features=(X, ),\n",
+ "contexts_features_names=([\"feat_1\", \"feat_2\"], ),\n",
+ " fixed_items_features=np.ones((3, 3)),\n",
+ " choices=Y)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For the model parametrization, we specify that we want to learn one weight by outcome for each feature: 'feat_1', 'feat_2' and the intercept. This is done with the keyword \"item-full\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "parametrization = {\n",
+ " \"intercept\": \"item-full\",\n",
+ " \"feat_1\": \"item-full\",\n",
+ " \"feat_2\": \"item-full\"\n",
+ "}\n",
+ "\n",
+ "# Let's estimate the weights\n",
+ "model = ConditionalMNL(parameters=parametrization, optimizer=\"lbfgs\")\n",
+ "hist = model.fit(dataset, epochs=100)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's display the resulting model, just as in the sk-learn tutorial."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "feature_1, feature_2 = np.meshgrid(\n",
+ " np.linspace(X[:, 0].min() - 0.5, X[:, 0].max() + 0.5),\n",
+ " np.linspace(X[:, 1].min() - 0.5, X[:, 1].max() + 0.5)\n",
+ ")\n",
+ "grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T\n",
+ "\n",
+ "grid_dataset = ChoiceDataset(contexts_features=(grid, ),\n",
+ "contexts_features_names=([\"feat_1\", \"feat_2\"], ),\n",
+ " fixed_items_features=np.ones((3, 3)),\n",
+ " choices=np.ones(len(grid), ))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "keep_output": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "