diff --git a/README.md b/README.md index 005b18f6..da2f585e 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,10 @@ If you are new to choice modelling, you can check this [resource](https://www.pu - Ready-To-Use datasets: - [SwissMetro](./choice_learn/datasets/data/swissmetro.csv.gz) from Bierlaire et al. (2001) [[2]](#citation) - [ModeCanada](./choice_learn/datasets/data/ModeCanada.csv.gz) from Koppelman et al. (1993) [[3]](#citation) + - The Train dataset from Ben Akiva et al. (1993) [5](#citation) + - The Heating & Electricity datasets from Kenneth Train described [here](https://rdrr.io/cran/mlogit/man/Electricity.html) and [here](https://rdrr.io/cran/mlogit/man/Heating.html) + - The TaFeng dataset from [Kaggle](https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset) + - ### Models - Ready-to-use models: @@ -91,10 +95,12 @@ Choice-Learn requires the following: - Python (>=3.8) - NumPy (>=1.24) - pandas (>=1.5) + For modelling you need: - TensorFlow (>=2.13) -Finally, an optional requirement used for report and LBFG-s use is: -- tensorflow_probability (>=0.20.1) + +Finally, an optional requirement used for report and LBFG-S optimization is: +- TensorFlow Probability (>=0.20.1) ## Usage ```python @@ -151,6 +157,7 @@ A detailed documentation of this project is available [here](https://artefactory [2][The Acceptance of Model Innovation: The Case of Swissmetro](https://www.researchgate.net/publication/37456549_The_acceptance_of_modal_innovation_The_case_of_Swissmetro), Bierlaire, M.; Axhausen, K., W.; Abay, G. (2001)\ [3][Applications and Interpretation of Nested Logit Models of Intercity Mode Choice](https://trid.trb.org/view/385097), Forinash, C., V.; Koppelman, F., S. (1993)\ [4][The Demand for Local Telephone Service: A Fully Discrete Model of Residential Calling Patterns and Service Choices](https://www.jstor.org/stable/2555538), Train K., E.; McFadden, D., L.; Moshe, B. 
(1987)\ +[5] [Estimation of Travel Choice Models with Randomly Distributed Values of Time](https://ideas.repec.org/p/fth/lavaen/9303.html), Ben-Akiva M; Bolduc D; Bradley M(1993) ### Code and Repositories - [1][RUMnet](https://github.com/antoinedesir/rumnet) diff --git a/choice_learn/data/choice_dataset.py b/choice_learn/data/choice_dataset.py index 56448da1..9984a215 100644 --- a/choice_learn/data/choice_dataset.py +++ b/choice_learn/data/choice_dataset.py @@ -614,7 +614,7 @@ def __len__(self): """ return len(self.choices) - def get_num_items(self): + def get_n_items(self): """Method to access the total number of different items. Returns: @@ -624,7 +624,7 @@ def get_num_items(self): """ return self.base_num_items - def get_num_choices(self): + def get_n_choices(self): """Method to access the total number of different choices. Redundant with __len__ method. @@ -689,7 +689,7 @@ def _contexts_items_features_df_to_np( sess_df.columns = sess_df.loc[items_id_column] if features is not None: contexts_items_features.append(sess_df[items_index].loc[features].T.values) - contexts_items_availabilities.append(np.ones(len(items_index))) + contexts_items_availabilities.append(np.ones(len(items_index)).astype("float32")) else: sess_feats = [] sess_av = [] @@ -806,9 +806,15 @@ def from_single_wide_df( else: contexts_items_availabilities = None - choices = df[choices_column] + choices = df[choices_column].to_numpy() + print("choice", choices) if choice_mode == "items_id": + if items_id is None: + raise ValueError("items_id must be given to use choice_mode 'items_id'") + items_id = np.array(items_id) choices = np.squeeze([np.where(items_id == c)[0] for c in choices]) + if choices.shape[0] == 0: + raise ValueError("No choice found in the items_id list") return ChoiceDataset( fixed_items_features=fixed_items_features, @@ -940,7 +946,7 @@ def summary(self): print("%=====================================================================%") print("%%% Summary of the dataset:") 
print("%=====================================================================%") - print("Number of items:", self.get_num_items()) + print("Number of items:", self.get_n_items()) print( "Number of choices:", len(self), @@ -1038,7 +1044,9 @@ def get_choices_batch(self, choices_indexes, features=None): ) if self.contexts_items_availabilities is None: - contexts_items_availabilities = np.ones((len(choices_indexes), self.base_num_items)) + contexts_items_availabilities = np.ones( + (len(choices_indexes), self.base_num_items) + ).astype("float32") else: contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes] # .astype(self._return_types[3]) @@ -1179,7 +1187,7 @@ def get_choices_batch(self, choices_indexes, features=None): ) if self.contexts_items_availabilities is None: - contexts_items_availabilities = np.ones((self.base_num_items)) + contexts_items_availabilities = np.ones((self.base_num_items)).astype("float32") else: contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes] @@ -1299,41 +1307,67 @@ def __getitem__(self, choices_indexes): elif isinstance(choices_indexes, slice): return self.__getitem__(list(range(*choices_indexes.indices(len(self.choices))))) - if self.fixed_items_features[0] is None: - fixed_items_features = None - else: + try: + if self.fixed_items_features[0] is None: + fixed_items_features = None + else: + fixed_items_features = self.fixed_items_features + except TypeError: fixed_items_features = self.fixed_items_features - if self.contexts_features[0] is None: + + try: + if self.contexts_features[0] is None: + contexts_features = None + else: + contexts_features = tuple( + self.contexts_features[i][choices_indexes] + for i in range(len(self.contexts_features)) + ) + except TypeError: contexts_features = None - else: - contexts_features = tuple( - self.contexts_features[i][choices_indexes] - for i in range(len(self.contexts_features)) - ) - if self.contexts_items_features[0] is None: + + try: + 
if self.contexts_items_features[0] is None: + contexts_items_features = None + else: + contexts_items_features = tuple( + self.contexts_items_features[i][choices_indexes] + for i in range(len(self.contexts_items_features)) + ) + except TypeError: contexts_items_features = None - else: - contexts_items_features = tuple( - self.contexts_items_features[i][choices_indexes] - for i in range(len(self.contexts_items_features)) - ) - if self.fixed_items_features_names[0] is None: + + try: + if self.fixed_items_features_names[0] is None: + fixed_items_features_names = None + else: + fixed_items_features_names = self.fixed_items_features_names + except TypeError: fixed_items_features_names = None - else: - fixed_items_features_names = self.fixed_items_features_names - if self.contexts_features_names[0] is None: + try: + if self.contexts_features_names[0] is None: + contexts_features_names = None + else: + contexts_features_names = self.contexts_features_names + except TypeError: contexts_features_names = None - else: - contexts_features_names = self.contexts_features_names - if self.contexts_items_features_names[0] is None: + try: + if self.contexts_items_features_names[0] is None: + contexts_items_features_names = None + else: + contexts_items_features_names = self.contexts_items_features_names + except TypeError: contexts_items_features_names = None - else: - contexts_items_features_names = self.contexts_items_features_names + + try: + contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes] + except TypeError: + contexts_items_availabilities = None return ChoiceDataset( fixed_items_features=fixed_items_features, contexts_features=contexts_features, contexts_items_features=contexts_items_features, - contexts_items_availabilities=self.contexts_items_availabilities[choices_indexes], + contexts_items_availabilities=contexts_items_availabilities, choices=[self.choices[i] for i in choices_indexes], 
fixed_items_features_names=fixed_items_features_names, contexts_features_names=contexts_features_names, @@ -1391,8 +1425,53 @@ def filter(self, bool_list): Parameters ---------- bool_list : list of boolean - list of booleans of length self.get_num_sessions() to filter sessions. + list of booleans of length self.get_n_contexts() to filter contexts. True to keep, False to discard. """ indexes = [i for i, keep in enumerate(bool_list) if keep] return self[indexes] + + def get_n_fixed_items_features(self): + """Method to access the number of fixed items features. + + Returns: + ------- + int + number of fixed items features + """ + if self.fixed_items_features is not None: + n_features = 0 + for fixed_features in self.fixed_items_features: + n_features += fixed_features.shape[1] + return n_features + return 0 + + def get_n_contexts_features(self): + """Method to access the number of contexts features. + + Returns: + ------- + int + number of fixed items features + """ + if self.contexts_features is not None: + n_features = 0 + for context_features in self.contexts_features: + n_features += context_features.shape[1] + return n_features + return 0 + + def get_n_contexts_items_features(self): + """Method to access the number of context items features. 
+ + Returns: + ------- + int + number of fixed items features + """ + if self.contexts_items_features is not None: + n_features = 0 + for contexts_items_features in self.contexts_items_features: + n_features += contexts_items_features.shape[2] + return n_features + return 0 diff --git a/choice_learn/data/indexer.py b/choice_learn/data/indexer.py index ddba5383..29ed0e2d 100644 --- a/choice_learn/data/indexer.py +++ b/choice_learn/data/indexer.py @@ -295,7 +295,7 @@ def __getitem__(self, choices_indexes): if self.choice_dataset.contexts_items_availabilities is None: contexts_items_availabilities = np.ones( (len(choices_indexes), self.choice_dataset.base_num_items) - ) + ).astype("float32") else: if hasattr(self.choice_dataset.contexts_items_availabilities, "batch"): contexts_items_availabilities = ( @@ -440,7 +440,9 @@ def __getitem__(self, choices_indexes): choice = self.choice_dataset.choices[choices_indexes] if self.choice_dataset.contexts_items_availabilities is None: - contexts_items_availabilities = np.ones((self.choice_dataset.base_num_items)) + contexts_items_availabilities = np.ones( + (self.choice_dataset.base_num_items) + ).astype("float32") else: contexts_items_availabilities = self.choice_dataset.contexts_items_availabilities[ choices_indexes diff --git a/choice_learn/datasets/__init__.py b/choice_learn/datasets/__init__.py index a16bf199..8a7ec3f4 100644 --- a/choice_learn/datasets/__init__.py +++ b/choice_learn/datasets/__init__.py @@ -1,8 +1,5 @@ """Init file for datasets module.""" -from .base import load_modecanada, load_swissmetro +from .base import load_electricity, load_heating, load_modecanada, load_swissmetro -__all__ = [ - "load_modecanada", - "load_swissmetro", -] +__all__ = ["load_modecanada", "load_swissmetro", "load_electricity", "load_heating"] diff --git a/choice_learn/datasets/base.py b/choice_learn/datasets/base.py index 92b65eab..6c977f55 100644 --- a/choice_learn/datasets/base.py +++ b/choice_learn/datasets/base.py @@ -367,3 
+367,163 @@ def load_modecanada( choices_column=choice_column, choice_mode="one_zero", ) + + +def load_heating( + as_frame=False, + to_wide=False, +): + """Load and return the Heating dataset from Kenneth Train. + + Parameters + ---------- + as_frame : bool, optional + Whether to return the dataset as pd.DataFrame. If not, returned as ChoiceDataset, + by default False. + return_desc : bool, optional + Whether to return the description, by default False. + to_wide : bool, optional + Whether to return the dataset in wide format, + by default False (an thus retuned in long format). + + Returns: + -------- + ChoiceDataset + Loaded Heating dataset + """ + _ = to_wide + data_file_name = "heating_data.csv.gz" + names, data = load_gzip(data_file_name) + + heating_df = pd.read_csv(resources.files(DATA_MODULE) / "heating_data.csv.gz") + + if as_frame: + return heating_df + + contexts_features = ["income", "agehed", "rooms", "region"] + choice = ["depvar"] + contexts_items_features = ["ic.", "oc."] + items = ["gc", "gr", "ec", "er", "hp"] + + choices = np.array([items.index(val) for val in heating_df[choice].to_numpy().ravel()]) + contexts = heating_df[contexts_features].to_numpy() + contexts_items = np.stack( + [ + heating_df[[feat + item for feat in contexts_items_features]].to_numpy() + for item in items + ], + axis=1, + ) + return ChoiceDataset( + contexts_features=contexts, contexts_items_features=contexts_items, choices=choices + ) + + +def load_electricity( + as_frame=False, + to_wide=False, +): + """Load and return the Electricity dataset from Kenneth Train. + + Parameters + ---------- + as_frame : bool, optional + Whether to return the dataset as pd.DataFrame. If not, returned as ChoiceDataset, + by default False. + to_wide : bool, optional + Whether to return the dataset in wide format, + by default False (an thus retuned in long format). 
+ + Returns: + -------- + ChoiceDataset + Loaded Electricity dataset + """ + _ = to_wide + data_file_name = "electricity.csv.gz" + names, data = load_gzip(data_file_name) + + elec_df = pd.read_csv(resources.files(DATA_MODULE) / data_file_name) + elec_df.choice = elec_df.choice.astype(int) + elec_df[["pf", "cl", "loc", "wk", "tod", "seas"]] = elec_df[ + ["pf", "cl", "loc", "wk", "tod", "seas"] + ].astype(float) + + if as_frame: + return elec_df + + return ChoiceDataset.from_single_long_df( + df=elec_df, + contexts_items_features_columns=["pf", "cl", "loc", "wk", "tod", "seas"], + items_id_column="alt", + contexts_id_column="chid", + choice_mode="one_zero", + ) + + +def load_train( + as_frame=False, + to_wide=False, + return_desc=False, +): + """Load and return the Train dataset from Koppleman et al. (1993). + + Parameters + ---------- + as_frame : bool, optional + Whether to return the dataset as pd.DataFrame. If not, returned as ChoiceDataset, + by default False. + to_wide : bool, optional + Whether to return the dataset in wide format, + by default False (an thus retuned in long format). + return_desc : bool, optional + Whether to return the description, by default False. + + Returns: + -------- + ChoiceDataset + Loaded Electricity dataset + """ + desc = "A sample of 235 Dutchindividuals facing 2929 choice situations." + desc += """Ben-Akiva M, Bolduc D, Bradley M(1993). + “Estimation of Travel Choice Models with Randomly Distributed Values of Time. + ”Papers 9303, Laval-Recherche en Energie. 
https://ideas.repec.org/p/fth/lavaen/9303.html.""" + _ = to_wide + data_file_name = "train_data.csv.gz" + names, data = load_gzip(data_file_name) + + train_df = pd.read_csv(resources.files(DATA_MODULE) / data_file_name) + + if return_desc: + return desc + + if as_frame: + return train_df + train_df["choice"] = train_df.apply(lambda row: row.choice[-1], axis=1) + train_df = train_df.rename( + columns={ + "price1": "1_price", + "time1": "1_time", + "change1": "1_change", + "comfort1": "1_comfort", + } + ) + train_df = train_df.rename( + columns={ + "price2": "2_price", + "time2": "2_time", + "change2": "2_change", + "comfort2": "2_comfort", + } + ) + print(train_df.head()) + return ChoiceDataset.from_single_wide_df( + df=train_df, + items_id=["1", "2"], + fixed_items_suffixes=None, + contexts_features_columns=["id"], + contexts_items_features_suffixes=["price", "time", "change", "comfort"], + contexts_items_availabilities_suffix=None, + choices_column="choice", + choice_mode="items_id", + ) diff --git a/choice_learn/datasets/data/electricity.csv.gz b/choice_learn/datasets/data/electricity.csv.gz new file mode 100644 index 00000000..da0a7ee1 Binary files /dev/null and b/choice_learn/datasets/data/electricity.csv.gz differ diff --git a/choice_learn/datasets/data/heating_data.csv.gz b/choice_learn/datasets/data/heating_data.csv.gz new file mode 100644 index 00000000..5fd53831 Binary files /dev/null and b/choice_learn/datasets/data/heating_data.csv.gz differ diff --git a/choice_learn/datasets/data/train_data.csv.gz b/choice_learn/datasets/data/train_data.csv.gz new file mode 100644 index 00000000..540f7d32 Binary files /dev/null and b/choice_learn/datasets/data/train_data.csv.gz differ diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py index 4808324e..c25162cc 100644 --- a/choice_learn/models/base_model.py +++ b/choice_learn/models/base_model.py @@ -20,6 +20,7 @@ def __init__( label_smoothing=0.0, normalize_non_buy=False, optimizer="Adam", 
+ tolerance=1e-8, callbacks=None, lr=0.001, epochs=1, @@ -38,6 +39,15 @@ def __init__( normalization,by default True callbacks : list of tf.kera callbacks, optional List of callbacks to add to model.fit, by default None and only add History + optimizer : str, optional + Name of the tf.keras.optimizers to be used, by default "Adam" + tolerance : float, optional + Tolerance for the L-BFGS optimizer if applied, by default 1e-8 + lr: float, optional + Learning rate for the optimizer if applied, by default 0.001 + epochs: int, optional + (Max) Number of epochs to train the model, by default 1 + batch_size: int, optional """ self.is_fitted = False self.normalize_non_buy = normalize_non_buy @@ -69,6 +79,7 @@ def __init__( self.epochs = epochs self.batch_size = batch_size + self.tolerance = tolerance @abstractmethod def compute_batch_utility( @@ -346,7 +357,7 @@ def fit( contexts_items_batch, availabilities_batch, choices_batch, - )[0] + )[0]["optimized_loss"] ) val_logs["val_loss"].append(test_losses[-1]) temps_logs = {k: tf.reduce_mean(v) for k, v in val_logs.items()} @@ -432,11 +443,18 @@ def batch_predict( # Compute loss from probabilities & actual choices # batch_loss = self.loss(probabilities, c_batch, sample_weight=sample_weight) - batch_loss = self.loss( - y_pred=probabilities, - y_true=tf.one_hot(choices, depth=probabilities.shape[1]), - sample_weight=sample_weight, - ) + batch_loss = { + "optimized_loss": self.loss( + y_pred=probabilities, + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), + sample_weight=sample_weight, + ), + "NegativeLogLikelihood": tf.keras.losses.CategoricalCrossentropy()( + y_pred=probabilities, + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), + sample_weight=sample_weight, + ), + } return batch_loss, probabilities def save_model(self, path): @@ -524,7 +542,7 @@ def predict_probas(self, choice_dataset, batch_size=-1): return tf.concat(stacked_probabilities, axis=0) - def evaluate(self, choice_dataset, batch_size=-1): + 
def evaluate(self, choice_dataset, sample_weight=None, batch_size=-1, mode="eval"): """Evaluates the model for each context and each product of a ChoiceDataset. Predicts the probabilities according to the model and computes the Negative-Log-Likelihood @@ -554,8 +572,12 @@ def evaluate(self, choice_dataset, batch_size=-1): contexts_items_features=contexts_items_features, contexts_items_availabilities=contexts_items_availabilities, choices=choices, + sample_weight=sample_weight, ) - batch_losses.append(loss) + if mode == "eval": + batch_losses.append(loss["NegativeLogLikelihood"]) + elif mode == "optim": + batch_losses.append(loss["optimized_loss"]) if batch_size != -1: last_batch_size = contexts_items_availabilities.shape[0] coefficients = tf.concat( @@ -567,13 +589,15 @@ def evaluate(self, choice_dataset, batch_size=-1): batch_loss = tf.reduce_mean(batch_losses) return batch_loss - def _lbfgs_train_step(self, dataset): + def _lbfgs_train_step(self, dataset, sample_weight=None): """A factory to create a function required by tfp.optimizer.lbfgs_minimize. Parameters ---------- dataset: ChoiceDataset Dataset on which to estimate the paramters. + sample_weight: np.ndarray, optional + Sample weights to apply, by default None Returns: -------- @@ -636,7 +660,9 @@ def f(params_1d): # update the parameters in the model assign_new_model_parameters(params_1d) # calculate the loss - loss_value = self.evaluate(dataset, batch_size=-1) + loss_value = self.evaluate( + dataset, sample_weight=sample_weight, batch_size=-1, mode="optim" + ) # calculate gradients and convert to 1D tf.Tensor grads = tape.gradient(loss_value, self.weights) @@ -659,7 +685,7 @@ def f(params_1d): f.history = [] return f - def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8): + def _fit_with_lbfgs(self, dataset, epochs=None, sample_weight=None, verbose=0): """Fit function for L-BFGS optimizer. Replaces the .fit method when the optimizer is set to L-BFGS. 
@@ -668,10 +694,12 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8): ---------- dataset : ChoiceDataset Dataset to be used for coefficients estimations - n_epochs : int + epochs : int Maximum number of epochs allowed to reach minimum - tolerance : float, optional - Maximum tolerance accepted, by default 1e-8 + sample_weight : np.ndarray, optional + Sample weights to apply, by default None + verbose : int, optional + print level, for debugging, by default 0 Returns: -------- @@ -684,7 +712,7 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8): if epochs is None: epochs = self.epochs - func = self._lbfgs_train_step(dataset) + func = self._lbfgs_train_step(dataset, sample_weight=sample_weight) # convert initial model parameters to a 1D tf.Tensor init_params = tf.dynamic_stitch(func.idx, self.weights) @@ -694,7 +722,7 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8): value_and_gradients_function=func, initial_position=init_params, max_iterations=epochs, - tolerance=tolerance, + tolerance=self.tolerance, f_absolute_tolerance=-1, f_relative_tolerance=-1, ) @@ -702,29 +730,97 @@ def _fit_with_lbfgs(self, dataset, epochs=None, tolerance=1e-8): # after training, the final optimized parameters are still in results.position # so we have to manually put them back to the model func.assign_new_model_parameters(results.position) - print("L-BFGS Opimization finished:") - print("---------------------------------------------------------------") - print("Number of iterations:", results[2].numpy()) - print("Algorithm converged before reaching max iterations:", results[0].numpy()) + if verbose > 0: + print("L-BFGS Opimization finished:") + print("---------------------------------------------------------------") + print("Number of iterations:", results[2].numpy()) + print("Algorithm converged before reaching max iterations:", results[0].numpy()) return func.history -class RandomChoiceModel(ChoiceModel): - """Dumb model that randomly 
attributes utilities to products.""" +class BaseLatentClassModel(object): # TODO: should inherit ChoiceModel ? + """Base Class to work with Mixtures of models.""" - def __init__(self, **kwargs): - """Initialization of the model.""" - super().__init__(**kwargs) + def __init__( + self, + n_latent_classes, + model_class, + model_parameters, + fit_method, + epochs, + optimizer=None, + add_exit_choice=False, + tolerance=1e-6, + lr=0.001, + ): + """Instantiation of the model mixture. - def compute_batch_utility( + Parameters + ---------- + n_latent_classes : int + Number of latent classes + model_class : BaseModel + class of models to get a mixture of + model_parameters : dict + hyper-parameters of the models + fit_method : str + Method to estimate the parameters: "EM", "MLE". + epochs : int + Number of epochs to train the model. + optimizer: str, optional + Name of the tf.keras.optimizers to be used if one is used, by default None + add_exit_choice : bool, optional + Whether or not to add an exit choice, by default False + tolerance: float, optional + Tolerance for the L-BFGS optimizer if applied, by default 1e-6 + lr: float, optional + Learning rate for the optimizer if applied, by default 0.001 + """ + self.n_latent_classes = n_latent_classes + if isinstance(model_parameters, list): + if not len(model_parameters) == n_latent_classes: + raise ValueError( + """If you specify a list of hyper-parameters, it means that you want to use\ + different hyper-parameters for each latent class. 
In this case, the length\ + of the list must be equal to the number of latent classes.""" + ) + self.model_parameters = model_parameters + else: + self.model_parameters = [model_parameters] * n_latent_classes + self.model_class = model_class + self.fit_method = fit_method + + self.epochs = epochs + self.add_exit_choice = add_exit_choice + self.tolerance = tolerance + self.optimizer = optimizer + self.lr = lr + + self.loss = tf_ops.CustomCategoricalCrossEntropy(from_logits=False, label_smoothing=0) + self.instantiated = False + + def instantiate(self, **kwargs): + """Instantiation.""" + init_logit = tf.Variable( + tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(self.n_latent_classes - 1,)), + name="Latent-Logits", + ) + self.latent_logits = init_logit + self.models = [self.model_class(**mp) for mp in self.model_parameters] + for model in self.models: + model.instantiate(**kwargs) + + # @tf.function + def batch_predict( self, fixed_items_features, contexts_features, contexts_items_features, contexts_items_availabilities, choices, + sample_weight=None, ): - """Computes the random utility for each product of each context. + """Function that represents one prediction (Probas + Loss) for one batch of a ChoiceDataset. 
Parameters ---------- @@ -744,42 +840,58 @@ def compute_batch_utility( choices_batch : np.ndarray Choices Shape must be (n_contexts, ) + sample_weight : np.ndarray, optional + List samples weights to apply during the gradient descent to the batch elements, + by default None Returns: -------- - tf.Tensor - (n_contexts, n_items) matrix of random utilities + tf.Tensor (1, ) + Value of NegativeLogLikelihood loss for the batch + tf.Tensor (batch_size, n_items) + Probabilities for each product to be chosen for each context """ - # In order to avoid unused arguments warnings - _ = fixed_items_features, contexts_features, contexts_items_availabilities, choices - return np.squeeze( - np.random.uniform(shape=(contexts_items_features.shape), minval=0, maxval=1) + # Compute utilities from features + utilities = self.compute_batch_utility( + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, ) - def fit(**kwargs): - """Make sure that nothing happens during .fit.""" - _ = kwargs - return {} - - -class DistribMimickingModel(ChoiceModel): - """Dumb class model that mimicks the probabilities. 
- - It stores the encountered in the train datasets and always returns them - """ - - def __init__(self, **kwargs): - """Initialization of the model.""" - super().__init__(**kwargs) - self.weights = [] + latent_probabilities = tf.concat( + [[tf.constant(1.0)], tf.math.exp(self.latent_logits)], axis=0 + ) + latent_probabilities = latent_probabilities / tf.reduce_sum(latent_probabilities) + # Compute probabilities from utilities & availabilties + probabilities = [] + for i, class_utilities in enumerate(utilities): + class_probabilities = tf_ops.softmax_with_availabilities( + contexts_items_logits=class_utilities, + contexts_items_availabilities=contexts_items_availabilities, + normalize_exit=self.add_exit_choice, + axis=-1, + ) + probabilities.append(class_probabilities * latent_probabilities[i]) + # Summing over the latent classes + probabilities = tf.reduce_sum(probabilities, axis=0) - def fit(self, choice_dataset, **kwargs): - """Computes the choice frequency of each product and defines it as choice probabilities.""" - _ = kwargs - choices = choice_dataset.choices - for i in range(choice_dataset.get_num_items()): - self.weights.append(tf.reduce_sum(tf.cast(choices == i, tf.float32))) - self.weights = tf.stack(self.weights) / len(choices) + # Compute loss from probabilities & actual choices + # batch_loss = self.loss(probabilities, c_batch, sample_weight=sample_weight) + batch_loss = { + "optimized_loss": self.loss( + y_pred=probabilities, + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), + sample_weight=sample_weight, + ), + "NegativeLogLikelihood": tf.keras.losses.CategoricalCrossentropy()( + y_pred=probabilities, + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), + sample_weight=sample_weight, + ), + } + return batch_loss, probabilities def compute_batch_utility( self, @@ -789,7 +901,9 @@ def compute_batch_utility( contexts_items_availabilities, choices, ): - """Returns utility that is fixed. U = log(P). 
+ """Latent class computation of utility. + + It computes the utility for each of the latent models and stores them in a list. Parameters ---------- @@ -810,19 +924,425 @@ def compute_batch_utility( Choices Shape must be (n_contexts, ) + Returns: + -------- + list of np.ndarray + List of: + Utility of each product for each context. + Shape must be (n_contexts, n_items) + for each of the latent models. + """ + utilities = [] + # Iterates over latent models + for model in self.models: + model_utilities = model.compute_batch_utility( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, + ) + utilities.append(model_utilities) + return utilities + + def fit(self, dataset, sample_weight=None, verbose=0): + """Fit the model on a ChoiceDataset. + + Parameters + ---------- + dataset : ChoiceDataset + Dataset to be used for coefficients estimations + sample_weight : np.ndarray, optional + sample weights to apply, by default None + verbose : int, optional + print level, for debugging, by default 0 + + Returns: + -------- + dict + Fit history + """ + if self.fit_method.lower() == "em": + self.minf = np.log(1e-3) + print("Expectation-Maximization estimation algorithm not well implemented yet.") + return self._em_fit(dataset=dataset, sample_weight=sample_weight, verbose=verbose) + + if self.fit_method.lower() == "mle": + if self.optimizer.lower() == "lbfgs" or self.optimizer.lower() == "l-bfgs": + return self._fit_with_lbfgs( + dataset=dataset, sample_weight=sample_weight, verbose=verbose + ) + + return self._fit_normal(dataset=dataset, sample_weight=sample_weight, verbose=verbose) + + raise ValueError(f"Fit method not implemented: {self.fit_method}") + + def evaluate(self, choice_dataset, sample_weight=None, batch_size=-1, mode="eval"): + """Evaluates the model for each context and each product of a ChoiceDataset. 
+ + Predicts the probabilities according to the model and computes the Negative-Log-Likelihood + loss from the actual choices. + + Parameters + ---------- + choice_dataset : ChoiceDataset + Dataset on which to apply to prediction + Returns: -------- np.ndarray (n_contexts, n_items) - Utilities + Choice probabilties for each context and each product + """ + batch_losses = [] + for ( + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, + ) in choice_dataset.iter_batch(batch_size=batch_size): + loss, _ = self.batch_predict( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, + sample_weight=sample_weight, + ) + if mode == "eval": + batch_losses.append(loss["NegativeLogLikelihood"]) + elif mode == "optim": + batch_losses.append(loss["optimized_loss"]) + if batch_size != -1: + last_batch_size = contexts_items_availabilities.shape[0] + coefficients = tf.concat( + [tf.ones(len(batch_losses) - 1) * batch_size, [last_batch_size]], axis=0 + ) + batch_losses = tf.multiply(batch_losses, coefficients) + batch_loss = tf.reduce_sum(batch_losses) / len(choice_dataset) + else: + batch_loss = tf.reduce_mean(batch_losses) + return batch_loss + + def _lbfgs_train_step(self, dataset, sample_weight=None): + """A factory to create a function required by tfp.optimizer.lbfgs_minimize. + + Parameters + ---------- + dataset: ChoiceDataset + Dataset on which to estimate the paramters. + sample_weight: np.ndarray, optional + Sample weights to apply, by default None + + Returns: + -------- + function + with the signature: + loss_value, gradients = f(model_parameters). 
+ """ + # obtain the shapes of all trainable parameters in the model + weights = [] + w_to_model = [] + w_to_model_indexes = [] + for i, model in enumerate(self.models): + for j, w in enumerate(model.weights): + weights.append(w) + w_to_model.append(i) + w_to_model_indexes.append(j) + weights.append(self.latent_logits) + w_to_model.append(-1) + w_to_model_indexes.append(-1) + shapes = tf.shape_n(weights) + n_tensors = len(shapes) + + # we'll use tf.dynamic_stitch and tf.dynamic_partition later, so we need to + # prepare required information first + count = 0 + idx = [] # stitch indices + part = [] # partition indices + + for i, shape in enumerate(shapes): + n = np.product(shape) + idx.append(tf.reshape(tf.range(count, count + n, dtype=tf.int32), shape)) + part.extend([i] * n) + count += n + + part = tf.constant(part) + + @tf.function + def assign_new_model_parameters(params_1d): + """A function updating the model's parameters with a 1D tf.Tensor. - Raises: - ------- - ValueError - If the model has not been fitted cannot evaluate the utility + Pararmeters + ----------- + params_1d: tf.Tensor + a 1D tf.Tensor representing the model's trainable parameters. + """ + params = tf.dynamic_partition(params_1d, part, n_tensors) + for i, (shape, param) in enumerate(zip(shapes, params)): + if w_to_model[i] != -1: + self.models[w_to_model[i]].weights[w_to_model_indexes[i]].assign( + tf.reshape(param, shape) + ) + else: + self.latent_logits.assign(tf.reshape(param, shape)) + + # now create a function that will be returned by this factory + @tf.function + def f(params_1d): + """A function that can be used by tfp.optimizer.lbfgs_minimize. + + This function is created by function_factory. + + Parameters + ---------- + params_1d: tf.Tensor + a 1D tf.Tensor. + + Returns: + -------- + tf.Tensor + A scalar loss and the gradients w.r.t. the `params_1d`. + tf.Tensor + A 1D tf.Tensor representing the gradients w.r.t. the `params_1d`. 
+ """ + # use GradientTape so that we can calculate the gradient of loss w.r.t. parameters + with tf.GradientTape() as tape: + # update the parameters in the model + assign_new_model_parameters(params_1d) + # calculate the loss + loss_value = self.evaluate( + dataset, sample_weight=sample_weight, batch_size=-1, mode="optim" + ) + # calculate gradients and convert to 1D tf.Tensor + grads = tape.gradient(loss_value, weights) + grads = tf.dynamic_stitch(idx, grads) + + # print out iteration & loss + f.iter.assign_add(1) + + # store loss value so we can retrieve later + tf.py_function(f.history.append, inp=[loss_value], Tout=[]) + + return loss_value, grads + + # store these information as members so we can use them outside the scope + f.iter = tf.Variable(0) + f.idx = idx + f.part = part + f.shapes = shapes + f.assign_new_model_parameters = assign_new_model_parameters + f.history = [] + return f + + def _fit_with_lbfgs(self, dataset, epochs=None, sample_weight=None, verbose=0): + """Fit function for L-BFGS optimizer. + + Replaces the .fit method when the optimizer is set to L-BFGS. 
+ + Parameters + ---------- + dataset : ChoiceDataset + Dataset to be used for coefficients estimations + epochs : int + Maximum number of epochs allowed to reach minimum + sample_weight : np.ndarray, optional + Sample weights to apply, by default None + verbose : int, optional + print level, for debugging, by default 0 + + Returns: + -------- + dict + Fit history + """ + # Only import tensorflow_probability if LBFGS optimizer is used, avoid unnecessary + # dependency + import tensorflow_probability as tfp + + if epochs is None: + epochs = self.epochs + func = self._lbfgs_train_step(dataset, sample_weight=sample_weight) + + # convert initial model parameters to a 1D tf.Tensor + init = [] + for model in self.models: + for w in model.weights: + init.append(w) + init.append(self.latent_logits) + init_params = tf.dynamic_stitch(func.idx, init) + + # train the model with L-BFGS solver + results = tfp.optimizer.lbfgs_minimize( + value_and_gradients_function=func, + initial_position=init_params, + max_iterations=epochs, + tolerance=-1, + f_absolute_tolerance=self.tolerance, + f_relative_tolerance=-1, + x_tolerance=-1, + ) + + # after training, the final optimized parameters are still in results.position + # so we have to manually put them back to the model + func.assign_new_model_parameters(results.position) + if verbose > 0: + print("L-BFGS Opimization finished:") + print("---------------------------------------------------------------") + print("Number of iterations:", results[2].numpy()) + print("Algorithm converged before reaching max iterations:", results[0].numpy()) + return func.history + + def _gd_train_step(self, dataset, sample_weight=None): + pass + + def _nothing(self, inputs): + """_summary_. 
+ + Parameters + ---------- + inputs : _type_ + _description_ + + Returns: + -------- + _type_ + _description_ + """ + latent_probas = tf.clip_by_value( + self.latent_logits - tf.reduce_max(self.latent_logits), self.minf, 0 + ) + latent_probas = tf.math.exp(latent_probas) + # latent_probas = tf.math.abs(self.logit_latent_probas) # alternative implementation + latent_probas = latent_probas / tf.reduce_sum(latent_probas) + proba_list = [] + avail = inputs[4] + for q in range(self.n_latent_classes): + combined = self.models[q].compute_batch_utility(*inputs) + combined = tf.clip_by_value( + combined - tf.reduce_max(combined, axis=1, keepdims=True), self.minf, 0 + ) + combined = tf.keras.layers.Activation(activation=tf.nn.softmax)(combined) + # combined = tf.keras.layers.Softmax()(combined) + combined = combined * avail + combined = latent_probas[q] * tf.math.divide( + combined, tf.reduce_sum(combined, axis=1, keepdims=True) + ) + combined = tf.expand_dims(combined, -1) + proba_list.append(combined) + # print(combined.get_shape()) # it is useful to print the shape of tensors for debugging + + proba_final = tf.keras.layers.Concatenate(axis=2)(proba_list) + return tf.math.reduce_sum(proba_final, axis=2, keepdims=False) + + def _expectation(self, dataset): + predicted_probas = [model.predict_probas(dataset) for model in self.models] + if np.sum(np.isnan(predicted_probas)) > 0: + print("Nan in probas") + predicted_probas = [ + latent + * tf.gather_nd( + params=proba, + indices=tf.stack([tf.range(0, len(dataset), 1), dataset.choices], axis=1), + ) + for latent, proba in zip(self.latent_logits, predicted_probas) + ] + + # E-step + ###### FILL THE CODE BELOW TO ESTIMATE DETERMINE THE WEIGHTS (weights = xxx) + predicted_probas = np.stack(predicted_probas, axis=1) + 1e-10 + loss = np.sum(np.log(np.sum(predicted_probas, axis=1))) + + return predicted_probas / np.sum(predicted_probas, axis=1, keepdims=True), loss + + def _maximization(self, dataset, verbose=0): + """_summary_. 
+ + Parameters + ---------- + dataset : _type_ + _description_ + verbose : int, optional + print level, for debugging, by default 0 + + Returns: + -------- + _type_ + _description_ + """ + self.models = [self.model_class(**mp) for mp in self.model_parameters] + # M-step: MNL estimation + for q in range(self.n_latent_classes): + self.models[q].fit(dataset, sample_weight=self.weights[:, q], verbose=verbose) + + # M-step: latent probability estimation + latent_probas = np.sum(self.weights, axis=0) + + return latent_probas / np.sum(latent_probas) + + def _em_fit(self, dataset, verbose=0): + """Fit with Expectation-Maximization Algorithm. + + Parameters + ---------- + dataset: ChoiceDataset + Dataset to be used for coefficients estimations + verbose : int, optional + print level, for debugging, by default 0 + + Returns: + -------- + list + List of logits for each latent class + list + List of losses at each epoch + """ + hist_logits = [] + hist_loss = [] + # Initialization + for model in self.models: + # model.instantiate() + model.fit(dataset, sample_weight=np.random.rand(len(dataset)), verbose=verbose) + for i in tqdm.trange(self.epochs): + self.weights, loss = self._expectation(dataset) + self.latent_logits = self._maximization(dataset, verbose=verbose) + hist_logits.append(self.latent_logits) + hist_loss.append(loss) + if np.sum(np.isnan(self.latent_logits)) > 0: + print("Nan in logits") + break + return hist_logits, hist_loss + + def predict_probas(self, choice_dataset, batch_size=-1): + """Predicts the choice probabilities for each context and each product of a ChoiceDataset. 
+ + Parameters + ---------- + choice_dataset : ChoiceDataset + Dataset on which to apply to prediction + batch_size : int, optional + Batch size to use for the prediction, by default -1 + + Returns: + -------- + np.ndarray (n_contexts, n_items) + Choice probabilties for each context and each product """ - # In order to avoid unused arguments warnings - _ = fixed_items_features, contexts_features, contexts_items_availabilities - _ = contexts_items_features - if self.weights is None: - raise ValueError("Model not fitted") - return np.stack([np.log(self.weights.numpy())] * len(choices), axis=0) + stacked_probabilities = [] + for ( + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, + ) in choice_dataset.iter_batch(batch_size=batch_size): + _, probabilities = self.batch_predict( + fixed_items_features=fixed_items_features, + contexts_features=contexts_features, + contexts_items_features=contexts_items_features, + contexts_items_availabilities=contexts_items_availabilities, + choices=choices, + ) + stacked_probabilities.append(probabilities) + + return tf.concat(stacked_probabilities, axis=0) diff --git a/choice_learn/models/baseline_models.py b/choice_learn/models/baseline_models.py new file mode 100644 index 00000000..4a93ebbf --- /dev/null +++ b/choice_learn/models/baseline_models.py @@ -0,0 +1,124 @@ +"""Models to be used as baselines for choice modeling. Nothing smart here.""" +import numpy as np +import tensorflow as tf + +from .base_model import ChoiceModel + + +class RandomChoiceModel(ChoiceModel): + """Dumb model that randomly attributes utilities to products.""" + + def __init__(self, **kwargs): + """Initialization of the model.""" + super().__init__(**kwargs) + + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, + ): + """Computes the random utility for each product of each context. 
+ + Parameters + ---------- + fixed_items_features : tuple of np.ndarray + Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products + constant/fixed features. + Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) + choices_batch : np.ndarray + Choices + Shape must be (n_contexts, ) + + Returns: + -------- + tf.Tensor + (n_contexts, n_items) matrix of random utilities + """ + # In order to avoid unused arguments warnings + _ = fixed_items_features, contexts_features, contexts_items_availabilities, choices + return np.squeeze( + np.random.uniform(shape=(contexts_items_features.shape), minval=0, maxval=1) + ) + + def fit(**kwargs): + """Make sure that nothing happens during .fit.""" + _ = kwargs + return {} + + +class DistribMimickingModel(ChoiceModel): + """Dumb class model that mimicks the probabilities. 
+ + It stores the encountered in the train datasets and always returns them + """ + + def __init__(self, **kwargs): + """Initialization of the model.""" + super().__init__(**kwargs) + self.weights = [] + + def fit(self, choice_dataset, **kwargs): + """Computes the choice frequency of each product and defines it as choice probabilities.""" + _ = kwargs + choices = choice_dataset.choices + for i in range(choice_dataset.get_num_items()): + self.weights.append(tf.reduce_sum(tf.cast(choices == i, tf.float32))) + self.weights = tf.stack(self.weights) / len(choices) + + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, + ): + """Returns utility that is fixed. U = log(P). + + Parameters + ---------- + fixed_items_features : tuple of np.ndarray + Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products + constant/fixed features. + Shape must be (n_items, n_items_features) + contexts_features : tuple of np.ndarray (contexts_features) + a batch of contexts features + Shape must be (n_contexts, n_contexts_features) + contexts_items_features : tuple of np.ndarray (contexts_items_features) + a batch of contexts items features + Shape must be (n_contexts, n_contexts_items_features) + contexts_items_availabilities : np.ndarray + A batch of contexts items availabilities + Shape must be (n_contexts, n_items) + choices_batch : np.ndarray + Choices + Shape must be (n_contexts, ) + + Returns: + -------- + np.ndarray (n_contexts, n_items) + Utilities + + Raises: + ------- + ValueError + If the model has not been fitted cannot evaluate the utility + """ + # In order to avoid unused arguments warnings + _ = fixed_items_features, contexts_features, contexts_items_availabilities + _ = contexts_items_features + if self.weights is None: + raise ValueError("Model not fitted") + return np.stack([np.log(self.weights.numpy())] * len(choices), axis=0) diff --git 
a/choice_learn/models/conditional_mnl.py b/choice_learn/models/conditional_mnl.py index 5a95b60c..27f8f54d 100644 --- a/choice_learn/models/conditional_mnl.py +++ b/choice_learn/models/conditional_mnl.py @@ -324,6 +324,7 @@ def instantiate_from_specifications(self): ## Fill items_indexes here # Better organize feat_to_weight and specifications + self.weights = weights return weights def _store_dataset_features_names(self, dataset): @@ -629,16 +630,18 @@ def instantiate( """ # Possibility to stack weights to be faster ???? if items_features_names is None: - items_features_names = [] + items_features_names = [()] if contexts_features_names is None: - contexts_features_names = [] + contexts_features_names = [()] if contexts_items_features_names is None: - contexts_items_features_names = [] + contexts_items_features_names = [()] weights = [] weights_count = 0 self._items_features_names = [] for feat_tuple in items_features_names: tuple_names = [] + if feat_tuple is None: + feat_tuple = () for feat in feat_tuple: if feat in self.params.keys(): if self.params[feat] == "constant": @@ -671,6 +674,8 @@ def instantiate( self._contexts_features_names = [] for feat_tuple in contexts_features_names: + if feat_tuple is None: + feat_tuple = () tuple_names = [] for feat in feat_tuple: if feat in self.params.keys(): @@ -706,6 +711,8 @@ def instantiate( self._contexts_items_features_names = [] for feat_tuple in contexts_items_features_names: + if feat_tuple is None: + feat_tuple = () tuple_names = [] for feat in feat_tuple: if feat in self.params.keys(): @@ -783,6 +790,7 @@ def instantiate( self.instantiated = True else: raise ValueError("No weights instantiated") + self.weights = weights return weights def compute_batch_utility( @@ -820,6 +828,7 @@ def compute_batch_utility( Computed utilities of shape (n_choices, n_items). 
""" if isinstance(self.params, ModelSpecification): + print("Model in instantiated using manual specification") return self.compute_batch_utility_from_specification( fixed_items_features=fixed_items_features, contexts_features=contexts_features, @@ -1001,7 +1010,14 @@ def fit(self, choice_dataset, get_report=False, **kwargs): self.report = self.compute_report(choice_dataset) return fit - def _fit_with_lbfgs(self, choice_dataset, epochs=None, tolerance=1e-8, get_report=False): + def _fit_with_lbfgs( + self, + choice_dataset, + epochs=None, + sample_weight=None, + get_report=False, + **kwargs, + ): """Specific fit function to estimate the paramters with LBFGS. Parameters @@ -1034,7 +1050,12 @@ def _fit_with_lbfgs(self, choice_dataset, epochs=None, tolerance=1e-8, get_repor self.instantiated = True if epochs is None: epochs = self.epochs - fit = super()._fit_with_lbfgs(choice_dataset, epochs, tolerance) + fit = super()._fit_with_lbfgs( + dataset=choice_dataset, + epochs=epochs, + sample_weight=sample_weight, + **kwargs, + ) if get_report: self.report = self.compute_report(choice_dataset) return fit @@ -1113,7 +1134,7 @@ def get_weights_std(self, dataset): probabilities = tf.nn.softmax(utilities, axis=-1) loss = tf.keras.losses.CategoricalCrossentropy(reduction="sum")( y_pred=probabilities, - y_true=tf.one_hot(dataset.choices, depth=4), + y_true=tf.one_hot(dataset.choices, depth=probabilities.shape[1]), ) # Compute the Jacobian jacobian = tape_2.jacobian(loss, w) diff --git a/choice_learn/models/latent_class_mnl.py b/choice_learn/models/latent_class_mnl.py new file mode 100644 index 00000000..94561ff6 --- /dev/null +++ b/choice_learn/models/latent_class_mnl.py @@ -0,0 +1,349 @@ +"""Latent Class MNL models.""" +import copy + +import tensorflow as tf + +from .base_model import BaseLatentClassModel +from .conditional_mnl import ConditionalMNL, ModelSpecification +from .simple_mnl import SimpleMNL + + +class LatentClassSimpleMNL(BaseLatentClassModel): + """Latent Class for 
SimpleMNL.""" + + def __init__( + self, + n_latent_classes, + fit_method, + epochs, + add_exit_choice=False, + tolerance=1e-6, + intercept=None, + optimizer="Adam", + lr=0.001, + **kwargs, + ): + """Initialization. + + Parameters + ---------- + n_latent_classes : int + Number of latent classes. + fit_method : str + Method to be used to estimate the model. + epochs : int + Number of epochs + add_exit_choice : bool, optional + Whether to normalize probabilities with exit choice, by default False + tolerance : float, optional + LBFG-S tolerance, by default 1e-6 + intercept : str, optional + Type of intercept to include in the SimpleMNL. + Must be in (None, 'item', 'item-full', 'constant'), by default None + optimizer : str, optional + tf.keras.optimizers to be used, by default "Adam" + lr : float, optional + Learning rate to use for optimizer if relevant, by default 0.001 + """ + self.n_latent_classes = n_latent_classes + self.intercept = intercept + model_params = { + "add_exit_choice": add_exit_choice, + "intercept": intercept, + "optimizer": optimizer, + "tolerance": tolerance, + "lr": lr, + "epochs": epochs, + } + + super().__init__( + model_class=SimpleMNL, + model_parameters=model_params, + n_latent_classes=n_latent_classes, + fit_method=fit_method, + epochs=epochs, + add_exit_choice=add_exit_choice, + tolerance=tolerance, + optimizer=optimizer, + lr=lr, + **kwargs, + ) + + def instantiate_latent_models( + self, n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features + ): + """Instantiation of the Latent Models that are SimpleMNLs. + + Parameters + ---------- + n_items : int + Number of items/aternatives to consider. + n_fixed_items_features : int + Number of fixed items features. 
+ n_contexts_features : int + Number of contexts features + n_contexts_items_features : int + Number of contexts items features + """ + for model in self.models: + model.indexes, model.weights = model.instantiate( + n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features + ) + model.instantiated = True + + def instantiate( + self, n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features + ): + """Instantiation of the Latent Class MNL model.""" + self.latent_logits = tf.Variable( + tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(self.n_latent_classes - 1,)), + name="Latent-Logits", + ) + + self.models = [self.model_class(**mp) for mp in self.model_parameters] + + self.instantiate_latent_models( + n_items=n_items, + n_fixed_items_features=n_fixed_items_features, + n_contexts_features=n_contexts_features, + n_contexts_items_features=n_contexts_items_features, + ) + + def fit(self, dataset, **kwargs): + """Fit the model to the dataset. + + Parameters + ---------- + dataset : ChoiceDataset + Dataset to fit the model to. + """ + if not self.instantiated: + self.instantiate( + n_items=dataset.get_n_items(), + n_fixed_items_features=dataset.get_n_fixed_items_features(), + n_contexts_features=dataset.get_n_contexts_features(), + n_contexts_items_features=dataset.get_n_contexts_items_features(), + ) + return super().fit(dataset, **kwargs) + + +class LatentClassConditionalMNL(BaseLatentClassModel): + """Latent Class for ConditionalMNL.""" + + def __init__( + self, + n_latent_classes, + fit_method, + parameters=None, + epochs=1, + add_exit_choice=False, + tolerance=1e-6, + optimizer="Adam", + lr=0.001, + **kwargs, + ): + """Initialization. + + Parameters + ---------- + n_latent_classes : int + Number of latent classes. + fit_method : str + Method to be used to estimate the model. + parameters : dict or ModelSpecification + Dictionnary containing the parametrization of the model. 
+ The dictionnary must have the following structure: + {feature_name_1: mode_1, feature_name_2: mode_2, ...} + mode must be among "constant", "item", "item-full" for now + (same specifications as torch-choice). + epochs : int + Number of epochs + add_exit_choice : bool, optional + Whether to normalize probabilities with exit choice, by default False + tolerance : float, optional + LBFG-S tolerance, by default 1e-6 + optimizer : str, optional + tf.keras.optimizers to be used, by default "Adam" + lr : float, optional + Learning rate to use for optimizer if relevant, by default 0.001 + """ + self.n_latent_classes = n_latent_classes + self.fit_method = fit_method + self.params = parameters + self.epochs = epochs + self.add_exit_choice = add_exit_choice + self.tolerance = tolerance + self.optimizer = optimizer + self.lr = lr + + model_params = { + "parameters": self.params, + "add_exit_choice": self.add_exit_choice, + "optimizer": self.optimizer, + "tolerance": self.tolerance, + "lr": self.lr, + "epochs": self.epochs, + } + + super().__init__( + model_class=ConditionalMNL, + model_parameters=model_params, + n_latent_classes=n_latent_classes, + fit_method=fit_method, + epochs=epochs, + add_exit_choice=add_exit_choice, + tolerance=tolerance, + optimizer=optimizer, + lr=lr, + **kwargs, + ) + + def instantiate_latent_models( + self, + n_items, + items_features_names, + contexts_features_names, + contexts_items_features_names, + ): + """Instantiation of the Latent Models that are SimpleMNLs. + + Parameters + ---------- + n_items : int + Number of items/aternatives to consider. 
+ items_features_names: str, + Names of fixed_items_features + contexts_features_names: str, + Names of contexts features + contexts_items_features_names: str, + Names of contexts items features + """ + if isinstance(self.params, ModelSpecification): + for model in self.models: + model.params = copy.deepcopy(self.params) + model.weights = model.instantiate_from_specifications() + + model._items_features_names = items_features_names + model._contexts_features_names = contexts_features_names + model._contexts_items_features_names = contexts_items_features_names + else: + for model in self.models: + model.params = self.params + model.indexes, model.weights = model.instantiate( + num_items=n_items, + items_features_names=items_features_names, + contexts_features_names=contexts_features_names, + contexts_items_features_names=contexts_items_features_names, + ) + model.instantiated = True + + def instantiate( + self, + n_items, + items_features_names, + contexts_features_names, + contexts_items_features_names, + ): + """Instantiation of the Latent Class MNL model.""" + self.latent_logits = tf.Variable( + tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(self.n_latent_classes - 1,)), + name="Latent-Logits", + ) + + self.models = [self.model_class(**mp) for mp in self.model_parameters] + + self.instantiate_latent_models( + n_items=n_items, + items_features_names=items_features_names, + contexts_features_names=contexts_features_names, + contexts_items_features_names=contexts_items_features_names, + ) + + def add_coefficients( + self, coefficient_name, feature_name, items_indexes=None, items_names=None + ): + """Adds a coefficient to the model throught the specification of the utility. + + Parameters + ---------- + coefficient_name : str + Name given to the coefficient. + feature_name : str + features name to which the coefficient is associated. It should work with + the names given. + in the ChoiceDataset that will be used for parameters estimation. 
+ items_indexes : list of int, optional + list of items indexes (in the ChoiceDataset) for which we need to add a coefficient, + by default None + items_names : list of str, optional + list of items names (in the ChoiceDataset) for which we need to add a coefficient, + by default None + + Raises: + ------- + ValueError + When names or indexes are both not specified. + """ + if self.params is None: + self.params = ModelSpecification() + elif not isinstance(self.params, ModelSpecification): + raise ValueError("Cannot add coefficient on top of a dict instantiation.") + self.params.add_coefficients( + coefficient_name=coefficient_name, + feature_name=feature_name, + items_indexes=items_indexes, + items_names=items_names, + ) + + def add_shared_coefficient( + self, coefficient_name, feature_name, items_indexes=None, items_names=None + ): + """Adds a single, shared coefficient to the model throught the specification of the utility. + + Parameters + ---------- + coefficient_name : str + Name given to the coefficient. + feature_name : str + features name to which the coefficient is associated. It should work with + the names given. + in the ChoiceDataset that will be used for parameters estimation. + items_indexes : list of int, optional + list of items indexes (in the ChoiceDataset) for which the coefficient will be used, + by default None + items_names : list of str, optional + list of items names (in the ChoiceDataset) for which the coefficient will be used, + by default None + + Raises: + ------- + ValueError + When names or indexes are both not specified. 
+ """ + if self.params is None: + self.params = ModelSpecification() + elif not isinstance(self.params, ModelSpecification): + raise ValueError("Cannot add shared coefficient on top of a dict instantiation.") + self.params.add_shared_coefficient( + coefficient_name=coefficient_name, + feature_name=feature_name, + items_indexes=items_indexes, + items_names=items_names, + ) + + def fit(self, dataset, **kwargs): + """Fit the model to the dataset. + + Parameters + ---------- + dataset : ChoiceDataset + Dataset to fit the model to. + """ + if not self.instantiated: + self.instantiate( + n_items=dataset.get_n_items(), + items_features_names=dataset.fixed_items_features_names, + contexts_features_names=dataset.contexts_features_names, + contexts_items_features_names=dataset.contexts_items_features_names, + ) + return super().fit(dataset, **kwargs) diff --git a/choice_learn/models/rumnet.py b/choice_learn/models/rumnet.py index 09f08fa9..2872bce7 100644 --- a/choice_learn/models/rumnet.py +++ b/choice_learn/models/rumnet.py @@ -1133,9 +1133,17 @@ def batch_predict( probabilities = tf.divide( probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5 ) - batch_loss = self.loss( - y_pred=probabilities, - y_true=tf.one_hot(choices, depth=probabilities.shape[1]), - sample_weight=sample_weight, - ) + + batch_loss = { + "optimized_loss": self.loss( + y_pred=probabilities, + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), + sample_weight=sample_weight, + ), + "NegativeLogLikelihood": tf.keras.losses.CategoricalCrossentropy()( + y_pred=probabilities, + y_true=tf.one_hot(choices, depth=probabilities.shape[1]), + sample_weight=sample_weight, + ), + } return batch_loss, probabilities diff --git a/choice_learn/models/simple_mnl.py b/choice_learn/models/simple_mnl.py new file mode 100644 index 00000000..b23bf9f2 --- /dev/null +++ b/choice_learn/models/simple_mnl.py @@ -0,0 +1,368 @@ +"""Implementation of the simple linear multinomial logit model. 
+ +It is a multi output logistic regression. +""" + +import pandas as pd +import tensorflow as tf + +from .base_model import ChoiceModel + + +class SimpleMNL(ChoiceModel): + """Simple MNL with one linear coefficient to estimate by feature.""" + + def __init__( + self, + add_exit_choice=False, + intercept=None, + optimizer="Adam", + lr=0.001, + **kwargs, + ): + """Initialization of Simple-MNL. + + Parameters: + ----------- + add_exit_choice : bool, optional + Whether or not to normalize the probabilities computation with an exit choice + whose utility would be 1, by default True + optimizer: str + TensorFlow optimizer to be used for estimation + lr: float + Learning Rate to be used with optimizer. + """ + super().__init__(normalize_non_buy=add_exit_choice, optimizer=optimizer, lr=lr, **kwargs) + self.instantiated = False + self.intercept = intercept + + def instantiate( + self, n_items, n_fixed_items_features, n_contexts_features, n_contexts_items_features + ): + """Instantiate the model from ModelSpecification object. + + Parameters + -------- + Parameters + ---------- + n_items : int + Number of items/aternatives to consider. + n_fixed_items_features : int + Number of fixed items features. + n_contexts_features : int + Number of contexts features + n_contexts_items_features : int + Number of contexts items features + + Returns: + -------- + list of tf.Tensor + List of the weights created coresponding to the specification. 
+ """ + weights = [] + indexes = {} + for n_feat, feat_name in zip( + [n_fixed_items_features, n_contexts_features, n_contexts_items_features], + ["items", "contexts", "contexts_items"], + ): + if n_feat > 0: + weights = [ + tf.Variable( + tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(n_feat,)), + name=f"Weights_{feat_name}", + ) + ] + indexes[feat_name] = len(weights) - 1 + if self.intercept is None: + print("No intercept in the model") + elif self.intercept == "item": + weights.append( + tf.Variable( + tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(n_items - 1,)), + name="Intercept", + ) + ) + indexes["intercept"] = len(weights) - 1 + elif self.intercept == "item-full": + print("Are you sure you do not want to normalize an intercept to 0?") + weights.append( + tf.Variable( + tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(n_items,)), + name="Intercept", + ) + ) + indexes["intercept"] = len(weights) - 1 + else: + weights.append( + tf.Variable( + tf.random_normal_initializer(0.0, 0.02, seed=42)(shape=(1,)), + name="Intercept", + ) + ) + indexes["intercept"] = len(weights) - 1 + + self.instantiated = True + self.indexes = indexes + self.weights = weights + return indexes, weights + + def compute_batch_utility( + self, + fixed_items_features, + contexts_features, + contexts_items_features, + contexts_items_availabilities, + choices, + ): + """Main method to compute the utility of the model. Selects the right method to compute. + + Parameters + ---------- + fixed_items_features : tuple of np.ndarray + Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the products + constant/fixed features. 
def compute_batch_utility(
    self,
    fixed_items_features,
    contexts_features,
    contexts_items_features,
    contexts_items_availabilities,
    choices,
):
    """Compute the utility of every item for each context of the batch.

    The utility is a sum of linear terms (one weight per feature, looked up through
    self.indexes) plus the intercept selected by self.intercept.

    Parameters
    ----------
    fixed_items_features : np.ndarray or tuple of np.ndarray
        Fixed-Item-Features: formatting from ChoiceDataset: a matrix representing the
        products constant/fixed features.
        Shape must be (n_items, n_items_features)
    contexts_features : np.ndarray or tuple of np.ndarray
        A batch of contexts features
        Shape must be (n_contexts, n_contexts_features)
    contexts_items_features : np.ndarray or tuple of np.ndarray
        A batch of contexts items features. Tuples are concatenated on axis 2, so each
        array is expected to be (n_contexts, n_items, n_contexts_items_features).
    contexts_items_availabilities : np.ndarray
        A batch of contexts items availabilities
        Shape must be (n_contexts, n_items). Only its shape is used, and only when the
        model has no contexts_items weights.
    choices : np.ndarray
        Choices, shape (n_contexts, ). Unused, kept for interface compatibility.

    Returns:
    --------
    tf.Tensor
        Computed utilities of shape (n_contexts, n_items).
    """
    _ = choices

    if "items" in self.indexes:
        if isinstance(fixed_items_features, tuple):
            # tf.concat expects the tensors as ONE list argument. Unpacking the tuple
            # (as the previous code did) fails as soon as it holds more than one array,
            # because the second array collides with the `axis` keyword.
            fixed_items_features = tf.concat([*fixed_items_features], axis=1)
        # (n_items, n_features) . (n_features,) -> (n_items,), broadcast over contexts.
        fixed_items_utilities = tf.tensordot(
            fixed_items_features, self.weights[self.indexes["items"]], axes=1
        )
    else:
        fixed_items_utilities = 0

    if "contexts" in self.indexes:
        if isinstance(contexts_features, tuple):
            contexts_features = tf.concat([*contexts_features], axis=1)
        # (n_contexts, n_features) . (n_features,) -> (n_contexts,)
        contexts_utilities = tf.tensordot(
            contexts_features, self.weights[self.indexes["contexts"]], axes=1
        )
        # Expand on the LAST axis -> (n_contexts, 1) so the per-context term broadcasts
        # over the items. Expanding on axis 0 gave (1, n_contexts), which cannot be added
        # to a (n_contexts, n_items) tensor.
        contexts_utilities = tf.expand_dims(contexts_utilities, axis=-1)
    else:
        contexts_utilities = 0

    if "contexts_items" in self.indexes:
        if isinstance(contexts_items_features, tuple):
            contexts_items_features = tf.concat([*contexts_items_features], axis=2)
        # (n_contexts, n_items, n_features) . (n_features,) -> (n_contexts, n_items)
        contexts_items_utilities = tf.tensordot(
            contexts_items_features, self.weights[self.indexes["contexts_items"]], axes=1
        )
    else:
        # The previous code assigned this fallback to `contexts_utilities`, leaving
        # `contexts_items_utilities` undefined at the return statement.
        # A zero tensor (rather than the scalar 0) keeps the result at full batch shape.
        # Assumes the availabilities have the documented (n_contexts, n_items) shape.
        contexts_items_utilities = tf.zeros_like(
            contexts_items_availabilities, dtype=tf.float32
        )

    if "intercept" in self.indexes:
        intercept = self.weights[self.indexes["intercept"]]
        if self.intercept == "item":
            # Only n_items - 1 intercepts are learned; the first item is pinned to 0.
            intercept = tf.concat([tf.constant([0.0]), intercept], axis=0)
        if self.intercept in ["item", "item-full"]:
            # (n_items,) -> (1, n_items) to broadcast over the contexts.
            intercept = tf.expand_dims(intercept, axis=0)
    else:
        intercept = 0

    return fixed_items_utilities + contexts_utilities + contexts_items_utilities + intercept


def fit(self, choice_dataset, get_report=False, **kwargs):
    """Main fit function to estimate the parameters.

    Parameters
    ----------
    choice_dataset : ChoiceDataset
        Choice dataset to use for the estimation.
    get_report: bool, optional
        Whether or not to compute a report of the estimation, by default False.
        When True the report is stored in self.report.
    **kwargs
        Forwarded unchanged to the parent class' fit method.

    Returns:
    --------
    object
        Whatever the parent class' fit method returns.
    """
    if not self.instantiated:
        # Lazy instantiation: weight shapes are read from the dataset.
        print("Instantiation")
        self.indexes, self.weights = self.instantiate(
            n_items=choice_dataset.get_n_items(),
            n_fixed_items_features=choice_dataset.get_n_fixed_items_features(),
            n_contexts_features=choice_dataset.get_n_contexts_features(),
            n_contexts_items_features=choice_dataset.get_n_contexts_items_features(),
        )
        self.instantiated = True
    history = super().fit(choice_dataset=choice_dataset, **kwargs)
    if get_report:
        self.report = self.compute_report(choice_dataset)
    return history


def _fit_with_lbfgs(
    self, choice_dataset, epochs=None, sample_weight=None, get_report=False, **kwargs
):
    """Specific fit function to estimate the parameters with LBFGS.

    Parameters
    ----------
    choice_dataset : ChoiceDataset
        Choice dataset to use for the estimation.
    epochs : int, optional
        Number of epochs to run, by default None meaning that self.epochs is used.
    sample_weight: Iterable, optional
        list of each sample weight, by default None meaning that all samples have weight 1.
    get_report: bool, optional
        Whether or not to compute a report of the estimation, by default False.
        When True the report is stored in self.report.
    **kwargs
        Forwarded unchanged to the parent class' _fit_with_lbfgs method.

    Returns:
    --------
    object
        Whatever the parent class' _fit_with_lbfgs method returns.
    """
    if not self.instantiated:
        # Lazy instantiation: weight shapes are read from the dataset.
        print("Instantiation")
        self.indexes, self.weights = self.instantiate(
            n_items=choice_dataset.get_n_items(),
            n_fixed_items_features=choice_dataset.get_n_fixed_items_features(),
            n_contexts_features=choice_dataset.get_n_contexts_features(),
            n_contexts_items_features=choice_dataset.get_n_contexts_items_features(),
        )
        self.instantiated = True
    if epochs is None:
        epochs = self.epochs
    history = super()._fit_with_lbfgs(
        dataset=choice_dataset, epochs=epochs, sample_weight=sample_weight, **kwargs
    )
    if get_report:
        self.report = self.compute_report(choice_dataset)
    return history


def compute_report(self, dataset):
    """Computes a report of the estimated weights.

    Parameters
    ----------
    dataset : ChoiceDataset
        ChoiceDataset used for the estimation of the weights that will be
        used to compute the Std Err of this estimation.

    Returns:
    --------
    pandas.DataFrame
        A DF with estimation, Std Err, z_value and p_value for each coefficient.
    """
    # Imported here because tensorflow_probability is only needed for the report.
    import tensorflow_probability as tfp

    weights_std = self.get_weights_std(dataset)
    dist = tfp.distributions.Normal(loc=0.0, scale=1.0)

    names = []
    z_values = []
    estimations = []
    p_z = []
    # `i` runs over the flattened coefficients, matching the layout of weights_std.
    i = 0
    for weight in self.weights:
        values = weight.numpy()
        for j in range(weight.shape[0]):
            names.append(f"{weight.name}_{j}")
            estimations.append(values[j])
            z_values.append(values[j] / weights_std[i].numpy())
            # Two-sided p-value under a standard normal.
            p_z.append(2 * (1 - dist.cdf(tf.math.abs(z_values[-1])).numpy()))
            i += 1

    return pd.DataFrame(
        {
            "Coefficient Name": names,
            "Coefficient Estimation": estimations,
            "Std. Err": weights_std.numpy(),
            "z_value": z_values,
            "P(.>z)": p_z,
        },
    )


def get_weights_std(self, dataset):
    """Approximates Std Err with Hessian matrix.

    The standard errors are the square roots of the diagonal of the inverse Hessian of
    the summed cross-entropy with respect to the concatenated weights.

    Parameters
    ----------
    dataset : ChoiceDataset
        ChoiceDataset used for the estimation of the weights that will be
        used to compute the Std Err of this estimation.

    Returns:
    --------
    tf.Tensor
        Estimation of the Std Err for the weights.
    """
    # NOTE(review): the nesting of the two tapes below was reconstructed (tape_2 inside
    # tape_1, first derivative taken while tape_1 is still recording) - confirm.
    with tf.GradientTape() as tape_1:
        with tf.GradientTape(persistent=True) as tape_2:
            model = self.clone()
            # Differentiate w.r.t. one flat vector holding every weight.
            w = tf.concat(self.weights, axis=0)
            tape_2.watch(w)
            tape_1.watch(w)
            # Give the clone slices of `w` as weights so the loss depends on `w`.
            mw = []
            index = 0
            for _w in self.weights:
                mw.append(w[index : index + _w.shape[0]])
                index += _w.shape[0]
            model.weights = mw
            # batch_size=-1 is used to iterate the dataset for the loss computation.
            for batch in dataset.iter_batch(batch_size=-1):
                utilities = model.compute_batch_utility(*batch)
                probabilities = tf.nn.softmax(utilities, axis=-1)
                loss = tf.keras.losses.CategoricalCrossentropy(reduction="sum")(
                    y_pred=probabilities,
                    y_true=tf.one_hot(dataset.choices, depth=probabilities.shape[-1]),
                )
        # Compute the Jacobian
        jacobian = tape_2.jacobian(loss, w)
    # Compute the Hessian from the Jacobian
    hessian = tape_1.jacobian(jacobian, w)
    hessian = tf.linalg.inv(tf.squeeze(hessian))
    return tf.sqrt([hessian[i][i] for i in range(len(tf.squeeze(hessian)))])


def clone(self):
    """Returns a clone of the model.

    The clone is a new SimpleMNL built with the same exit-choice setting and optimizer
    name. The remaining state is copied by reference, so weights are shared with the
    original until reassigned.
    """
    clone = SimpleMNL(
        add_exit_choice=self.normalize_non_buy,
        optimizer=self.optimizer_name,
    )
    # Optional attributes are copied only when they exist on the source model.
    for attribute in ("history", "is_fitted", "instantiated"):
        if hasattr(self, attribute):
            setattr(clone, attribute, getattr(self, attribute))
    clone.loss = self.loss
    clone.label_smoothing = self.label_smoothing
    for attribute in (
        "report",
        "weights",
        "indexes",
        "intercept",
        "lr",
        "_items_features_names",
        "_contexts_features_names",
        "_contexts_items_features_names",
    ):
        if hasattr(self, attribute):
            setattr(clone, attribute, getattr(self, attribute))
    return clone
"_contexts_items_features_names"): + clone._contexts_items_features_names = self._contexts_items_features_names + return clone diff --git a/notebooks/choice_learn_introduction_clogit.ipynb b/notebooks/choice_learn_introduction_clogit.ipynb index bd20a3e0..406de3e3 100644 --- a/notebooks/choice_learn_introduction_clogit.ipynb +++ b/notebooks/choice_learn_introduction_clogit.ipynb @@ -211,8 +211,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "The average neg-loglikelihood is: 0.67447394\n", - "The total neg-loglikelihood is: 1874.3630829453468\n" + "The average neg-loglikelihood is: 0.6744666\n", + "The total neg-loglikelihood is: 1874.3427090644836\n" ] } ], @@ -267,25 +267,25 @@ " \n", " 0\n", " beta_inter:0_0\n", - " 0.698367\n", - " 1.280208\n", - " 0.545511\n", - " 5.854024e-01\n", + " 0.698380\n", + " 1.280237\n", + " 0.545508\n", + " 5.854039e-01\n", " \n", " \n", " 1\n", " beta_inter:0_1\n", - " 1.844104\n", - " 0.708454\n", - " 2.602998\n", - " 9.241223e-03\n", + " 1.844129\n", + " 0.708489\n", + " 2.602904\n", + " 9.243727e-03\n", " \n", " \n", " 2\n", " beta_inter:0_2\n", - " 3.274187\n", - " 0.624366\n", - " 5.244018\n", + " 3.274206\n", + " 0.624402\n", + " 5.243744\n", " 1.192093e-07\n", " \n", " \n", @@ -293,7 +293,7 @@ " beta_income:0_0\n", " -0.089087\n", " 0.018347\n", - " -4.855643\n", + " -4.855632\n", " 1.192093e-06\n", " \n", " \n", @@ -301,7 +301,7 @@ " beta_income:0_1\n", " -0.027993\n", " 0.003873\n", - " -7.228673\n", + " -7.228651\n", " 0.000000e+00\n", " \n", " \n", @@ -309,15 +309,15 @@ " beta_income:0_2\n", " -0.038147\n", " 0.004083\n", - " -9.342690\n", + " -9.342653\n", " 0.000000e+00\n", " \n", " \n", " 6\n", " beta_ivt:0_0\n", - " 0.059509\n", + " 0.059510\n", " 0.010073\n", - " 5.907992\n", + " 5.908023\n", " 0.000000e+00\n", " \n", " \n", @@ -325,39 +325,39 @@ " beta_ivt:0_1\n", " -0.006784\n", " 0.004433\n", - " -1.530137\n", - " 1.259828e-01\n", + " -1.530130\n", + " 1.259845e-01\n", " \n", " \n", " 8\n", " 
beta_ivt:0_2\n", " -0.006460\n", " 0.001898\n", - " -3.403037\n", - " 6.663799e-04\n", + " -3.402944\n", + " 6.666183e-04\n", " \n", " \n", " 9\n", " beta_ivt:0_3\n", " -0.001450\n", " 0.001187\n", - " -1.221401\n", - " 2.219341e-01\n", + " -1.221381\n", + " 2.219417e-01\n", " \n", " \n", " 10\n", " beta_cost:0_0\n", " -0.033339\n", " 0.007095\n", - " -4.698925\n", + " -4.698679\n", " 2.622604e-06\n", " \n", " \n", " 11\n", " beta_freq:0_0\n", - " 0.092529\n", + " 0.092530\n", " 0.005098\n", - " 18.151833\n", + " 18.151777\n", " 0.000000e+00\n", " \n", " \n", @@ -365,7 +365,7 @@ " beta_ovt:0_0\n", " -0.043004\n", " 0.003225\n", - " -13.335643\n", + " -13.335551\n", " 0.000000e+00\n", " \n", " \n", @@ -374,19 +374,19 @@ ], "text/plain": [ " Coefficient Name Coefficient Estimation Std. Err z_value P(.>z)\n", - "0 beta_inter:0_0 0.698367 1.280208 0.545511 5.854024e-01\n", - "1 beta_inter:0_1 1.844104 0.708454 2.602998 9.241223e-03\n", - "2 beta_inter:0_2 3.274187 0.624366 5.244018 1.192093e-07\n", - "3 beta_income:0_0 -0.089087 0.018347 -4.855643 1.192093e-06\n", - "4 beta_income:0_1 -0.027993 0.003873 -7.228673 0.000000e+00\n", - "5 beta_income:0_2 -0.038147 0.004083 -9.342690 0.000000e+00\n", - "6 beta_ivt:0_0 0.059509 0.010073 5.907992 0.000000e+00\n", - "7 beta_ivt:0_1 -0.006784 0.004433 -1.530137 1.259828e-01\n", - "8 beta_ivt:0_2 -0.006460 0.001898 -3.403037 6.663799e-04\n", - "9 beta_ivt:0_3 -0.001450 0.001187 -1.221401 2.219341e-01\n", - "10 beta_cost:0_0 -0.033339 0.007095 -4.698925 2.622604e-06\n", - "11 beta_freq:0_0 0.092529 0.005098 18.151833 0.000000e+00\n", - "12 beta_ovt:0_0 -0.043004 0.003225 -13.335643 0.000000e+00" + "0 beta_inter:0_0 0.698380 1.280237 0.545508 5.854039e-01\n", + "1 beta_inter:0_1 1.844129 0.708489 2.602904 9.243727e-03\n", + "2 beta_inter:0_2 3.274206 0.624402 5.243744 1.192093e-07\n", + "3 beta_income:0_0 -0.089087 0.018347 -4.855632 1.192093e-06\n", + "4 beta_income:0_1 -0.027993 0.003873 -7.228651 0.000000e+00\n", + "5 
beta_income:0_2 -0.038147 0.004083 -9.342653 0.000000e+00\n", + "6 beta_ivt:0_0 0.059510 0.010073 5.908023 0.000000e+00\n", + "7 beta_ivt:0_1 -0.006784 0.004433 -1.530130 1.259845e-01\n", + "8 beta_ivt:0_2 -0.006460 0.001898 -3.402944 6.666183e-04\n", + "9 beta_ivt:0_3 -0.001450 0.001187 -1.221381 2.219417e-01\n", + "10 beta_cost:0_0 -0.033339 0.007095 -4.698679 2.622604e-06\n", + "11 beta_freq:0_0 0.092530 0.005098 18.151777 0.000000e+00\n", + "12 beta_ovt:0_0 -0.043004 0.003225 -13.335551 0.000000e+00" ] }, "execution_count": null, @@ -469,14 +469,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1/1 [00:01<00:00, 2.00s/it]" + "100%|██████████| 1/1 [00:01<00:00, 1.73s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "'Ground Truth' Negative LogLikelihood: tf.Tensor(1874.3633, shape=(), dtype=float32)\n" + "'Ground Truth' Negative LogLikelihood: tf.Tensor(1874.3427, shape=(), dtype=float32)\n" ] }, { @@ -526,11 +526,11 @@ "output_type": "stream", "text": [ "Purchase probability of each item for the first 5 sessions: tf.Tensor(\n", - "[[0.1906135 0.00353266 0.4053667 0.4004831 ]\n", - " [0.34869286 0.00069682 0.36830992 0.28229675]\n", - " [0.14418365 0.00651285 0.40567666 0.44362238]\n", - " [0.34869286 0.00069682 0.36830992 0.28229675]\n", - " [0.34869286 0.00069682 0.36830992 0.28229675]], shape=(5, 4), dtype=float32)\n" + "[[0.19061361 0.00353295 0.4053689 0.4004805 ]\n", + " [0.3486952 0.00069691 0.36830923 0.28229502]\n", + " [0.14418328 0.00651326 0.40567988 0.44361907]\n", + " [0.3486952 0.00069691 0.36830923 0.28229502]\n", + " [0.3486952 0.00069691 0.36830923 0.28229502]], shape=(5, 4), dtype=float32)\n" ] } ], @@ -582,14 +582,14 @@ { "data": { "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", + "[,\n", + " ,\n", + " ,\n", + " ,\n", " ,\n", - " ]" + " ]" ] }, "execution_count": null, @@ -611,7 +611,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": null, @@ -669,11 +669,11 @@ "text": [ 
"L-BFGS Opimization finished:\n", "---------------------------------------------------------------\n", - "Number of iterations: 170\n", + "Number of iterations: 190\n", "Algorithm converged before reaching max iterations: True\n", - "[, , , , , ]\n" + "[, , , , , ]\n" ] } ], @@ -834,16 +834,16 @@ { "data": { "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " 1:0' shape=(1, 1) dtype=float32, numpy=array([[1.413982]], dtype=float32)>]" + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " 1:0' shape=(1, 1) dtype=float32, numpy=array([[1.4228866]], dtype=float32)>]" ] }, "execution_count": null, @@ -865,7 +865,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": null, diff --git a/notebooks/choice_learn_introduction_data.ipynb b/notebooks/choice_learn_introduction_data.ipynb index 05c0f692..cfb6ca8f 100644 --- a/notebooks/choice_learn_introduction_data.ipynb +++ b/notebooks/choice_learn_introduction_data.ipynb @@ -320,9 +320,11 @@ "source": [ "The ChoiceDataset is ready !\n", "\n", + "If your DataFrame is in the wide format, you can use the equivalent method *from_single_wide_df*. An example can be found [here](https://github.com/artefactory/choice-learn-private/blob/main/notebooks/dataset_creation.ipynb) on the SwissMetro dataset: \n", + "\n", "You now have three possibilities to continue discovering the choice-learn package:\n", "- You can directly go [here]() to the modelling tutorial if you want to understand how a first simple ConditionMNl would be implementd.\n", - "- You can go [here]() if your dataset is organized differently to see all the different ways to instantiate a ChoiceDataset. In particular it helps if you DataFrame is in the wide format or if it is splitted into several DataFrames.\n", + "- You can go [here]() if your dataset is organized differently to see all the different ways to instantiate a ChoiceDataset. 
In particular it helps if you data is splitted into several DataFrames or if you have another format of data.\n", "- Or you can continue this current tutorial to better understand the ChoiceDataset machinery and everything there is to know about it.\n", "\n", "Whatever your choice, you can also check [here](#ready-to-use-datasets) the list of open source datasets available directly with the package." diff --git a/notebooks/latent_class_model.ipynb b/notebooks/latent_class_model.ipynb new file mode 100644 index 00000000..88cd8f10 --- /dev/null +++ b/notebooks/latent_class_model.ipynb @@ -0,0 +1,287 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example of use of Latent Class MNL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n", + "\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "sys.path.append(\"../\")\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's use the Electricity Dataset used in this [tutorial](https://rpubs.com/msarrias1986/335556)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from choice_learn.datasets import load_electricity\n", + "\n", + "elec_dataset = load_electricity(as_frame=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from choice_learn.models.simple_mnl import SimpleMNL\n", + "from choice_learn.models.latent_class_mnl import LatentClassSimpleMNL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lc_model = LatentClassSimpleMNL(n_latent_classes=3, fit_method=\"mle\", optimizer=\"lbfgs\", epochs=1000, tolerance=1e-10)\n", + "hist = lc_model.fit(elec_dataset, verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Latent Class Model weights:\")\n", + "print(\"Classes Logits:\", lc_model.latent_logits)\n", + "for i in range(3):\n", + " print(\"\\n\")\n", + " print(f\"Model Nb {i}, weights:\", lc_model.models[i].weights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Negative Log-Likelihood:\")\n", + "lc_model.evaluate(elec_dataset) * len(elec_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Latent Conditional MNL\n", + "We used a very simple MNL. Here we simulate the same MNL, by using the Conditional-MNL formulation.\\\n", + "Don't hesitate to read the conditional-MNL tutorial to better understand how to use this formulation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from choice_learn.models.latent_class_mnl import LatentClassConditionalMNL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lc_model_2 = LatentClassConditionalMNL(n_latent_classes=3,\n", + " fit_method=\"mle\",\n", + " optimizer=\"lbfgs\",\n", + " epochs=1000,\n", + " tolerance=1e-12)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For each feature, let's add a coefficient that is shared by all items:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lc_model_2.add_shared_coefficient(coefficient_name=\"pf\",\n", + " feature_name=\"pf\",\n", + " items_indexes=[0, 1, 2, 3])\n", + "lc_model_2.add_shared_coefficient(coefficient_name=\"cl\",\n", + " feature_name=\"cl\",\n", + " items_indexes=[0, 1, 2, 3])\n", + "lc_model_2.add_shared_coefficient(coefficient_name=\"loc\",\n", + " feature_name=\"loc\",\n", + " items_indexes=[0, 1, 2, 3])\n", + "lc_model_2.add_shared_coefficient(coefficient_name=\"wk\",\n", + " feature_name=\"wk\",\n", + " items_indexes=[0, 1, 2, 3])\n", + "lc_model_2.add_shared_coefficient(coefficient_name=\"tod\",\n", + " feature_name=\"tod\",\n", + " items_indexes=[0, 1, 2, 3])\n", + "lc_model_2.add_shared_coefficient(coefficient_name=\"seas\",\n", + " feature_name=\"seas\",\n", + " items_indexes=[0, 1, 2, 3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Fit\n", + "hist2 = lc_model_2.fit(elec_dataset, verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Negative Log-Likelihood:\", lc_model_2.evaluate(elec_dataset)*len(elec_dataset))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], 
+ "source": [ + "print(\"Latent Class Model weights:\")\n", + "print(\"Classes Logits:\", lc_model_2.latent_logits)\n", + "for i in range(3):\n", + " print(\"\\n\")\n", + " print(f\"Model Nb {i}, weights:\", lc_model_2.models[i].weights)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Just like any ChoiceModel you can get the probabilities:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lc_model.predict_probas(elec_dataset[:4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to use more complex formulations of Latent Class models, you can directly use the *BaseLatentClassModel* from *choice_learn.models.base_model*:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from choice_learn.models.base_model import BaseLatentClassModel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "manual_lc = BaseLatentClassModel(\n", + " model_class=SimpleMNL,\n", + " model_parameters={\"add_exit_choice\": False},\n", + " n_latent_classes=3,\n", + " fit_method=\"mle\",\n", + " epochs=1000,\n", + " optimizer=\"lbfgs\"\n", + " )\n", + "manual_lc.instantiate(n_items=4,\n", + " n_fixed_items_features=0,\n", + " n_contexts_features=0,\n", + " n_contexts_items_features=6)\n", + "manual_hist = manual_lc.fit(elec_dataset, verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "manual_lc.evaluate(elec_dataset) * len(elec_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you need to go deeper, you can look in *choice_learn/models/latent_class_mnl* to see different implementations that could help you." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/logistic_regression.ipynb b/notebooks/logistic_regression.ipynb new file mode 100644 index 00000000..45252ec3 --- /dev/null +++ b/notebooks/logistic_regression.ipynb @@ -0,0 +1,184 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Logistic Regression: 3-class Classifier\n", + "\n", + "The Conditional MNL is a generalization of the multi-class Logistic Regression.\n", + "Here, we recreate the scikit-learn tutorial that can be found [here](https://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html#sphx-glr-auto-examples-linear-model-plot-iris-logistic-py)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Remove GPU use\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n", + "\n", + "import sys\n", + "\n", + "sys.path.append(\"../\")\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "from sklearn import datasets\n", + "from sklearn.inspection import DecisionBoundaryDisplay\n", + "\n", + "from choice_learn.models import ConditionalMNL\n", + "from choice_learn.data import ChoiceDataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import some data to play with\n", + "iris = datasets.load_iris()\n", + "X = iris.data[:, :2] # we only take the first two features.\n", + "Y = iris.target" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need to create a ChoiceDataset object. Features are contexts_features as they are shared by the three outcomes. The class labels are ''choices''." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = ChoiceDataset(contexts_features=(X, ),\n", + "contexts_features_names=([\"feat_1\", \"feat_2\"], ),\n", + " fixed_items_features=np.ones((3, 3)),\n", + " choices=Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the model parametrization, we specify that we want to learn one weight by outcome for each feature: 'feat_1', 'feat_2' and the intercept. This is done with the keyword \"item-full\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parametrization = {\n", + " \"intercept\": \"item-full\",\n", + " \"feat_1\": \"item-full\",\n", + " \"feat_2\": \"item-full\"\n", + "}\n", + "\n", + "# Let's estimate the weights\n", + "model = ConditionalMNL(parameters=parametrization, optimizer=\"lbfgs\")\n", + "hist = model.fit(dataset, epochs=100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's display the resulting model, just as in the sk-learn tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "feature_1, feature_2 = np.meshgrid(\n", + " np.linspace(X[:, 0].min() - 0.5, X[:, 0].max() + 0.5),\n", + " np.linspace(X[:, 1].min() - 0.5, X[:, 1].max() + 0.5)\n", + ")\n", + "grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T\n", + "\n", + "grid_dataset = ChoiceDataset(contexts_features=(grid, ),\n", + "contexts_features_names=([\"feat_1\", \"feat_2\"], ),\n", + " fixed_items_features=np.ones((3, 3)),\n", + " choices=np.ones(len(grid), ))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "keep_output": true + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "keep_output": true + }, + "output_type": "display_data" + } + ], + "source": [ + "y_pred = np.reshape(np.argmax(model.predict_probas(grid_dataset), axis=1), feature_1.shape)\n", + "display = DecisionBoundaryDisplay(\n", + " xx0=feature_1, xx1=feature_2, response=y_pred\n", + ")\n", + "display.plot(plot_method=\"pcolormesh\",\n", + " cmap=plt.cm.Paired,\n", + " shading=\"auto\",\n", + " xlabel=\"Sepal length\",\n", + " ylabel=\"Sepal width\")\n", + "display.ax_.scatter(\n", + " X[:, 0], X[:, 1], c=Y, edgecolor=\"black\", \n", + " cmap=plt.cm.Paired,\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It sure looks alike !" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tf_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/rumnet_example.ipynb b/notebooks/rumnet_example.ipynb index ce18c800..0bbd6693 100644 --- a/notebooks/rumnet_example.ipynb +++ b/notebooks/rumnet_example.ipynb @@ -28,6 +28,7 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", + "import tensorflow as tf\n", "\n", "from choice_learn.data import ChoiceDataset\n", "from choice_learn.models import RUMnet\n", @@ -192,6 +193,9 @@ " \"logmin\": 1e-10,\n", " \"label_smoothing\": 0.02,\n", " \"callbacks\": [],\n", + " \"epochs\": 15,\n", + " \"batch_size\": 128,\n", + " \"tol\": 1e-5,\n", "}" ] }, @@ -217,8 +221,11 @@ " model = RUMnet(**model_args)\n", " model.instantiate()\n", "\n", - " losses = model.fit(train_dataset, n_epochs=5000, batch_size=128)\n", - " test_eval.append(model.evaluate(test_dataset))\n", + " losses = model.fit(train_dataset, 
val_dataset=test_dataset)\n", + " probas = model.predict_probas(test_dataset)\n", + " eval = tf.keras.losses.CategoricalCrossentropy(from_logits=False)(y_pred=model.predict_probas(test_dataset), y_true=tf.one_hot(test_dataset.choices, 3))\n", + " test_eval.append(eval)\n", + " print(test_eval)\n", "\n", " fit_losses.append(losses)" ] @@ -229,8 +236,11 @@ "metadata": {}, "outputs": [], "source": [ + "cmap = plt.cm.coolwarm\n", + "colors = [cmap(j / 4) for j in range(5)]\n", "for i in range(len(fit_losses)):\n", - " plt.plot(fit_losses[i][\"train_loss\"], label=f\"fold {i}\")\n", + " plt.plot(fit_losses[i][\"train_loss\"], c=colors[i], linestyle=\"--\")\n", + " plt.plot(fit_losses[i][\"test_loss\"], label=f\"fold {i}\", c=colors[i])\n", "plt.legend()" ] }, @@ -240,7 +250,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"Average LogLikeliHood on test:\", np.mean(test_eval))" + "model.evaluate(test_dataset)" ] }, { @@ -248,33 +258,21 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "print(\"Average LogLikeliHood on test:\", np.mean(test_eval))" + ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "tf_env", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" } }, "nbformat": 4, diff --git a/notebooks/simple_mnl_mlogit.ipynb b/notebooks/simple_mnl_mlogit.ipynb new file mode 100644 index 00000000..e298dc3f --- /dev/null +++ b/notebooks/simple_mnl_mlogit.ipynb @@ -0,0 +1,196 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple MNL: Comparison with R's mlogit package" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Remove GPU use\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n", + "\n", + "import sys\n", + "\n", + "sys.path.append(\"../\")\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "from choice_learn.models.simple_mnl import SimpleMNL\n", + "from choice_learn.data import ChoiceDataset\n", + "from choice_learn.datasets.base import load_heating" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's recreate this [tutorial](https://cran.r-project.org/web/packages/mlogit/vignettes/e1mlogit.html) by Yves Croissant for the mlogit R package.\n", + "\n", + "It uses the Heating dataset, where we try to predict which heating harware a houseold will chose. The dataset is integrated in the package, you can find information [here]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "heating_df = load_heating(as_frame=True)\n", + "\n", + "contexts_features = [\"income\", \"agehed\", \"rooms\"]\n", + "choice = [\"depvar\"]\n", + "contexts_items_features = [\"ic.\", \"oc.\"]\n", + "items = [\"hp\", \"gc\", \"gr\", \"ec\", \"er\"]\n", + "\n", + "choices = np.array([items.index(val) for val in heating_df[choice].to_numpy().ravel()])\n", + "contexts = heating_df[contexts_features].to_numpy()\n", + "contexts_items = np.stack([heating_df[[feat + item for feat in contexts_items_features]].to_numpy() for item in items], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First part estimates a simple MNL without intercept from the 'ic' and 'oc' features. By default, SimpleMNL does not integrate any intercept, but you can precise 'None'." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = ChoiceDataset(contexts_items_features=contexts_items, choices=choices)\n", + "model = SimpleMNL(optimizer=\"lbfgs\", intercept=None)\n", + "history = model.fit(dataset, epochs=100, get_report=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Estimation Negative LogLikelihood:\",\n", + " model.evaluate(dataset) * len(dataset))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.report" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We reach very similar results. The second part is about modelling useing the ic + oc/0.12 ratio. Here is how it can be done:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ratio_contexts_items = []\n", + "for case in range(contexts_items.shape[0]):\n", + " feat = []\n", + " for item in range(contexts_items.shape[1]):\n", + " feat.append([contexts_items[case, item, 0] + contexts_items[case, item, 1] / 0.12])\n", + " ratio_contexts_items.append(feat)\n", + "ratio_contexts_items = np.array(ratio_contexts_items)\n", + "ratio_contexts_items.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ratio_dataset = ChoiceDataset(contexts_items_features=ratio_contexts_items, choices=choices)\n", + "model = SimpleMNL(optimizer=\"lbfgs\")\n", + "history = model.fit(ratio_dataset, epochs=100, get_report=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Weights:\", model.weights)\n", + "print(\"Estimation Negative LogLikelihood:\", model.evaluate(ratio_dataset) * len(ratio_dataset))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Finally, to add itemwise intercept for the last part, here is how it can be done:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = SimpleMNL(optimizer=\"lbfgs\", intercept=\"item\")\n", + "history = model.fit(dataset, epochs=100, get_report=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tf_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.1.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/unit_tests/data/test_choice_dataset.py b/tests/unit_tests/data/test_choice_dataset.py index ed2b495f..a29a8f96 100644 --- a/tests/unit_tests/data/test_choice_dataset.py +++ b/tests/unit_tests/data/test_choice_dataset.py @@ -248,8 +248,8 @@ def test_shape(): choices=choices, ) - assert dataset.get_num_items() == 3 - assert dataset.get_num_choices() == 3 + assert dataset.get_n_items() == 3 + assert dataset.get_n_choices() == 3 def test_from_df():