diff --git a/README.md b/README.md
index 6232be2c..ef732481 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,7 @@ If you are new to choice modelling, you can check this [resource](https://www.pu
   - The [Train](./choice_learn/datasets/data/train_data.csv.gz) [[5]](#citation)
   - The [Heating](./choice_learn/datasets/data/heating_data.csv.gz) & [Electricity](./choice_learn/datasets/data/electricity.csv.gz) datasets from Kenneth Train described [here](https://rdrr.io/cran/mlogit/man/Electricity.html) and [here](https://rdrr.io/cran/mlogit/man/Heating.html)
   - The [TaFeng](./choice_learn/datasets/data/ta_feng.csv.zip) dataset from [Kaggle](https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset)
+  - The ICDM-2013 [Expedia](./choice_learn/datasets/expedia.py) dataset from [Kaggle](https://www.kaggle.com/c/expedia-personalized-sort) [[6]](#citation)

 ### Models
 - Ready-to-use models:
@@ -124,17 +125,29 @@ model = ConditionalMNL(optimizer="lbfgs")

 # add_coefficients adds one coefficient for each specified item_index
 # intercept, and income are added for each item except the first one that needs to be zeroed
-model.add_coefficients(coefficient_name="beta_inter", feature_name="intercept", items_indexes=[1, 2, 3])
-model.add_coefficients(coefficient_name="beta_income", feature_name="income", items_indexes=[1, 2, 3])
+model.add_coefficients(coefficient_name="beta_inter",
+                       feature_name="intercept",
+                       items_indexes=[1, 2, 3])
+model.add_coefficients(coefficient_name="beta_income",
+                       feature_name="income",
+                       items_indexes=[1, 2, 3])

 # ivt is added for each item:
-model.add_coefficients(coefficient_name="beta_ivt", feature_name="ivt", items_indexes=[0, 1, 2, 3])
+model.add_coefficients(coefficient_name="beta_ivt",
+                       feature_name="ivt",
+                       items_indexes=[0, 1, 2, 3])

 # shared_coefficient add one coefficient that is used for all items specified in the items_indexes:
 # Here, cost, freq and ovt coefficients are shared between all items
-model.add_shared_coefficient(coefficient_name="beta_cost", feature_name="cost", items_indexes=[0, 1, 2, 3])
-model.add_shared_coefficient(coefficient_name="beta_freq", feature_name="freq", items_indexes=[0, 1, 2, 3])
-model.add_shared_coefficient(coefficient_name="beta_ovt", feature_name="ovt", items_indexes=[0, 1, 2, 3])
+model.add_shared_coefficient(coefficient_name="beta_cost",
+                             feature_name="cost",
+                             items_indexes=[0, 1, 2, 3])
+model.add_shared_coefficient(coefficient_name="beta_freq",
+                             feature_name="freq",
+                             items_indexes=[0, 1, 2, 3])
+model.add_shared_coefficient(coefficient_name="beta_ovt",
+                             feature_name="ovt",
+                             items_indexes=[0, 1, 2, 3])

 history = model.fit(dataset, epochs=1000, get_report=True)
 print("The average neg-loglikelihood is:", model.evaluate(dataset).numpy())
@@ -157,7 +170,8 @@ A detailed documentation of this project is available [here](https://artefactory
 [2][The Acceptance of Model Innovation: The Case of Swissmetro](https://www.researchgate.net/publication/37456549_The_acceptance_of_modal_innovation_The_case_of_Swissmetro), Bierlaire, M.; Axhausen, K., W.; Abay, G. (2001)\
 [3][Applications and Interpretation of Nested Logit Models of Intercity Mode Choice](https://trid.trb.org/view/385097), Forinash, C., V.; Koppelman, F., S. (1993)\
 [4][The Demand for Local Telephone Service: A Fully Discrete Model of Residential Calling Patterns and Service Choices](https://www.jstor.org/stable/2555538), Train K., E.; McFadden, D., L.; Moshe, B. (1987)\
-[5] [Estimation of Travel Choice Models with Randomly Distributed Values of Time](https://ideas.repec.org/p/fth/lavaen/9303.html), Ben-Akiva M; Bolduc D; Bradley M(1993)
+[5] [Estimation of Travel Choice Models with Randomly Distributed Values of Time](https://ideas.repec.org/p/fth/lavaen/9303.html), Ben-Akiva, M.; Bolduc, D.; Bradley, M. (1993)\
+[6] [Personalize Expedia Hotel Searches - ICDM 2013](https://www.kaggle.com/c/expedia-personalized-sort), Ben Hamner, A.; Friedman, D.; SSA_Expedia (2013)

 ### Code and Repositories
 - [1][RUMnet](https://github.com/antoinedesir/rumnet)

diff --git a/choice_learn/data/choice_dataset.py b/choice_learn/data/choice_dataset.py
index 5ba813ad..96b40a1c 100644
--- a/choice_learn/data/choice_dataset.py
+++ b/choice_learn/data/choice_dataset.py
@@ -375,9 +375,12 @@ def _build_features_by_ids(self):
                 "No features_names given, match with fiven features_by_ids impossible."
             )
         if (
-            self.fixed_items_features_names == (None,)
-            and self.contexts_features_names == (None,)
-            and self.contexts_items_features_names == (None,)
+            isinstance(self.fixed_items_features_names, tuple)
+            and self.fixed_items_features_names[0] is None
+            and isinstance(self.contexts_features_names, tuple)
+            and self.contexts_features_names[0] is None
+            and isinstance(self.contexts_items_features_names, tuple)
+            and self.contexts_items_features_names[0] is None
         ):
             raise ValueError(
                 "No features_names given, match with fiven features_by_ids impossible."
             )
@@ -805,10 +808,9 @@ def from_single_wide_df(
                         raise ValueError(
                             f"More than one value for feature {feature} for item {item}"
                         )
-                    fixed_items_features[feature] = (
-                        fixed_items_features.get(feature, []),
-                        +[feature_value],
-                    )
+                    fixed_items_features[feature] = fixed_items_features.get(feature, []) + [
+                        feature_value[0]
+                    ]
                 fixed_items_features = pd.DataFrame(fixed_items_features)
             elif fixed_items_prefixes is not None:
                 fixed_items_features = {"item_id": []}
@@ -820,10 +822,9 @@
                         raise ValueError(
                             f"More than one value for feature {feature} for item {item}"
                         )
-                    fixed_items_features[feature] = (
-                        fixed_items_features.get(feature, []),
-                        +[feature_value],
-                    )
+                    fixed_items_features[feature] = fixed_items_features.get(feature, []) + [
+                        feature_value[0]
+                    ]
                 fixed_items_features = pd.DataFrame(fixed_items_features)
             else:
                 fixed_items_features = None

diff --git a/choice_learn/datasets/base.py b/choice_learn/datasets/base.py
index 77b91959..e4ec0a39 100644
--- a/choice_learn/datasets/base.py
+++ b/choice_learn/datasets/base.py
@@ -11,6 +11,31 @@
 DATA_MODULE = "choice_learn.datasets.data"


+def get_path(data_file_name, module=DATA_MODULE):
+    """Get the path to a data file shipped with the package.
+
+    Specifically used to handle the differences between Python 3.8 and 3.9+
+    in importlib.resources handling.
+
+    Parameters
+    ----------
+    data_file_name : str
+        name of the csv file to load
+    module : str, optional
+        path to the directory containing the data file, by default DATA_MODULE
+
+    Returns
+    -------
+    Path
+        path to the data file
+    """
+    import sys
+
+    # sys.version is a free-form string ("3.10" would compare lower than "3.9"),
+    # so the version check must use sys.version_info
+    if sys.version_info >= (3, 9):
+        return resources.files(module) / data_file_name
+
+    with resources.path(module, data_file_name) as path:
+        return path
+
+
 def load_csv(data_file_name, data_module=DATA_MODULE, encoding="utf-8"):
     """Base function to load csv files.
@@ -123,12 +148,14 @@ def load_swissmetro(add_items_one_hot=False, as_frame=False, return_desc=False,
     Ascona, Switzerland."""
     data_file_name = "swissmetro.csv.gz"

-    names, data = load_gzip(data_file_name)
-    data = data.astype(int)
+    full_path = get_path(data_file_name, module=DATA_MODULE)
+    swiss_df = pd.read_csv(full_path)
+    # Adding dummy CAR_HE feature as 0 for consistency
+    swiss_df["CAR_HE"] = 0.0

     items = ["TRAIN", "SM", "CAR"]
-    items_features_names = []
-    session_features_names = [
+    contexts_features_names = [
         "GROUP",
         "PURPOSE",
         "FIRST",
@@ -142,20 +169,21 @@ def load_swissmetro(add_items_one_hot=False, as_frame=False, return_desc=False,
         "ORIGIN",
         "DEST",
     ]
-    sessions_items_features_names = ["TT", "CO", "HE"]
-    sessions_items_features_names = [
-        [f"{item}_{feature}" for feature in sessions_items_features_names] for item in items
-    ]
-    sessions_items_availabilities = ["TRAIN_AV", "SM_AV", "CAR_AV"]
+    contexts_items_features_names = ["CO", "TT", "HE", "SEATS"]
     choice_column = "CHOICE"
+    availabilities_column = "AV"

     if add_items_one_hot:
-        items_features = np.eye(len(items), dtype=np.float64)
         items_features_names = [f"oh_{item}" for item in items]
+        # One-hot encoding of the items, stored as wide-format columns
+        for item in items:
+            for item2 in items:
+                swiss_df[f"{item2}_oh_{item}"] = 1 if item == item2 else 0
     else:
-        items_features = None
         items_features_names = None
-
+    """
     # Adding dummy CAR_HE feature as 0 for consistency
     names.append("CAR_HE")
     data = np.hstack([data, np.zeros((data.shape[0], 1))])
@@ -177,15 +205,16 @@ def load_swissmetro(add_items_one_hot=False, as_frame=False, return_desc=False,
     # choices renormalization
     choices = choices - 1
+    """

     if return_desc:
         return description

     if as_frame:
-        return pd.DataFrame(data, columns=names)
+        return swiss_df

     if preprocessing == "tutorial":
-        swiss_df = pd.DataFrame(data, columns=names)
         # Removing unknown choices
         swiss_df = swiss_df.loc[swiss_df.CHOICE != 0]
         # Keep only commute an dbusiness trips
@@ -249,7 +278,7 @@ def load_swissmetro(add_items_one_hot=False, as_frame=False, return_desc=False,
         choices=choices,
     )
     if preprocessing == "rumnet":
-        swiss_df = pd.DataFrame(data, columns=names)
         swiss_df = swiss_df.loc[swiss_df.CHOICE != 0]
         choices = swiss_df.CHOICE.to_numpy() - 1
         contexts_items_availabilities = swiss_df[["TRAIN_AV", "SM_AV", "CAR_AV"]].to_numpy()
@@ -326,15 +355,15 @@ def load_swissmetro(add_items_one_hot=False, as_frame=False, return_desc=False,
         choices=choices,
     )

-    return ChoiceDataset(
-        fixed_items_features=items_features,
-        contexts_features=session_features,
-        contexts_items_features=sessions_items_features,
-        contexts_items_availabilities=sessions_items_availabilities,
-        choices=choices,
-        fixed_items_features_names=items_features_names,
-        contexts_features_names=session_features_names,
-        contexts_items_features_names=sessions_items_features_names,
+    return ChoiceDataset.from_single_wide_df(
+        df=swiss_df,
+        items_id=items,
+        fixed_items_suffixes=items_features_names,
+        contexts_features_columns=contexts_features_names,
+        contexts_items_features_suffixes=contexts_items_features_names,
+        contexts_items_availabilities_suffix=availabilities_column,
+        choices_column=choice_column,
+        choice_mode="item_index",
     )


@@ -389,9 +418,12 @@ def load_modecanada(
     nested logit models of intercity mode choice,” Transportation Research Record 1413, 98-106.
     """
     _ = to_wide
     data_file_name = "ModeCanada.csv.gz"
-    names, data = load_gzip(data_file_name)
-    names = [name.replace('"', "") for name in names]
-    canada_df = pd.DataFrame(data[:, 1:], index=data[:, 0].astype(int), columns=names[1:])
+    full_path = get_path(data_file_name, module=DATA_MODULE)
+    canada_df = pd.read_csv(full_path)
     canada_df["alt"] = canada_df.apply(lambda row: row.alt.replace('"', ""), axis=1)
     # Just some typing
     canada_df.income = canada_df.income.astype("float32")
@@ -578,9 +610,9 @@ def load_heating(
     Train, K.E. (2003) Discrete Choice Methods with Simulation. Cambridge University Press."""
     _ = to_wide
     data_file_name = "heating_data.csv.gz"
-    names, data = load_gzip(data_file_name)
-    heating_df = pd.read_csv(resources.files(DATA_MODULE) / "heating_data.csv.gz")
+    full_path = get_path(data_file_name, module=DATA_MODULE)
+    heating_df = pd.read_csv(full_path)

     if return_desc:
         return desc
@@ -632,7 +664,7 @@ def load_electricity(
     """
     _ = to_wide
     data_file_name = "electricity.csv.gz"
-    names, data = load_gzip(data_file_name)

     description = """A sample of 2308 households in the United States.
     - choice: the choice of the individual, one of 1, 2, 3, 4,
@@ -657,7 +689,8 @@ def load_electricity(
     Train, K.E. (2003) Discrete Choice Methods with Simulation. Cambridge University Press.
     """
-    elec_df = pd.read_csv(resources.files(DATA_MODULE) / data_file_name)
+    full_path = get_path(data_file_name, module=DATA_MODULE)
+    elec_df = pd.read_csv(full_path)
     elec_df.choice = elec_df.choice.astype(int)
     elec_df[["pf", "cl", "loc", "wk", "tod", "seas"]] = elec_df[
         ["pf", "cl", "loc", "wk", "tod", "seas"]
@@ -706,9 +739,10 @@ def load_train(
     ”Papers 9303, Laval-Recherche en Energie. https://ideas.repec.org/p/fth/lavaen/9303.html."""
     _ = to_wide
     data_file_name = "train_data.csv.gz"
-    names, data = load_gzip(data_file_name)

-    train_df = pd.read_csv(resources.files(DATA_MODULE) / data_file_name)
+    full_path = get_path(data_file_name, module=DATA_MODULE)
+    train_df = pd.read_csv(full_path)

     if return_desc:
         return desc

diff --git a/choice_learn/datasets/data/__init__.py b/choice_learn/datasets/data/__init__.py
new file mode 100644
index 00000000..c24dcc41
--- /dev/null
+++ b/choice_learn/datasets/data/__init__.py
@@ -0,0 +1 @@
+"""Directory to store datasets as zipped .csv files."""

diff --git a/choice_learn/datasets/examples.py b/choice_learn/datasets/examples.py
index d25b7cc8..a5761f74 100644
--- a/choice_learn/datasets/examples.py
+++ b/choice_learn/datasets/examples.py
@@ -1,10 +1,10 @@
 """Some datasets used for personal examples."""
-from importlib import resources

 import numpy as np
 import pandas as pd

 from choice_learn.data.choice_dataset import ChoiceDataset
+from choice_learn.datasets.base import get_path

 DATA_MODULE = "choice_learn.datasets.data"

@@ -30,7 +30,8 @@ def load_tafeng(as_frame=False, return_desc=False, preprocessing=None):
     TaFeng Grocery Dataset.
     """
     filename = "ta_feng.csv.zip"
-    filepath = resources.files(DATA_MODULE) / filename
+    filepath = get_path(filename, module=DATA_MODULE)
     # url = "https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset/download?datasetVersionNumber=1"
     # if not os.path.exists(filepath):
     #     with urllib.request.urlopen(url) as f:
@@ -125,4 +126,4 @@
         contexts_items_availabilities=np.ones((len(choices), 25)).astype("float32"),
     )

-    return tafeng_df
+    return load_tafeng(as_frame=False, preprocessing="assort_example")

diff --git a/choice_learn/datasets/expedia.py b/choice_learn/datasets/expedia.py
index 2b8f53a3..94838389 100644
--- a/choice_learn/datasets/expedia.py
+++ b/choice_learn/datasets/expedia.py
@@ -1,6 +1,4 @@
 """ICDM 2013 Expedia dataset."""
-import os
-from importlib import resources
 from pathlib import Path

 import numpy as np
@@ -8,6 +6,7 @@

 from choice_learn.data.choice_dataset import ChoiceDataset
 from choice_learn.data.storage import OneHotStorage
+from choice_learn.datasets.base import get_path

 DATA_MODULE = "choice_learn.datasets.data"

@@ -15,18 +14,16 @@ def load_expedia(as_frame=False, preprocessing="rumnet"):
     """Load the Expedia dataset."""
     filename = "expedia.csv"
-    data_path = resources.files(DATA_MODULE)
-    if not Path.exists((data_path / filename)):
+    data_path = get_path(filename, module=DATA_MODULE)
+    if not Path(data_path).exists():
         print("In order to use the Expedia dataset, please download it from:")
         print("https://www.kaggle.com/c/expedia-personalized-sort")
         print("and save it in the following location:")
-        print(os.path.join(DATA_MODULE, filename))
+        print(data_path)
         print("The downloaded train.csv file should be named 'expedia.csv'")
-        raise FileNotFoundError(
-            f"File {filename} not found in {os.path.join(DATA_MODULE, filename)}"
-        )
+        raise FileNotFoundError(f"File {filename} not found in {data_path}")

-    expedia_df = pd.read_csv((data_path / filename))
+    expedia_df = pd.read_csv(data_path)
     if as_frame:
         return expedia_df

     expedia_df.loc[:, "day_of_week"] = expedia_df.loc[:, "date_time"].dt.dayofweek
     expedia_df.loc[:, "month"] = expedia_df.loc[:, "date_time"].dt.month
     expedia_df.loc[:, "hour"] = expedia_df.loc[:, "date_time"].dt.hour
+
+    # Keep only the id values that appear in more than 1000 searches,
+    # all rarer ids are relabelled as -1
+    for id_col in [
+        "site_id",
+        "visitor_location_country_id",
+        "prop_country_id",
+        "srch_destination_id",
+    ]:
+        value_counts = expedia_df[["srch_id", id_col]].drop_duplicates()[id_col].value_counts()
+        kept_ids = value_counts.index[value_counts.gt(1000)]
+        expedia_df.loc[~expedia_df[id_col].isin(kept_ids), id_col] = -1
+
+    # Filtering
 expedia_df = expedia_df[expedia_df.price_usd <= 1000]
 expedia_df = expedia_df[expedia_df.price_usd >= 10]
 expedia_df["log_price"] = expedia_df.price_usd.apply(np.log)

diff --git a/choice_learn/models/rumnet.py b/choice_learn/models/rumnet.py
index 2872bce7..3efd36ec 100644
--- a/choice_learn/models/rumnet.py
+++ b/choice_learn/models/rumnet.py
@@ -443,6 +443,17 @@ class PaperRUMnet(ChoiceModel):
     Representing Random Utility Choice Models with Neural Networks from Ali Aouad and Antoine Désir
     https://arxiv.org/abs/2207.12877

+    --- Attention ---
+    Note that the model uses two types of features that are treated differently:
+        - customer features
+        - product features
+    In this implementation, please make sure that the features are correctly formatted:
+        - customer features: shape (n_contexts, n_features), given as 'contexts_features' in the
+          ChoiceDataset used to fit the model
+        - product features: shape (n_contexts, n_items, n_features), given as
+          'contexts_items_features' in the ChoiceDataset used to fit the model
+    ---
+
     Inherits from base_model.ChoiceModel
     TODO: Verify that all parameters are implemented.
     """
@@ -782,12 +793,19 @@ def batch_predict(
             probabilities, tf.reduce_sum(probabilities, axis=1, keepdims=True) + 1e-5
         )

-        batch_nll = self.loss(
-            y_pred=probabilities,
-            y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
-            sample_weight=sample_weight,
-        )
-        return batch_nll, probabilities
+        batch_loss = {
+            "optimized_loss": self.loss(
+                y_pred=probabilities,
+                y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+                sample_weight=sample_weight,
+            ),
+            "NegativeLogLikelihood": tf.keras.losses.CategoricalCrossentropy()(
+                y_pred=probabilities,
+                y_true=tf.one_hot(choices, depth=probabilities.shape[1]),
+                sample_weight=sample_weight,
+            ),
+        }
+        return batch_loss, probabilities


 class CPURUMnet(PaperRUMnet):
@@ -831,9 +849,30 @@ def compute_batch_utility(
         """
         (_, _) = contexts_items_availabilities, choices
         ### Restacking of the item features
-        stacked_fixed_items_features = tf.concat([*fixed_items_features], axis=-1)
-        stacked_contexts_features = tf.concat([*contexts_features], axis=-1)
-        stacked_contexts_items_features = tf.concat([*contexts_items_features], axis=-1)
+        if fixed_items_features is not None and fixed_items_features[0] is not None:
+            stacked_fixed_items_features = tf.cast(
+                tf.concat([*fixed_items_features], axis=-1), tf.float32
+            )
+        else:
+            if contexts_items_features is None or contexts_items_features[0] is None:
+                raise ValueError("No item features provided")
+            stacked_fixed_items_features = tf.zeros((contexts_items_features[0].shape[1], 0))
+        if contexts_features is not None and contexts_features[0] is not None:
+            stacked_contexts_features = tf.cast(
+                tf.concat([*contexts_features], axis=-1), tf.float32
+            )
+        else:
+            raise ValueError("No customer features provided")
+        if contexts_items_features is not None and contexts_items_features[0] is not None:
+            stacked_contexts_items_features = tf.cast(
+                tf.concat([*contexts_items_features], axis=-1), tf.float32
+            )
+        else:
+            if fixed_items_features is None or fixed_items_features[0] is None:
+                raise ValueError("No item features provided")
+            # Empty placeholder of shape (n_contexts, n_items, 0)
+            stacked_contexts_items_features = tf.zeros(
+                (stacked_contexts_features.shape[0], fixed_items_features[0].shape[0], 0)
+            )

         full_item_features = tf.stack(
             [stacked_fixed_items_features] * stacked_contexts_items_features.shape[0], axis=0
@@ -950,9 +989,24 @@
         (_, _) = contexts_items_availabilities, choices

         ### Restacking of the item features
-        stacked_fixed_items_features = tf.concat([*fixed_items_features], axis=-1)
-        stacked_contexts_features = tf.concat([*contexts_features], axis=-1)
-        stacked_contexts_items_features = tf.concat([*contexts_items_features], axis=-1)
+        if fixed_items_features is not None and fixed_items_features[0] is not None:
+            stacked_fixed_items_features = tf.concat([*fixed_items_features], axis=-1)
+        else:
+            if contexts_items_features is None or contexts_items_features[0] is None:
+                raise ValueError("No item features provided")
+            stacked_fixed_items_features = tf.zeros((contexts_items_features[0].shape[1], 0))
+        if contexts_features is not None and contexts_features[0] is not None:
+            stacked_contexts_features = tf.concat([*contexts_features], axis=-1)
+        else:
+            raise ValueError("No customer features provided")
+        if contexts_items_features is not None and contexts_items_features[0] is not None:
+            stacked_contexts_items_features = tf.concat([*contexts_items_features], axis=-1)
+        else:
+            if fixed_items_features is None or fixed_items_features[0] is None:
+                raise ValueError("No item features provided")
+            # Empty placeholder of shape (n_contexts, n_items, 0)
+            stacked_contexts_items_features = tf.zeros(
+                (stacked_contexts_features.shape[0], fixed_items_features[0].shape[0], 0)
+            )

         # Reshaping
         # Beware if contexts_items_features is None...!

diff --git a/choice_learn/toolbox/__init__.py b/choice_learn/toolbox/__init__.py
new file mode 100644
index 00000000..ef1f3eb0
--- /dev/null
+++ b/choice_learn/toolbox/__init__.py
@@ -0,0 +1 @@
+"""Different tools to help with choice models manipulation."""

diff --git a/notebooks/latent_class_model.ipynb b/notebooks/latent_class_model.ipynb
index 88cd8f10..59f4de11 100644
--- a/notebooks/latent_class_model.ipynb
+++ b/notebooks/latent_class_model.ipynb
@@ -22,7 +22,6 @@
     "\n",
     "sys.path.append(\"../\")\n",
     "\n",
-    "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "\n",

diff --git a/notebooks/rumnet_example.ipynb b/notebooks/rumnet_example.ipynb
index 06034272..8f1f6284 100644
--- a/notebooks/rumnet_example.ipynb
+++ b/notebooks/rumnet_example.ipynb
@@ -271,19 +271,64 @@
     "print(\"Average LogLikeliHood on test:\", np.mean(test_eval))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## A larger and more complex dataset: Expedia ICDM 2013\n",
+    "The RUMnet paper benchmarks the model on a second dataset. To use it, download the data from [Kaggle](https://www.kaggle.com/c/expedia-personalized-sort) and save the train.csv file as expedia.csv in the folder choice_learn/datasets/data."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "test_eval"
+    "from choice_learn.datasets import load_expedia\n",
+    "\n",
+    "expedia_dataset = load_expedia(preprocessing=\"rumnet\")"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
-   "source": []
+   "outputs": [],
+   "source": [
+    "test_dataset = expedia_dataset[int(len(expedia_dataset)*0.8):]\n",
+    "train_dataset = expedia_dataset[:int(len(expedia_dataset)*0.8)]\n",
+    "\n",
+    "model_args = {\n",
+    "    \"num_products_features\": 46,\n",
+    "    \"num_customer_features\": 84,\n",
+    "    \"width_eps_x\": 10,\n",
+    "    \"depth_eps_x\": 3,\n",
+    "    \"heterogeneity_x\": 5,\n",
+    "    \"width_eps_z\": 10,\n",
+    "    \"depth_eps_z\": 3,\n",
+    "    \"heterogeneity_z\": 5,\n",
+    "    \"width_u\": 10,\n",
+    "    \"depth_u\": 3,\n",
+    "    \"optimizer\": \"Adam\",\n",
+    "    \"lr\": 0.001,\n",
+    "    \"logmin\": 1e-10,\n",
+    "    \"label_smoothing\": 0.02,\n",
+    "    \"callbacks\": [],\n",
+    "    \"epochs\": 15,\n",
+    "    \"batch_size\": 128,\n",
+    "    \"tol\": 1e-5,\n",
+    "}\n",
+    "model = RUMnet(**model_args)\n",
+    "model.instantiate()\n",
+    "\n",
+    "losses = model.fit(train_dataset, val_dataset=test_dataset)\n",
+    "probas = model.predict_probas(test_dataset)\n",
+    "test_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)(y_pred=model.predict_probas(test_dataset), y_true=tf.one_hot(test_dataset.choices, 39))\n",
+    "\n",
+    "print(test_loss)"
+   ]
   }
  ],
 "metadata": {

diff --git a/notebooks/simple_mnl_mlogit.ipynb b/notebooks/simple_mnl_mlogit.ipynb
index e298dc3f..8d246cc5 100644
--- a/notebooks/simple_mnl_mlogit.ipynb
+++ b/notebooks/simple_mnl_mlogit.ipynb
@@ -22,7 +22,6 @@
     "\n",
     "sys.path.append(\"../\")\n",
     "\n",
-    "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "\n",
     "from choice_learn.models.simple_mnl import SimpleMNL\n",
@@ -36,7 +35,7 @@
    "source": [
     "Let's recreate this [tutorial](https://cran.r-project.org/web/packages/mlogit/vignettes/e1mlogit.html) by Yves Croissant for the mlogit R package.\n",
     "\n",
-    "It uses the Heating dataset, where we try to predict which heating harware a houseold will chose. The dataset is integrated in the package, you can find information [here]."
+    "It uses the Heating dataset, available in choice_learn.datasets, where we try to predict which heating hardware a household will choose."
    ]
   },

diff --git a/requirements-complete.txt b/requirements-complete.txt
new file mode 100644
index 00000000..4d18e3a2
--- /dev/null
+++ b/requirements-complete.txt
@@ -0,0 +1,101 @@
+absl-py==1.4.0
+aiohttp==3.9.3
+aiosignal==1.2.0
+appnope==0.1.4
+asttokens==2.4.1
+astunparse==1.6.3
+async-timeout==4.0.3
+attrs==23.1.0
+backcall==0.2.0
+blinker==1.6.2
+Bottleneck==1.3.7
+Brotli==1.0.9
+cachetools==4.2.2
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==2.0.4
+click==8.1.7
+cloudpickle==2.2.1
+comm==0.2.2
+cryptography==41.0.3
+debugpy==1.6.7
+decorator==5.1.1
+dm-tree==0.1.7
+executing==2.0.1
+flatbuffers==2.0
+frozenlist==1.4.0
+gast==0.4.0
+google-auth==2.6.0
+google-auth-oauthlib==0.4.4
+google-pasta==0.2.0
+grpcio==1.42.0
+h5py==3.9.0
+idna==3.4
+importlib_metadata==7.0.2
+ipykernel==6.29.3
+ipython==8.12.0
+jax==0.3.25
+jaxlib==0.3.25
+jedi==0.19.1
+jupyter_client==8.6.1
+jupyter_core==5.7.2
+keras==2.11.0
+Keras-Preprocessing==1.1.2
+Markdown==3.4.1
+MarkupSafe==2.1.3
+matplotlib-inline==0.1.6
+multidict==6.0.4
+nest_asyncio==1.6.0
+numexpr==2.8.4
+numpy==1.24.3
+oauthlib==3.2.2
+opt-einsum==3.3.0
+packaging==24.0
+pandas==2.0.3
+parso==0.8.3
+pexpect==4.9.0
+pickleshare==0.7.5
+pip==23.3.1
+platformdirs==4.2.0
+pooch==1.7.0
+prompt-toolkit==3.0.42
+protobuf==3.20.3
+psutil==5.9.8
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.21
+Pygments==2.17.2
+PyJWT==2.4.0
+pyOpenSSL==23.2.0
+PySocks==1.7.1
+python-dateutil==2.8.2
+pytz==2023.3.post1
+pyzmq==24.0.1
+requests==2.31.0
+requests-oauthlib==1.3.0
+rsa==4.7.2
+scipy==1.10.1
+setuptools==68.2.2
+six==1.16.0
+stack-data==0.6.2
+tensorboard==2.11.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.6.0
+tensorflow==2.11.0
+tensorflow-estimator==2.11.0
+tensorflow-probability==0.19.0
+termcolor==2.1.0
+tornado==6.4
+tqdm==4.65.0
+traitlets==5.14.2
+typing_extensions==4.10.0
+tzdata==2023.3
+urllib3==2.1.0
+wcwidth==0.2.13
+Werkzeug==2.3.8
+wheel==0.35.1
+wrapt==1.14.1
+yarl==1.9.3
+zipp==3.17.0

diff --git a/tests/unit_tests/test_os_datasets.py b/tests/unit_tests/test_os_datasets.py
new file mode 100644
index 00000000..8349c18a
--- /dev/null
+++ b/tests/unit_tests/test_os_datasets.py
@@ -0,0 +1,74 @@
+"""Unit testing for included Open Source datasets loaders."""
+import pandas as pd
+
+from choice_learn.data import ChoiceDataset
+from choice_learn.datasets import (
+    load_electricity,
+    load_heating,
+    load_modecanada,
+    load_swissmetro,
+    load_tafeng,
+    load_train,
+)
+
+
+def test_swissmetro_loader():
+    """Test loading the Swissmetro dataset."""
+    swissmetro = load_swissmetro(as_frame=True)
+    assert isinstance(swissmetro, pd.DataFrame)
+    assert swissmetro.shape == (10728, 29)
+
+    swissmetro = load_swissmetro()
+    assert isinstance(swissmetro, ChoiceDataset)
+    swissmetro = load_swissmetro(add_items_one_hot=True)
+    assert isinstance(swissmetro, ChoiceDataset)
+
+
+def test_modecanada_loader():
+ """Test loading the Canada dataset.""" + canada = load_modecanada(as_frame=True) + assert isinstance(canada, pd.DataFrame) + assert canada.shape == (15520, 12) + + canada = load_modecanada() + assert isinstance(canada, ChoiceDataset) + + +def test_electricity_loader(): + """Test loading the Electricity dataset.""" + electricity = load_electricity(as_frame=True) + assert isinstance(electricity, pd.DataFrame) + assert electricity.shape == (17232, 10) + + electricity = load_electricity() + assert isinstance(electricity, ChoiceDataset) + + +def test_train_loader(): + """Test loading the Train dataset.""" + train = load_train(as_frame=True) + assert isinstance(train, pd.DataFrame) + assert train.shape == (2929, 11) + + train = load_train() + assert isinstance(train, ChoiceDataset) + + +def test_tafeng_loader(): + """Test loading the TaFeng dataset.""" + tafeng = load_tafeng(as_frame=True) + assert isinstance(tafeng, pd.DataFrame) + assert tafeng.shape == (817741, 9) + + tafeng = load_tafeng() + assert isinstance(tafeng, ChoiceDataset) + + +def test_heating_loader(): + """Test loading the heating dataset.""" + heating = load_heating(as_frame=True) + assert isinstance(heating, pd.DataFrame) + assert heating.shape == (900, 16) + + heating = load_heating() + assert isinstance(heating, ChoiceDataset)