From fcd761e1f622a0af99c6982dc9fdfddf2bf8a02a Mon Sep 17 00:00:00 2001
From: Sander Vanden Hautte <sander.vandenhautte@tobania.be>
Date: Fri, 6 Aug 2021 09:28:26 +0200
Subject: [PATCH 1/4] Issue #67: Complete review of the target encoder code for
 linear regression: mainly docstring modifications, but verified the approach,
 it will work for regression without changes.

---
 cobra/preprocessing/target_encoder.py | 141 +++++++++++++++-----------
 1 file changed, 84 insertions(+), 57 deletions(-)

diff --git a/cobra/preprocessing/target_encoder.py b/cobra/preprocessing/target_encoder.py
index 0351049..a828545 100644
--- a/cobra/preprocessing/target_encoder.py
+++ b/cobra/preprocessing/target_encoder.py
@@ -1,6 +1,6 @@
 """
 Incidence Replacement Module. The implementation is inspired by
-https://contrib.scikit-learn.org/categorical-encoding/index.html
+https://github.com/scikit-learn-contrib/category_encoders.
 
 Authors:
 
@@ -9,7 +9,6 @@
 """
 import logging
 
-#import numpy as np
 import pandas as pd
 from tqdm.auto import tqdm
 from sklearn.base import BaseEstimator
@@ -20,45 +19,69 @@
 
 class TargetEncoder(BaseEstimator):
 
-    """Target encoding for categorical features.
+    """Target encoding for categorical features, inspired by
+    http://contrib.scikit-learn.org/category_encoders/targetencoder.html.
 
     Replace each value of the categorical feature with the average of the
     target values (in case of a binary target, this is the incidence of the
     group). This encoding scheme is also called Mean encoding.
 
+    Note that, when applying this target encoding, values of the categorical
+    feature that have not been seen during fit will be imputed according to the
+    configured imputation strategy: replacement with the global target mean,
+    or with the minimum or maximum encoded value of that variable.
+
     The main problem with Target encoding is overfitting; the fact that we are
     encoding the feature based on target classes may lead to data leakage,
-    rendering the feature biased. This can be solved using some type of
-    regularization. A popular way to handle this is to use cross-validation
-    and compute the means in each out-of-fold. However, the approach
-    implemented here makes use of additive smoothing
-    (https://en.wikipedia.org/wiki/Additive_smoothing)
+    rendering the feature biased.
+    This can be solved using some type of regularization. A popular way to
+    handle this is to use cross-validation and compute the means in each
+    out-of-fold. However, the approach implemented here makes use of
+    additive smoothing (https://en.wikipedia.org/wiki/Additive_smoothing).
+
+    In summary:
+
+    - with a binary classification target, a value of a categorical variable is
+    replaced with:
+
+    [count(variable=value) * P(target=1|variable=value) + weight * P(target=1)]
+    / [count(variable=value) + weight]
+
+    - with a regression target, a value of a categorical variable is replaced
+    with:
+
+    [count(variable=value) * E(target|variable=value) + weight * E(target)]
+    / [count(variable=value) + weight]
 
     Attributes
     ----------
     imputation_strategy : str
         in case there is a particular column which contains new categories,
         the encoding will lead to NULL values which should be imputed.
-        Valid strategies are to replace with the global mean of the train
-        set or the min (resp. max) incidence of the categories of that
-        particular variable.
+        Valid strategies then are to replace the NULL values with the global
+        mean of the train set or the min (resp. max) incidence of the
+        categories of that particular variable.
     weight : float
-        Smoothing parameters (non-negative). The higher the value of the
-        parameter, the bigger the contribution of the overall mean. When set to
-        zero, there is no smoothing (e.g. the pure target incidence is used).
+        Smoothing parameter (non-negative). The higher the value of the
+        parameter, the bigger the contribution of the overall mean of targets
+        learnt from all training data (prior) and the smaller the contribution
+        of the mean target learnt from data with the current categorical value
+        (posterior), so the bigger the smoothing (regularization) effect.
+        When set to zero, there is no smoothing (i.e. the mean target of the
+        current categorical value is used).
     """
 
-    valid_strategies = ("mean", "min", "max")
+    valid_imputation_strategies = ("mean", "min", "max")
 
     def __init__(self, weight: float=0.0,
                  imputation_strategy: str="mean"):
 
         if weight < 0:
             raise ValueError("The value of weight cannot be smaller than zero")
-        elif imputation_strategy not in self.valid_strategies:
+        elif imputation_strategy not in self.valid_imputation_strategies:
             raise ValueError("Valid options for 'imputation_strategy' are {}."
-                             " Got imputation_strategy={!r} instead"
-                             .format(self.valid_strategies,
+                             " Got imputation_strategy={!r} instead."
+                             .format(self.valid_imputation_strategies,
                                      imputation_strategy))
 
         self.weight = weight
@@ -69,7 +92,7 @@ def __init__(self, weight: float=0.0,
         self._global_mean = None
 
     def attributes_to_dict(self) -> dict:
-        """Return the attributes of TargetEncoder in a dictionary
+        """Return the attributes of TargetEncoder in a dictionary.
 
         Returns
         -------
@@ -98,13 +121,11 @@ def set_attributes_from_dict(self, params: dict):
             Contains the attributes of TargetEncoder with their
             names as key.
         """
-
         if "weight" in params and type(params["weight"]) == float:
             self.weight = params["weight"]
 
         if ("imputation_strategy" in params and
-                params["imputation_strategy"] in self.valid_strategies):
-
+                params["imputation_strategy"] in self.valid_imputation_strategies):
             self.imputation_strategy = params["imputation_strategy"]
 
         if "_global_mean" in params and type(params["_global_mean"]) == float:
@@ -128,7 +149,7 @@ def dict_to_series(key, value):
 
     def fit(self, data: pd.DataFrame, column_names: list,
             target_column: str):
-        """Fit the TargetEncoder to the data
+        """Fit the TargetEncoder to the data.
 
         Parameters
         ----------
@@ -140,7 +161,6 @@ def fit(self, data: pd.DataFrame, column_names: list,
         target_column : str
             Column name of the target
         """
-
         # compute global mean (target incidence in case of binary target)
         y = data[target_column]
         self._global_mean = y.sum() / y.count()
@@ -154,7 +174,9 @@ def fit(self, data: pd.DataFrame, column_names: list,
             self._mapping[column] = self._fit_column(data[column], y)
 
     def _fit_column(self, X: pd.Series, y: pd.Series) -> pd.Series:
-        """Summary
+        """Compute, for each distinct value of a categorical column, the new
+        value to replace it with, following the formulas mentioned in the
+        docstring of this class.
 
         Parameters
         ----------
@@ -162,100 +184,103 @@ def _fit_column(self, X: pd.Series, y: pd.Series) -> pd.Series:
             data used to compute the encoding mapping for an individual
             categorical variable.
         y : pd.Series
-            series containing the targets for each observation
+            series containing the targets for each observation (value) of
+            this categorical variable.
 
         Returns
         -------
         pd.Series
-            Mapping containing the value to replace each group of the
-            categorical with.
+            Mapping containing the new value to replace each distinct value
+            of the categorical variable with.
         """
         stats = y.groupby(X).agg(["mean", "count"])
 
-        # Note if self.weight = 0, we have the ordinary incidence replacement
-        numerator = (stats["count"]*stats["mean"]
+        # Note: if self.weight = 0, we have the ordinary incidence replacement
+        numerator = (stats["count"] * stats["mean"]
                      + self.weight * self._global_mean)
 
         denominator = stats["count"] + self.weight
 
-        return numerator/denominator
+        return numerator / denominator
 
     def transform(self, data: pd.DataFrame,
                   column_names: list) -> pd.DataFrame:
-        """Replace (e.g. encode) categories of each column with its average
-        incidence which was computed when the fit method was called
+        """Replace (i.e. encode) values of each categorical column with a
+        new value (reflecting the corresponding average target value,
+        optionally smoothed by a regularization weight),
+        which was computed when the fit method was called.
 
         Parameters
         ----------
-        X : pd.DataFrame
-            data to encode
+        data : pd.DataFrame
+            the data to encode.
         column_names : list
-             Columns of data to be encoded
+            the names of the categorical columns in the data to be encoded.
 
         Returns
         -------
         pd.DataFrame
-            transformed data
+            the resulting transformed data.
 
         Raises
         ------
         NotFittedError
             Exception when TargetEncoder was not fitted before calling this
-            method
-
+            method.
         """
         if (len(self._mapping) == 0) or (self._global_mean is None):
             msg = ("This {} instance is not fitted yet. Call 'fit' with "
                    "appropriate arguments before using this method.")
-
             raise NotFittedError(msg.format(self.__class__.__name__))
 
         for column in tqdm(column_names, desc="Applying target encoding..."):
-
             if column not in data.columns:
-                log.warning("Unknown column '{}' will be skipped"
+                log.warning("Unknown column '{}' will be skipped."
                             .format(column))
                 continue
             elif column not in self._mapping:
                 log.warning("Column '{}' is not in fitted output "
-                            "and will be skipped".format(column))
+                            "and will be skipped.".format(column))
                 continue
-
             data = self._transform_column(data, column)
 
         return data
 
     def _transform_column(self, data: pd.DataFrame,
                           column_name: str) -> pd.DataFrame:
-        """Replace (e.g. encode) categories of each column with its average
-        incidence which was computed when the fit method was called
+        """Replace (i.e. encode) values of a categorical column with a
+        new value (reflecting the corresponding average target value,
+        optionally smoothed by a regularization weight),
+        which was computed when the fit method was called.
 
         Parameters
         ----------
-        X : pd.DataFrame
-            data to encode
+        data : pd.DataFrame
+            the data to encode.
         column_name : str
-            Name of the column in data to be encoded
+            the name of the column in the data to be encoded.
 
         Returns
         -------
         pd.DataFrame
-            transformed data
+            the resulting transformed data.
         """
         new_column = TargetEncoder._clean_column_name(column_name)
 
-        # Convert dtype to float because when the original dtype
-        # is of type "category", the resulting dtype is also of type
-        # "category"
+        # Convert dtype to float, because when the original dtype
+        # is of type "category", the resulting dtype would otherwise also be of
+        # type "category":
         data[new_column] = (data[column_name].map(self._mapping[column_name])
                             .astype("float"))
 
         # In case of categorical data, it could be that new categories will
         # emerge which were not present in the train set, so this will result
-        # in missing values (which should be replaced)
+        # in missing values, which should be replaced according to the
+        # configured imputation strategy:
         if data[new_column].isnull().sum() > 0:
             if self.imputation_strategy == "mean":
-                data[new_column].fillna(self._global_mean, inplace=True)
+                data[new_column].fillna(self._global_mean,
+                                        inplace=True)
             elif self.imputation_strategy == "min":
                 data[new_column].fillna(data[new_column].min(),
                                         inplace=True)
@@ -282,14 +307,16 @@ def fit_transform(self, data: pd.DataFrame,
         Returns
         -------
         pd.DataFrame
-            data with additional discretized variables
+            data with additional columns, holding the target-encoded variables.
         """
         self.fit(data, column_names, target_column)
         return self.transform(data, column_names)
 
     @staticmethod
     def _clean_column_name(column_name: str) -> str:
-        """Clean column name string by removing "_bin" and adding "_enc"
+        """Generate a name for the new column that this target encoder adds
+        to the given data, by removing "_bin", "_processed" or "_cleaned"
+        from the original column name and adding "_enc".
 
         Parameters
         ----------

From 872da784966a02d6097f2389a26446caf2c71791 Mon Sep 17 00:00:00 2001
From: Sander Vanden Hautte <sander.vandenhautte@tobania.be>
Date: Fri, 6 Aug 2021 15:35:45 +0200
Subject: [PATCH 2/4] Issue #67: Unit testing target encoder for linear
 regression.

---
 tests/preprocessing/test_target_encoder.py | 169 ++++++++++++++++++---
 1 file changed, 152 insertions(+), 17 deletions(-)

diff --git a/tests/preprocessing/test_target_encoder.py b/tests/preprocessing/test_target_encoder.py
index 609f9b1..d6007c9 100644
--- a/tests/preprocessing/test_target_encoder.py
+++ b/tests/preprocessing/test_target_encoder.py
@@ -1,18 +1,22 @@
 import pytest
 import pandas as pd
+from sklearn.exceptions import NotFittedError
 
 from cobra.preprocessing.target_encoder import TargetEncoder
 
 
 class TestTargetEncoder:
 
-    def test_target_encoder_constructor_value_error(self):
+    def test_target_encoder_constructor_weight_value_error(self):
         with pytest.raises(ValueError):
             TargetEncoder(weight=-1)
 
+    def test_target_encoder_constructor_imputation_value_error(self):
+        with pytest.raises(ValueError):
+            TargetEncoder(imputation_strategy="median")
+
     # Tests for attributes_attributes_to_dict and set_attributes_from_dict
     def test_target_encoder_attributes_to_dict(self):
-
         encoder = TargetEncoder()
 
         mapping_data = pd.Series(data=[0.333333, 0.50000, 0.666667],
@@ -40,7 +44,6 @@ def test_target_encoder_attributes_to_dict(self):
                              ["weight", "mapping"],
                              ids=["test_weight", "test_mapping"])
     def test_target_encoder_set_attributes_from_dict_unfitted(self, attribute):
-
         encoder = TargetEncoder()
 
         data = {"weight": 1.0}
@@ -58,7 +61,6 @@ def test_target_encoder_set_attributes_from_dict_unfitted(self, attribute):
             assert expected == actual
 
     def test_target_encoder_set_attributes_from_dict(self):
-
         encoder = TargetEncoder()
 
         data = {"weight": 0.0,
@@ -79,9 +81,8 @@ def test_target_encoder_set_attributes_from_dict(self):
 
         pd.testing.assert_series_equal(actual, expected)
 
-    # Tests for _fit_column
-    def test_target_encoder_fit_column(self):
-
+    # Tests for _fit_column:
+    def test_target_encoder_fit_column_binary_classification(self):
         df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
                                         'neutral', 'negative', 'positive',
                                         'negative', 'neutral', 'neutral',
@@ -98,8 +99,24 @@ def test_target_encoder_fit_column(self):
 
         pd.testing.assert_series_equal(actual, expected)
 
-    def test_target_encoder_fit_column_global_mean(self):
+    def test_target_encoder_fit_column_linear_regression(self):
+        df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
+                                        'neutral', 'negative', 'positive',
+                                        'negative', 'neutral', 'neutral',
+                                        'neutral', 'positive'],
+                           'target': [5, 4, -5, 0, -4, 5, -5, 0, 1, 0, 4]})
+
+        encoder = TargetEncoder()
+        encoder._global_mean = 0.454545
+        actual = encoder._fit_column(X=df.variable, y=df.target)
+
+        expected = pd.Series(data=[-4.666667, 0.250000, 4.500000],
+                             index=["negative", "neutral", "positive"])
+        expected.index.name = "variable"
+
+        pd.testing.assert_series_equal(actual, expected)
 
+    def test_target_encoder_fit_column_global_mean_binary_classification(self):
         df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
                                         'neutral', 'negative', 'positive',
                                         'negative', 'neutral', 'neutral',
@@ -117,9 +134,33 @@ def test_target_encoder_fit_column_global_mean(self):
 
         pd.testing.assert_series_equal(actual, expected)
 
-    # Tests for fit method
-    def test_target_encoder_fit(self):
+    def test_target_encoder_fit_column_global_mean_linear_regression(self):
+        df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
+                                        'neutral', 'negative', 'positive',
+                                        'negative', 'neutral', 'neutral',
+                                        'neutral', 'positive'],
+                           'target': [5, 4, -5, 0, -4, 5, -5, 0, 1, 0, 4]})
+
+        encoder = TargetEncoder(weight=1)
+        encoder._global_mean = 0.454545
+
+        actual = encoder._fit_column(X=df.variable, y=df.target)
+
+        # expected new value:
+        # [count of the value * its mean encoding + weight (= 1) * global mean]
+        # / [count of the value + weight (=1)].
+        expected = pd.Series(data=[(3 * -4.666667 + 1 * 0.454545) / (3 + 1),
+                                   (4 * 0.250000 + 1 * 0.454545) / (4 + 1),
+                                   (4 * 4.500000 + 1 * 0.454545) / (4 + 1)],
+                             index=["negative", "neutral", "positive"])
+        expected.index.name = "variable"
+
+        pd.testing.assert_series_equal(actual, expected)
 
+    # Tests for fit method
+    def test_target_encoder_fit_binary_classification(self):
+        # test_target_encoder_fit_column_binary_classification() tested on one
+        # column input as a pandas Series; this test runs on a dataframe input.
         df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
                                         'neutral', 'negative', 'positive',
                                         'negative', 'neutral', 'neutral',
@@ -136,9 +177,41 @@ def test_target_encoder_fit(self):
 
         pd.testing.assert_series_equal(actual, expected)
 
+    def test_target_encoder_fit_linear_regression(self):
+        # test_target_encoder_fit_column_linear_regression() tested on one
+        # column input as a pandas Series; this test runs on a dataframe input.
+        df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
+                                        'neutral', 'negative', 'positive',
+                                        'negative', 'neutral', 'neutral',
+                                        'neutral', 'positive'],
+                           'target': [5, 4, -5, 0, -4, 5, -5, 0, 1, 0, 4]})
+
+        encoder = TargetEncoder()
+        encoder.fit(data=df, column_names=["variable"], target_column="target")
+
+        expected = pd.Series(data=[-4.666667, 0.250000, 4.500000],
+                             index=["negative", "neutral", "positive"])
+        expected.index.name = "variable"
+        actual = encoder._mapping["variable"]
+
+        pd.testing.assert_series_equal(actual, expected)
+
     # Tests for transform method
-    def test_target_encoder_transform(self):
+    def test_target_encoder_transform_when_not_fitted(self):
+        df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
+                                        'neutral', 'negative', 'positive',
+                                        'negative', 'neutral', 'neutral',
+                                        'neutral'],
+                           'target': [1, 1, 0, 0, 1, 0, 0, 0, 1, 1]})
+
+        # inputs of TargetEncoder will be of dtype category
+        df["variable"] = df["variable"].astype("category")
 
+        encoder = TargetEncoder()
+        with pytest.raises(NotFittedError):
+            encoder.transform(data=df, column_names=["variable"])
+
+    def test_target_encoder_transform_binary_classification(self):
         df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
                                         'neutral', 'negative', 'positive',
                                         'negative', 'neutral', 'neutral',
@@ -159,8 +232,28 @@ def test_target_encoder_transform(self):
 
         pd.testing.assert_frame_equal(actual, expected)
 
-    def test_target_encoder_transform_new_category(self):
+    def test_target_encoder_transform_linear_regression(self):
+        df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
+                                        'neutral', 'negative', 'positive',
+                                        'negative', 'neutral', 'neutral',
+                                        'neutral', 'positive'],
+                           'target': [5, 4, -5, 0, -4, 5, -5, 0, 1, 0, 4]})
+
+        # inputs of TargetEncoder will be of dtype category
+        df["variable"] = df["variable"].astype("category")
+
+        expected = df.copy()
+        expected["variable_enc"] = [4.500000, 4.500000, -4.666667, 0.250000,
+                                    -4.666667, 4.500000, -4.666667, 0.250000,
+                                    0.250000, 0.250000, 4.500000]
+
+        encoder = TargetEncoder()
+        encoder.fit(data=df, column_names=["variable"], target_column="target")
+        actual = encoder.transform(data=df, column_names=["variable"])
+
+        pd.testing.assert_frame_equal(actual, expected)
 
+    def test_target_encoder_transform_new_category_binary_classification(self):
         df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
                                         'neutral', 'negative', 'positive',
                                         'negative', 'neutral', 'neutral',
@@ -185,10 +278,35 @@ def test_target_encoder_transform_new_category(self):
 
         pd.testing.assert_frame_equal(actual, expected)
 
-    # Tests for _clean_column_name
-    def test_target_encoder_clean_column_name(self):
+    def test_target_encoder_transform_new_category_linear_regression(self):
+        df = pd.DataFrame({'variable': ['positive', 'positive', 'negative',
+                                        'neutral', 'negative', 'positive',
+                                        'negative', 'neutral', 'neutral',
+                                        'neutral', 'positive'],
+                           'target': [5, 4, -5, 0, -4, 5, -5, 0, 1, 0, 4]})
 
-        column_name = "test_column"
+        df_appended = df.append({"variable": "new", "target": 10},
+                                ignore_index=True)
+
+        # inputs of TargetEncoder will be of dtype category
+        df["variable"] = df["variable"].astype("category")
+        df_appended["variable"] = df_appended["variable"].astype("category")
+
+        expected = df_appended.copy()
+        expected["variable_enc"] = [4.500000, 4.500000, -4.666667, 0.250000,
+                                    -4.666667, 4.500000, -4.666667, 0.250000,
+                                    0.250000, 0.250000, 4.500000,
+                                    -4.666667] # min imputation for new value
+
+        encoder = TargetEncoder(imputation_strategy="min")
+        encoder.fit(data=df, column_names=["variable"], target_column="target")
+        actual = encoder.transform(data=df_appended, column_names=["variable"])
+
+        pd.testing.assert_frame_equal(actual, expected)
+
+    # Tests for _clean_column_name:
+    def test_target_encoder_clean_column_name_binned_column(self):
+        column_name = "test_column_bin"
         expected = "test_column_enc"
 
         encoder = TargetEncoder()
@@ -196,9 +314,26 @@ def test_target_encoder_clean_column_name(self):
 
         assert actual == expected
 
-    def test_target_encoder_clean_column_name_binned_column(self):
+    def test_target_encoder_clean_column_name_processed_column(self):
+        column_name = "test_column_processed"
+        expected = "test_column_enc"
 
-        column_name = "test_column_bin"
+        encoder = TargetEncoder()
+        actual = encoder._clean_column_name(column_name)
+
+        assert actual == expected
+
+    def test_target_encoder_clean_column_name_cleaned_column(self):
+        column_name = "test_column_cleaned"
+        expected = "test_column_enc"
+
+        encoder = TargetEncoder()
+        actual = encoder._clean_column_name(column_name)
+
+        assert actual == expected
+
+    def test_target_encoder_clean_column_other_name(self):
+        column_name = "test_column"
         expected = "test_column_enc"
 
         encoder = TargetEncoder()

From c48919147ff9be666ab5aa74af398437852a2c2b Mon Sep 17 00:00:00 2001
From: Sander Vanden Hautte <sander.vandenhautte@tobania.be>
Date: Fri, 6 Aug 2021 16:11:39 +0200
Subject: [PATCH 3/4] Issue #67: Adding warning about potential overfitting in
 target encoding when weight=0.

---
 cobra/preprocessing/target_encoder.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cobra/preprocessing/target_encoder.py b/cobra/preprocessing/target_encoder.py
index a828545..603779b 100644
--- a/cobra/preprocessing/target_encoder.py
+++ b/cobra/preprocessing/target_encoder.py
@@ -78,6 +78,10 @@ def __init__(self, weight: float=0.0,
 
         if weight < 0:
             raise ValueError("The value of weight cannot be smaller than zero")
+        elif weight == 0:
+            log.warning("The target encoder's additive smoothing weight is "
+                        "set to 0. This disables smoothing and may make the "
+                        "encoding prone to overfitting.")
         elif imputation_strategy not in self.valid_imputation_strategies:
             raise ValueError("Valid options for 'imputation_strategy' are {}."
                              " Got imputation_strategy={!r} instead."

From 0165d4af9ade6c6bed756d597b6e3eb226693aad Mon Sep 17 00:00:00 2001
From: Sander Vanden Hautte <sander.vandenhautte@tobania.be>
Date: Fri, 6 Aug 2021 16:12:57 +0200
Subject: [PATCH 4/4] Issue #67: Adding warning about potential overfitting in
 target encoding when weight=0. (part 2, fixing an oopsie.)

---
 cobra/preprocessing/target_encoder.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/cobra/preprocessing/target_encoder.py b/cobra/preprocessing/target_encoder.py
index 603779b..64cdfcd 100644
--- a/cobra/preprocessing/target_encoder.py
+++ b/cobra/preprocessing/target_encoder.py
@@ -78,16 +78,17 @@ def __init__(self, weight: float=0.0,
 
         if weight < 0:
             raise ValueError("The value of weight cannot be smaller than zero")
-        elif weight == 0:
-            log.warning("The target encoder's additive smoothing weight is "
-                        "set to 0. This disables smoothing and may make the "
-                        "encoding prone to overfitting.")
         elif imputation_strategy not in self.valid_imputation_strategies:
             raise ValueError("Valid options for 'imputation_strategy' are {}."
                              " Got imputation_strategy={!r} instead."
                              .format(self.valid_imputation_strategies,
                                      imputation_strategy))
 
+        if weight == 0:
+            log.warning("The target encoder's additive smoothing weight is "
+                        "set to 0. This disables smoothing and may make the "
+                        "encoding prone to overfitting.")
+
         self.weight = weight
         self.imputation_strategy = imputation_strategy