From 053658600de0c60f709e812e0a904a6046a7f897 Mon Sep 17 00:00:00 2001
From: Lilly <lilly.may@tum.de>
Date: Sat, 13 Apr 2024 09:18:23 +0200
Subject: [PATCH] Docstring improvements; raise default feature_importance_threshold to 0.1

---
 ehrapy/preprocessing/_bias.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py
index ee35f776..034601c1 100644
--- a/ehrapy/preprocessing/_bias.py
+++ b/ehrapy/preprocessing/_bias.py
@@ -14,7 +14,7 @@ def bias_detection(
     sensitive_features: Iterable[str] | Literal["all"],
     corr_threshold: float = 0.5,
     smd_threshold: float = 0.5,
-    feature_importance_threshold: float = 0.01,
+    feature_importance_threshold: float = 0.1,
     prediction_confidence_threshold: float = 0.5,
 ):
     """Detects bias in the data.
@@ -22,9 +22,12 @@ def bias_detection(
     Args:
         adata: An annotated data matrix containing patient data.
         sensitive_features: A list of sensitive features to check for bias.
-
-    Returns:
-        #TODO
+        corr_threshold: The threshold for the correlation coefficient between two features to be considered of interest. Defaults to 0.5.
+        smd_threshold: The threshold for the standardized mean difference between two features to be considered of interest. Defaults to 0.5.
+        feature_importance_threshold: The threshold for the feature importance of a sensitive feature for predicting another feature to be considered
+            of interest. Defaults to 0.1.
+        prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another
+            feature to be considered of interest. Defaults to 0.5.
     """
     from ehrapy.tools import rank_features_supervised
 
@@ -89,10 +92,10 @@ def _standardized_mean_differences(adata: AnnData, features: Iterable[str]) -> d
     Args:
         adata: An annotated data matrix containing patient data.
         features: A list of features to compute the standardized mean differences (SMD) for. For each listed feature, the SMD is computed for each
-            feature for all groups within the respected feature.
+            feature, comparing one group to the rest. Thus, we obtain an n_groups_in_feature x n_features matrix of SMDs for each listed feature.
 
     Returns:
-        A pandas DataFrame containing the standardized mean differences.
+        A dictionary mapping each feature to a pandas DataFrame containing the standardized mean differences.
     """
     df = anndata_to_df(adata)
     smd_results = {}  # type: ignore