From 877034d1e672206e98c170cbdb4ee240777487a9 Mon Sep 17 00:00:00 2001
From: eroell
Date: Sun, 1 Dec 2024 21:19:30 +0100
Subject: [PATCH 1/5] Initial suggestions for array type checks, using scale_norm as an example

---
 ehrapy/preprocessing/_normalization.py    | 22 ++++++++++++++++++----
 tests/preprocessing/test_normalization.py | 19 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/ehrapy/preprocessing/_normalization.py b/ehrapy/preprocessing/_normalization.py
index de6cf646..d6bd5f22 100644
--- a/ehrapy/preprocessing/_normalization.py
+++ b/ehrapy/preprocessing/_normalization.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+from functools import singledispatch
 from typing import TYPE_CHECKING
 
+import dask.array as da
 import numpy as np
 import sklearn.preprocessing as sklearn_pp
 
@@ -69,6 +71,21 @@ def _scale_func_group(
     return None
 
 
+@singledispatch
+def _scale_norm_function(arr, **kwargs):
+    raise NotImplementedError(f"scale_norm does not support data of type {type(arr)}")
+
+
+@_scale_norm_function.register
+def _(arr: np.ndarray, **kwargs):
+    return sklearn_pp.StandardScaler(**kwargs).fit_transform
+
+
+@_scale_norm_function.register
+def _(arr: da.Array, **kwargs):
+    return sklearn_pp.StandardScaler(**kwargs).fit_transform
+
+
 def scale_norm(
     adata: AnnData,
     vars: str | Sequence[str] | None = None,
@@ -98,10 +115,7 @@ def scale_norm(
 
     >>> adata_norm = ep.pp.scale_norm(adata, copy=True)
     """
-    if is_dask_array(adata.X):
-        scale_func = daskml_pp.StandardScaler(**kwargs).fit_transform
-    else:
-        scale_func = sklearn_pp.StandardScaler(**kwargs).fit_transform
+    scale_func = _scale_norm_function(adata.X, **kwargs)
 
     return _scale_func_group(
         adata=adata,
diff --git a/tests/preprocessing/test_normalization.py b/tests/preprocessing/test_normalization.py
index 75df11ae..bbdecfa0 100644
--- a/tests/preprocessing/test_normalization.py
+++ b/tests/preprocessing/test_normalization.py
@@ -2,6 +2,7 @@
 from collections import OrderedDict
 from pathlib import Path
 
+import dask.array as da
 import numpy as np
 import pandas as pd
 import pytest
@@ -13,6 +14,7 @@
 from tests.conftest import ARRAY_TYPES, TEST_DATA_PATH
 
 CURRENT_DIR = Path(__file__).parent
+from scipy import sparse
 
 
 @pytest.fixture
@@ -74,7 +76,14 @@ def test_vars_checks(adata_to_norm):
         ep.pp.scale_norm(adata_to_norm, vars=["String1"])
 
 
-@pytest.mark.parametrize("array_type", ARRAY_TYPES)
+# TODO: where to list the supported types?
+norm_scale_supported_types = [np.asarray, da.asarray]
+norm_scale_unsupported_types = [sparse.csc_matrix]
+
+
+# TODO: find consensus on a "minimal" test of ehrapy functions when adding this casting test: vanilla settings, all defaults?
+# Should values be checked for exact matches as well?
+@pytest.mark.parametrize("array_type", norm_scale_supported_types) def test_norm_scale(array_type, adata_to_norm): """Test for the scaling normalization method.""" warnings.filterwarnings("ignore") @@ -94,6 +103,14 @@ def test_norm_scale(array_type, adata_to_norm): assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True) +@pytest.mark.parametrize("array_type", norm_scale_unsupported_types) +def test_norm_scale_notimplemented(array_type, adata_to_norm): + adata_to_norm_casted = adata_to_norm.copy() + adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + with pytest.raises(NotImplementedError): + ep.pp.scale_norm(adata_to_norm_casted) + + @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_scale_kwargs(array_type, adata_to_norm): adata_to_norm_casted = adata_to_norm.copy() From d6dc2c99c537beb13c00b5c74e7e148d8dc6ac53 Mon Sep 17 00:00:00 2001 From: eroell Date: Fri, 6 Dec 2024 17:19:53 +0100 Subject: [PATCH 2/5] singledispatch normalization functions and test them --- ehrapy/preprocessing/_normalization.py | 96 +++++++++++---- tests/preprocessing/test_normalization.py | 135 +++++++++++++++++++--- 2 files changed, 196 insertions(+), 35 deletions(-) diff --git a/ehrapy/preprocessing/_normalization.py b/ehrapy/preprocessing/_normalization.py index d6bd5f22..ca18c4cb 100644 --- a/ehrapy/preprocessing/_normalization.py +++ b/ehrapy/preprocessing/_normalization.py @@ -83,7 +83,7 @@ def _(arr: np.ndarray, **kwargs): @_scale_norm_function.register def _(arr: da.Array, **kwargs): - return sklearn_pp.StandardScaler(**kwargs).fit_transform + return daskml_pp.StandardScaler(**kwargs).fit_transform def scale_norm( @@ -127,6 +127,21 @@ def scale_norm( ) +@singledispatch +def _minmax_norm_function(arr): + raise NotImplementedError(f"minmax_norm does not support data to be of type {type(arr)}") + + +@_minmax_norm_function.register +def _(arr: np.ndarray, **kwargs): + return sklearn_pp.MinMaxScaler(**kwargs).fit_transform + + +@_minmax_norm_function.register +def _(arr: da.Array, **kwargs): + return daskml_pp.MinMaxScaler(**kwargs).fit_transform + + def minmax_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -157,10 +172,7 @@ def minmax_norm( >>> adata_norm = ep.pp.minmax_norm(adata, copy=True) """ - if is_dask_array(adata.X): - scale_func = daskml_pp.MinMaxScaler(**kwargs).fit_transform - else: - scale_func = sklearn_pp.MinMaxScaler(**kwargs).fit_transform + scale_func = _minmax_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, @@ -172,6 +184,16 @@ def minmax_norm( ) +@singledispatch +def _maxabs_norm_function(arr): + raise NotImplementedError(f"maxabs_norm does not support data to be of type {type(arr)}") + + +@_maxabs_norm_function.register +def _(arr: np.ndarray): + return sklearn_pp.MaxAbsScaler().fit_transform + + def maxabs_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -198,10 +220,8 @@ def maxabs_norm( >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = ep.pp.maxabs_norm(adata, copy=True) """ - if is_dask_array(adata.X): - raise NotImplementedError("MaxAbsScaler is not implemented in dask_ml.") - else: - scale_func = sklearn_pp.MaxAbsScaler().fit_transform + + scale_func = _maxabs_norm_function(adata.X) return _scale_func_group( adata=adata, @@ -213,6 +233,21 @@ def maxabs_norm( ) +@singledispatch +def _robust_scale_norm_function(arr, **kwargs): + raise NotImplementedError(f"robust_scale_norm does not support data to be of type {type(arr)}") + + +@_robust_scale_norm_function.register 
+def _(arr: np.ndarray, **kwargs): + return sklearn_pp.RobustScaler(**kwargs).fit_transform + + +@_robust_scale_norm_function.register +def _(arr: da.Array, **kwargs): + return daskml_pp.RobustScaler(**kwargs).fit_transform + + def robust_scale_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -243,10 +278,8 @@ def robust_scale_norm( >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = ep.pp.robust_scale_norm(adata, copy=True) """ - if is_dask_array(adata.X): - scale_func = daskml_pp.RobustScaler(**kwargs).fit_transform - else: - scale_func = sklearn_pp.RobustScaler(**kwargs).fit_transform + + scale_func = _robust_scale_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, @@ -258,6 +291,21 @@ def robust_scale_norm( ) +@singledispatch +def _quantile_norm_function(arr): + raise NotImplementedError(f"robust_scale_norm does not support data to be of type {type(arr)}") + + +@_quantile_norm_function.register +def _(arr: np.ndarray, **kwargs): + return sklearn_pp.QuantileTransformer(**kwargs).fit_transform + + +@_quantile_norm_function.register +def _(arr: da.Array, **kwargs): + return daskml_pp.QuantileTransformer(**kwargs).fit_transform + + def quantile_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -287,10 +335,8 @@ def quantile_norm( >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = ep.pp.quantile_norm(adata, copy=True) """ - if is_dask_array(adata.X): - scale_func = daskml_pp.QuantileTransformer(**kwargs).fit_transform - else: - scale_func = sklearn_pp.QuantileTransformer(**kwargs).fit_transform + + scale_func = _quantile_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, @@ -302,6 +348,16 @@ def quantile_norm( ) +@singledispatch +def _power_norm_function(arr, **kwargs): + raise NotImplementedError(f"power_norm does not support data to be of type {type(arr)}") + + +@_power_norm_function.register +def _(arr: np.ndarray, **kwargs): + return sklearn_pp.PowerTransformer(**kwargs).fit_transform + + def power_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -331,10 +387,8 @@ def power_norm( >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = ep.pp.power_norm(adata, copy=True) """ - if is_dask_array(adata.X): - raise NotImplementedError("dask-ml has no PowerTransformer, this is only available in scikit-learn") - else: - scale_func = sklearn_pp.PowerTransformer(**kwargs).fit_transform + + scale_func = _power_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, diff --git a/tests/preprocessing/test_normalization.py b/tests/preprocessing/test_normalization.py index 048aabd4..5f172563 100644 --- a/tests/preprocessing/test_normalization.py +++ b/tests/preprocessing/test_normalization.py @@ -89,19 +89,35 @@ def test_vars_checks(adata_to_norm): ep.pp.scale_norm(adata_to_norm, vars=["String1"]) -# TODO: where to list the supported types? +# TODO: list the supported array types centrally? norm_scale_supported_types = [np.asarray, da.asarray] norm_scale_unsupported_types = [sparse.csc_matrix] -# TODO: find consens for "minimal" test of ehrapy functions when make this casting test. vanilla settings, all defaults? -# even test for value matchings? -@pytest.mark.parametrize("array_type", norm_scale_supported_types) -def test_norm_scale(array_type, adata_to_norm): +# TODO: check this for each function, with just default settings? 
+@pytest.mark.parametrize(
+    "array_type,expected_error",
+    [
+        (np.array, None),
+        (da.array, None),
+        (sparse.csr_matrix, NotImplementedError),
+    ],
+)
+def test_norm_scale_array_types(adata_to_norm, array_type, expected_error):
+    adata_to_norm_casted = adata_to_norm.copy()
+    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+    if expected_error:
+        with pytest.raises(expected_error):
+            ep.pp.scale_norm(adata_to_norm_casted)
+
+
+@pytest.mark.parametrize("array_type", [np.array, da.array])
+def test_norm_scale(adata_to_norm, array_type):
     """Test for the scaling normalization method."""
     warnings.filterwarnings("ignore")
     adata_to_norm_casted = adata_to_norm.copy()
     adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+
+    ep.pp.scale_norm(adata_to_norm_casted)
 
     adata_norm = ep.pp.scale_norm(adata_to_norm, copy=True)
@@ -137,14 +153,6 @@ def test_norm_scale_integers(adata_mini_integers_in_X):
     assert np.allclose(adata_norm.X, in_days_norm)
 
 
-@pytest.mark.parametrize("array_type", norm_scale_unsupported_types)
-def test_norm_scale_notimplemented(array_type, adata_to_norm):
-    adata_to_norm_casted = adata_to_norm.copy()
-    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
-    with pytest.raises(NotImplementedError):
-        ep.pp.scale_norm(adata_to_norm_casted)
-
-
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
 def test_norm_scale_kwargs(array_type, adata_to_norm):
     adata_to_norm_casted = adata_to_norm.copy()
@@ -191,6 +199,22 @@ def test_norm_scale_group(array_type, adata_mini):
     assert np.allclose(adata_mini_norm.X[:, 2], col2_norm)
 
 
+@pytest.mark.parametrize(
+    "array_type,expected_error",
+    [
+        (np.array, None),
+        (da.array, None),
+        (sparse.csr_matrix, NotImplementedError),
+    ],
+)
+def test_norm_minmax_array_types(adata_to_norm, array_type, expected_error):
+    adata_to_norm_casted = adata_to_norm.copy()
+    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+    if expected_error:
+        with pytest.raises(expected_error):
+            ep.pp.minmax_norm(adata_to_norm_casted)
+
+
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
 def test_norm_minmax(array_type, adata_to_norm):
     """Test for the minmax normalization method."""
@@ -251,6 +276,24 @@ def test_norm_minmax_group(array_type, adata_mini):
     assert np.allclose(adata_mini_norm.X[:, 2], col2_norm)
 
 
+@pytest.mark.parametrize(
+    "array_type,expected_error",
+    [
+        (np.array, None),
+        (da.array, NotImplementedError),
+        (sparse.csr_matrix, NotImplementedError),
+    ],
+)
+def test_norm_maxabs_array_types(adata_to_norm, array_type, expected_error):
+    adata_to_norm_casted = adata_to_norm.copy()
+    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+    if expected_error:
+        with pytest.raises(expected_error):
+            ep.pp.maxabs_norm(adata_to_norm_casted)
+    else:
+        ep.pp.maxabs_norm(adata_to_norm_casted)
+
+
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
 def test_norm_maxabs(array_type, adata_to_norm):
     """Test for the maxabs normalization method."""
@@ -317,6 +360,22 @@ def test_norm_maxabs_group(array_type, adata_mini):
     assert np.allclose(adata_mini_norm.X[:, 2], col2_norm)
 
 
+@pytest.mark.parametrize(
+    "array_type,expected_error",
+    [
+        (np.array, None),
+        (da.array, None),
+        (sparse.csr_matrix, NotImplementedError),
+    ],
+)
+def test_norm_robust_scale_array_types(adata_to_norm, array_type, expected_error):
+    adata_to_norm_casted = adata_to_norm.copy()
+    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+    if expected_error:
+        with pytest.raises(expected_error):
+            ep.pp.robust_scale_norm(adata_to_norm_casted)
+
+
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
 def test_norm_robust_scale(array_type, adata_to_norm):
     """Test for the robust_scale normalization method."""
@@ -343,7 +402,7 @@ def test_norm_robust_scale_integers(adata_mini_integers_in_X):
 
 
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
-def test_norm_robust_scale_kwargs(array_type, adata_to_norm):
+def test_norm_robust_scale_kwargs(adata_to_norm, array_type):
     adata_to_norm_casted = adata_to_norm.copy()
     adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
@@ -380,6 +439,22 @@ def test_norm_robust_scale_group(array_type, adata_mini):
     assert np.allclose(adata_mini_norm.X[:, 2], col2_norm)
 
 
+@pytest.mark.parametrize(
+    "array_type,expected_error",
+    [
+        (np.array, None),
+        (da.array, None),
+        (sparse.csr_matrix, NotImplementedError),
+    ],
+)
+def test_norm_quantile_array_types(adata_to_norm, array_type, expected_error):
+    adata_to_norm_casted = adata_to_norm.copy()
+    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+    if expected_error:
+        with pytest.raises(expected_error):
+            ep.pp.quantile_norm(adata_to_norm_casted)
+
+
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
 def test_norm_quantile_uniform(array_type, adata_to_norm):
     """Test for the quantile normalization method."""
@@ -459,6 +534,22 @@ def test_norm_quantile_uniform_group(array_type, adata_mini):
     assert np.allclose(adata_mini_norm.X[:, 2], col2_norm)
 
 
+@pytest.mark.parametrize(
+    "array_type,expected_error",
+    [
+        (np.array, None),
+        (da.array, None),
+        (sparse.csr_matrix, NotImplementedError),
+    ],
+)
+def test_norm_power_array_types(adata_to_norm, array_type, expected_error):
+    adata_to_norm_casted = adata_to_norm.copy()
+    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+    if expected_error:
+        with pytest.raises(expected_error):
+            ep.pp.power_norm(adata_to_norm_casted)
+
+
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
 def test_norm_power(array_type, adata_to_norm):
     """Test for the power transformation normalization method."""
@@ -573,6 +664,24 @@ def test_norm_power_group(array_type, adata_mini):
     assert np.allclose(adata_mini_norm.X[:, 2], col2_norm, rtol=1e-02, atol=1e-02)
 
 
+@pytest.mark.parametrize(
+    "array_type,expected_error",
+    [
+        (np.array, None),
+        (da.array, None),
+        (sparse.csr_matrix, None),
+    ],
+)
+def test_norm_log_norm_array_types(adata_to_norm, array_type, expected_error):
+    adata_to_norm_casted = adata_to_norm.copy()
+    adata_to_norm_casted.X = array_type(adata_to_norm_casted.X)
+    if expected_error:
+        with pytest.raises(expected_error):
+            ep.pp.log_norm(adata_to_norm_casted)
+    else:
+        ep.pp.log_norm(adata_to_norm_casted)
+
+
 def test_norm_log1p(adata_to_norm):
     """Test for the log normalization method."""
     # Ensure that some test data is strictly positive

From 621ea970644c07ecfe949374746e80f3a69950b8 Mon Sep 17 00:00:00 2001
From: eroell
Date: Fri, 6 Dec 2024 17:40:37 +0100
Subject: [PATCH 3/5] Make dask an optional import

---
 ehrapy/preprocessing/_normalization.py | 38 +++++++++++++++++---------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/ehrapy/preprocessing/_normalization.py b/ehrapy/preprocessing/_normalization.py
index ca18c4cb..af39879d 100644
--- a/ehrapy/preprocessing/_normalization.py
+++ b/ehrapy/preprocessing/_normalization.py
@@ -3,16 +3,20 @@
 from functools import singledispatch
 from typing import TYPE_CHECKING
 
-import dask.array as da
 import numpy as np
 import sklearn.preprocessing as sklearn_pp
 
 from ehrapy._compat import is_dask_array
 
 try:
+    import dask.array as da
     import dask_ml.preprocessing as daskml_pp
+
+    DASK_AVAILABLE = True
 except ImportError:
     daskml_pp = None
+    DASK_AVAILABLE = False
+
 
 from ehrapy.anndata.anndata_ext import (
     assert_numeric_vars,
@@ -81,9 +85,11 @@ def _(arr: np.ndarray, **kwargs):
     return sklearn_pp.StandardScaler(**kwargs).fit_transform
 
 
-@_scale_norm_function.register
-def _(arr: da.Array, **kwargs):
-    return daskml_pp.StandardScaler(**kwargs).fit_transform
+if DASK_AVAILABLE:
+
+    @_scale_norm_function.register
+    def _(arr: da.Array, **kwargs):
+        return daskml_pp.StandardScaler(**kwargs).fit_transform
 
 
 def scale_norm(
@@ -137,9 +143,11 @@ def _(arr: np.ndarray, **kwargs):
     return sklearn_pp.MinMaxScaler(**kwargs).fit_transform
 
 
-@_minmax_norm_function.register
-def _(arr: da.Array, **kwargs):
-    return daskml_pp.MinMaxScaler(**kwargs).fit_transform
+if DASK_AVAILABLE:
+
+    @_minmax_norm_function.register
+    def _(arr: da.Array, **kwargs):
+        return daskml_pp.MinMaxScaler(**kwargs).fit_transform
 
 
 def minmax_norm(
@@ -243,9 +251,11 @@ def _(arr: np.ndarray, **kwargs):
     return sklearn_pp.RobustScaler(**kwargs).fit_transform
 
 
-@_robust_scale_norm_function.register
-def _(arr: da.Array, **kwargs):
-    return daskml_pp.RobustScaler(**kwargs).fit_transform
+if DASK_AVAILABLE:
+
+    @_robust_scale_norm_function.register
+    def _(arr: da.Array, **kwargs):
+        return daskml_pp.RobustScaler(**kwargs).fit_transform
 
 
 def robust_scale_norm(
@@ -301,9 +311,11 @@ def _(arr: np.ndarray, **kwargs):
     return sklearn_pp.QuantileTransformer(**kwargs).fit_transform
 
 
-@_quantile_norm_function.register
-def _(arr: da.Array, **kwargs):
-    return daskml_pp.QuantileTransformer(**kwargs).fit_transform
+if DASK_AVAILABLE:
+
+    @_quantile_norm_function.register
+    def _(arr: da.Array, **kwargs):
+        return daskml_pp.QuantileTransformer(**kwargs).fit_transform
 
 
 def quantile_norm(

From 33192d13780776ab794d95eeba81ec510d0af971 Mon Sep 17 00:00:00 2001
From: eroell
Date: Thu, 12 Dec 2024 17:22:28 +0100
Subject: [PATCH 4/5] Fix docs build: drop medcat extra from docs dependencies

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 78931dcc..7bc35d29 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -99,7 +99,7 @@ docs = [
     "nbsphinx-link",
     "ipykernel",
     "ipython",
-    "ehrapy[dask,medcat]",
+    "ehrapy[dask]",
 ]
 test = [
     "ehrapy[dask]",

From 48f59368a4f0a2a12f88e9f61e39086312afb6af Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 12 Dec 2024 16:25:48 +0000
Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .github/pull_request_template.md |  8 ++++----
 CODE_OF_CONDUCT.md               | 28 ++++++++++++++--------------
 README.md                        |  8 ++++----
 docs/contributing.md             | 18 +++++++++---------
 docs/index.md                    |  6 +++---
 docs/installation.md             |  8 ++++----
 6 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 4214c3b8..0bafff61 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -4,10 +4,10 @@
 
 
 
-- [ ] This comment contains a description of changes (with reason)
-- [ ] Referenced issue is linked
-- [ ] If you've fixed a bug or added code that should be tested, add tests!
-- [ ] Documentation in `docs` is updated
+- [ ] This comment contains a description of changes (with reason)
+- [ ] Referenced issue is linked
+- [ ] If you've fixed a bug or added code that should be tested, add tests!
+- [ ] Documentation in `docs` is updated **Description of changes** diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index d6209cca..39816a93 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -14,23 +14,23 @@ religion, or sexual identity and orientation. Examples of behavior that contributes to creating a positive environment include: -- Using welcoming and inclusive language -- Being respectful of differing viewpoints and experiences -- Gracefully accepting constructive criticism -- Focusing on what is best for the community -- Showing empathy towards other community members +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members Examples of unacceptable behavior by participants include: -- The use of sexualized language or imagery and unwelcome sexual - attention or advances -- Trolling, insulting/derogatory comments, and personal or political - attacks -- Public or private harassment -- Publishing others’ private information, such as a physical or - electronic address, without explicit permission -- Other conduct which could reasonably be considered inappropriate in a - professional setting +- The use of sexualized language or imagery and unwelcome sexual + attention or advances +- Trolling, insulting/derogatory comments, and personal or political + attacks +- Public or private harassment +- Publishing others’ private information, such as a physical or + electronic address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting ## Our Responsibilities diff --git a/README.md b/README.md index 32e66dec..6c4533b4 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,10 @@ ## Features -- Exploratory and targeted analysis of Electronic Health Records -- Quality control & preprocessing -- Visualization & Exploration -- Clustering & trajectory inference +- Exploratory and targeted analysis of Electronic Health Records +- Quality control & preprocessing +- Visualization & Exploration +- Clustering & trajectory inference ## Installation diff --git a/docs/contributing.md b/docs/contributing.md index ce5858eb..0a5b318e 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -126,11 +126,11 @@ in the cookiecutter-scverse template. Please write documentation for new or changed features and use-cases. 
This project uses [sphinx][] with the following features: -- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text -- Google-style docstrings -- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) -- [Sphinx autodoc typehints][], to automatically reference annotated input and output types -- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/) +- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text +- Google-style docstrings +- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) +- [Sphinx autodoc typehints][], to automatically reference annotated input and output types +- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/) See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information on how to write documentation. @@ -144,10 +144,10 @@ These notebooks come from [pert-tutorials](https://github.com/theislab/ehrapy-tu #### Hints -- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only - if you do so can sphinx automatically create a link to the external documentation. -- If building the documentation fails because of a missing link that is outside your control, you can add an entry to - the `nitpick_ignore` list in `docs/conf.py` +- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only + if you do so can sphinx automatically create a link to the external documentation. +- If building the documentation fails because of a missing link that is outside your control, you can add an entry to + the `nitpick_ignore` list in `docs/conf.py` #### Building the docs locally diff --git a/docs/index.md b/docs/index.md index 56cc3037..03a0987d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -61,8 +61,8 @@ medRxiv 2023.12.11.23299816; doi: https://doi.org/10.1101/2023.12.11.23299816 ]( # Indices and tables -- {ref}`genindex` -- {ref}`modindex` -- {ref}`search` +- {ref}`genindex` +- {ref}`modindex` +- {ref}`search` [scanpy genome biology (2018)]: https://doi.org/10.1186/s13059-017-1382-0 diff --git a/docs/installation.md b/docs/installation.md index ba7010a9..b349394e 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -51,10 +51,10 @@ pip install ehrapy[medcat] Available language models are -- en_core_web_md (python -m spacy download en_core_web_md) -- en-core-sci-sm (pip install ) -- en-core-sci-md (pip install ) -- en-core-sci-lg (pip install ) +- en_core_web_md (python -m spacy download en_core_web_md) +- en-core-sci-sm (pip install ) +- en-core-sci-md (pip install ) +- en-core-sci-lg (pip install ) [github repo]: https://github.com/theislab/ehrapy [pip]: https://pip.pypa.io
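
---

Reviewer's note on the pattern this series converges on: each normalization helper gets a `functools.singledispatch` base that rejects unknown array types with an explicit `NotImplementedError`, a registration for `np.ndarray` backed by scikit-learn, and a dask-ml-backed registration that is only defined when the optional dask dependencies import (patch 3's `DASK_AVAILABLE` guard). The snippet below is a minimal, self-contained sketch of that idiom, not a drop-in copy of the ehrapy module; it mirrors `scale_norm`'s use of `StandardScaler`, and the `scipy.sparse` input at the end is an assumed example of an unsupported type that the patches themselves only exercise in the tests:

```python
from functools import singledispatch

import numpy as np
import sklearn.preprocessing as sklearn_pp


@singledispatch
def _scale_norm_function(arr, **kwargs):
    # Fallback: any type without an explicit registration fails loudly.
    raise NotImplementedError(f"scale_norm does not support data of type {type(arr)}")


@_scale_norm_function.register
def _(arr: np.ndarray, **kwargs):
    # Dense in-memory arrays are handled by scikit-learn.
    return sklearn_pp.StandardScaler(**kwargs).fit_transform


try:
    import dask.array as da
    import dask_ml.preprocessing as daskml_pp

    # Registered only when the optional dask/dask-ml dependencies are
    # installed, mirroring the DASK_AVAILABLE guard from patch 3.
    @_scale_norm_function.register
    def _(arr: da.Array, **kwargs):
        return daskml_pp.StandardScaler(**kwargs).fit_transform
except ImportError:
    pass


X = np.array([[1.0, 2.0], [3.0, 4.0]])
scale_func = _scale_norm_function(X)  # dispatches on type(X) -> sklearn path
print(scale_func(X))                  # column-wise standardized values

import scipy.sparse as sp

try:
    _scale_norm_function(sp.csr_matrix(X))  # unregistered type
except NotImplementedError as e:
    print(e)
```

Compared with the `if is_dask_array(adata.X):` branches it replaces, this keeps the type-to-backend mapping in one place per function and makes unsupported inputs fail with a clear error at dispatch time rather than somewhere downstream.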