diff --git a/.github/workflows/run_notebooks.yml b/.github/workflows/run_notebooks.yml index f45fd96b..4af421c5 100644 --- a/.github/workflows/run_notebooks.yml +++ b/.github/workflows/run_notebooks.yml @@ -1,7 +1,6 @@ name: Run Notebooks on: - - push - pull_request jobs: diff --git a/ehrapy/_compat.py b/ehrapy/_compat.py index 5497eead..dc94a6d3 100644 --- a/ehrapy/_compat.py +++ b/ehrapy/_compat.py @@ -1,7 +1,6 @@ # Since we might check whether an object is an instance of dask.array.Array # without requiring dask installed in the environment. -# This would become obsolete should dask become a requirement for ehrapy - +from collections.abc import Callable try: import dask.array as da @@ -11,6 +10,12 @@ DASK_AVAILABLE = False +def _raise_array_type_not_implemented(func: Callable, type_: type) -> NotImplementedError: + raise NotImplementedError( + f"{func.__name__} does not support array type {type_}. Must be of type {func.registry.keys()}." # type: ignore + ) + + def is_dask_array(array): if DASK_AVAILABLE: return isinstance(array, da.Array) diff --git a/ehrapy/preprocessing/_normalization.py b/ehrapy/preprocessing/_normalization.py index de6cf646..67c41d02 100644 --- a/ehrapy/preprocessing/_normalization.py +++ b/ehrapy/preprocessing/_normalization.py @@ -1,16 +1,22 @@ from __future__ import annotations +from functools import singledispatch from typing import TYPE_CHECKING import numpy as np import sklearn.preprocessing as sklearn_pp -from ehrapy._compat import is_dask_array +from ehrapy._compat import _raise_array_type_not_implemented try: + import dask.array as da import dask_ml.preprocessing as daskml_pp + + DASK_AVAILABLE = True except ImportError: daskml_pp = None + DASK_AVAILABLE = False + from ehrapy.anndata.anndata_ext import ( assert_numeric_vars, @@ -69,6 +75,23 @@ def _scale_func_group( return None +@singledispatch +def _scale_norm_function(arr): + _raise_array_type_not_implemented(_scale_norm_function, type(arr)) + + +@_scale_norm_function.register 
+def _(arr: np.ndarray, **kwargs): +    return sklearn_pp.StandardScaler(**kwargs).fit_transform + + +if DASK_AVAILABLE: + +    @_scale_norm_function.register +    def _(arr: da.Array, **kwargs): +        return daskml_pp.StandardScaler(**kwargs).fit_transform + + def scale_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -98,10 +121,7 @@ def scale_norm( >>> adata_norm = ep.pp.scale_norm(adata, copy=True) """ - if is_dask_array(adata.X): - scale_func = daskml_pp.StandardScaler(**kwargs).fit_transform - else: - scale_func = sklearn_pp.StandardScaler(**kwargs).fit_transform + scale_func = _scale_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, @@ -113,6 +133,23 @@ )  +@singledispatch +def _minmax_norm_function(arr): + _raise_array_type_not_implemented(_minmax_norm_function, type(arr)) + + +@_minmax_norm_function.register +def _(arr: np.ndarray, **kwargs): + return sklearn_pp.MinMaxScaler(**kwargs).fit_transform + + +if DASK_AVAILABLE: + + @_minmax_norm_function.register + def _(arr: da.Array, **kwargs): + return daskml_pp.MinMaxScaler(**kwargs).fit_transform + + def minmax_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -143,10 +180,7 @@ def minmax_norm( >>> adata_norm = ep.pp.minmax_norm(adata, copy=True) """ - if is_dask_array(adata.X): - scale_func = daskml_pp.MinMaxScaler(**kwargs).fit_transform - else: - scale_func = sklearn_pp.MinMaxScaler(**kwargs).fit_transform + scale_func = _minmax_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, @@ -158,6 +192,16 @@ ) +@singledispatch +def _maxabs_norm_function(arr): + _raise_array_type_not_implemented(_maxabs_norm_function, type(arr)) + + +@_maxabs_norm_function.register +def _(arr: np.ndarray): + return sklearn_pp.MaxAbsScaler().fit_transform + + def maxabs_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -184,10 +228,8 @@ >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = 
ep.pp.maxabs_norm(adata, copy=True) """ - if is_dask_array(adata.X): - raise NotImplementedError("MaxAbsScaler is not implemented in dask_ml.") - else: - scale_func = sklearn_pp.MaxAbsScaler().fit_transform + + scale_func = _maxabs_norm_function(adata.X) return _scale_func_group( adata=adata, @@ -199,6 +241,23 @@ def maxabs_norm( ) +@singledispatch +def _robust_scale_norm_function(arr, **kwargs): + _raise_array_type_not_implemented(_robust_scale_norm_function, type(arr)) + + +@_robust_scale_norm_function.register +def _(arr: np.ndarray, **kwargs): + return sklearn_pp.RobustScaler(**kwargs).fit_transform + + +if DASK_AVAILABLE: + + @_robust_scale_norm_function.register + def _(arr: da.Array, **kwargs): + return daskml_pp.RobustScaler(**kwargs).fit_transform + + def robust_scale_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -229,10 +288,8 @@ def robust_scale_norm( >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = ep.pp.robust_scale_norm(adata, copy=True) """ - if is_dask_array(adata.X): - scale_func = daskml_pp.RobustScaler(**kwargs).fit_transform - else: - scale_func = sklearn_pp.RobustScaler(**kwargs).fit_transform + + scale_func = _robust_scale_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, @@ -244,6 +301,23 @@ def robust_scale_norm( ) +@singledispatch +def _quantile_norm_function(arr): + _raise_array_type_not_implemented(_quantile_norm_function, type(arr)) + + +@_quantile_norm_function.register +def _(arr: np.ndarray, **kwargs): + return sklearn_pp.QuantileTransformer(**kwargs).fit_transform + + +if DASK_AVAILABLE: + + @_quantile_norm_function.register + def _(arr: da.Array, **kwargs): + return daskml_pp.QuantileTransformer(**kwargs).fit_transform + + def quantile_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -273,10 +347,8 @@ def quantile_norm( >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = ep.pp.quantile_norm(adata, copy=True) """ - if is_dask_array(adata.X): - scale_func = 
daskml_pp.QuantileTransformer(**kwargs).fit_transform - else: - scale_func = sklearn_pp.QuantileTransformer(**kwargs).fit_transform + + scale_func = _quantile_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, @@ -288,6 +360,16 @@ def quantile_norm( ) +@singledispatch +def _power_norm_function(arr, **kwargs): + _raise_array_type_not_implemented(_power_norm_function, type(arr)) + + +@_power_norm_function.register +def _(arr: np.ndarray, **kwargs): + return sklearn_pp.PowerTransformer(**kwargs).fit_transform + + def power_norm( adata: AnnData, vars: str | Sequence[str] | None = None, @@ -317,10 +399,8 @@ def power_norm( >>> adata = ep.dt.mimic_2(encoded=True) >>> adata_norm = ep.pp.power_norm(adata, copy=True) """ - if is_dask_array(adata.X): - raise NotImplementedError("dask-ml has no PowerTransformer, this is only available in scikit-learn") - else: - scale_func = sklearn_pp.PowerTransformer(**kwargs).fit_transform + + scale_func = _power_norm_function(adata.X, **kwargs) return _scale_func_group( adata=adata, diff --git a/pyproject.toml b/pyproject.toml index 654d9092..55f2f277 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ medcat = [ "medcat", ] dask = [ - "dask", + "anndata[dask]", "dask-ml", ] dev = [ @@ -136,7 +136,8 @@ filterwarnings = [ "ignore:`flavor='seurat_v3'` expects raw count data, but non-integers were found:UserWarning", "ignore:All-NaN slice encountered:RuntimeWarning", "ignore:Observation names are not unique. 
To make them unique, call `.obs_names_make_unique`.:UserWarning", - "ignore:Trying to modify attribute .var of view" + "ignore:Trying to modify attribute `.var` of view, initializing view as actual.:anndata.ImplicitModificationWarning", + "ignore:Transforming to str index.:anndata.ImplicitModificationWarning:" ] minversion = 6.0 norecursedirs = [ '.*', 'build', 'dist', '*.egg', 'data', '__pycache__'] diff --git a/tests/preprocessing/test_normalization.py b/tests/preprocessing/test_normalization.py index 9b5b1c7b..249e6892 100644 --- a/tests/preprocessing/test_normalization.py +++ b/tests/preprocessing/test_normalization.py @@ -2,6 +2,7 @@ from collections import OrderedDict from pathlib import Path +import dask.array as da import numpy as np import pandas as pd import pytest @@ -13,6 +14,7 @@ from tests.conftest import ARRAY_TYPES, TEST_DATA_PATH CURRENT_DIR = Path(__file__).parent +from scipy import sparse @pytest.fixture @@ -87,24 +89,40 @@ def test_vars_checks(adata_to_norm): ep.pp.scale_norm(adata_to_norm, vars=["String1"]) -@pytest.mark.parametrize("array_type", ARRAY_TYPES) -def test_norm_scale(array_type, adata_to_norm): +# TODO: check this for each function, with just default settings? 
+@pytest.mark.parametrize( + "array_type,expected_error", + [ + (np.array, None), + (da.array, None), + (sparse.csr_matrix, NotImplementedError), + ], +) +def test_norm_scale_array_types(adata_to_norm, array_type, expected_error): + adata_to_norm.X = array_type(adata_to_norm.X) + if expected_error: + with pytest.raises(expected_error): + ep.pp.scale_norm(adata_to_norm) + + +@pytest.mark.parametrize("array_type", [np.array, da.array]) +def test_norm_scale(adata_to_norm, array_type): """Test for the scaling normalization method.""" warnings.filterwarnings("ignore") - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) + ep.pp.scale_norm(adata_to_norm) adata_norm = ep.pp.scale_norm(adata_to_norm, copy=True) num1_norm = np.array([-1.4039999, 0.55506986, 0.84893], dtype=np.float32) num2_norm = np.array([-1.3587323, 1.0190493, 0.3396831], dtype=np.float32) - assert np.array_equal(adata_norm.X[:, 0], adata_to_norm_casted.X[:, 0]) - assert np.array_equal(adata_norm.X[:, 1], adata_to_norm_casted.X[:, 1]) - assert np.array_equal(adata_norm.X[:, 2], adata_to_norm_casted.X[:, 2]) + assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0]) + assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1]) + assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2]) assert np.allclose(adata_norm.X[:, 3], num1_norm) assert np.allclose(adata_norm.X[:, 4], num2_norm) - assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True) + assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True) def test_norm_scale_integers(adata_mini_integers_in_X): @@ -130,8 +148,7 @@ def test_norm_scale_integers(adata_mini_integers_in_X): @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_scale_kwargs(array_type, adata_to_norm): - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = 
array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) adata_norm = ep.pp.scale_norm(adata_to_norm, copy=True, with_mean=False) @@ -174,23 +191,37 @@ def test_norm_scale_group(array_type, adata_mini): assert np.allclose(adata_mini_norm.X[:, 2], col2_norm) +@pytest.mark.parametrize( + "array_type,expected_error", + [ + (np.array, None), + (da.array, None), + (sparse.csr_matrix, NotImplementedError), + ], +) +def test_norm_minmax_array_types(adata_to_norm, array_type, expected_error): + adata_to_norm.X = array_type(adata_to_norm.X) + if expected_error: + with pytest.raises(expected_error): + ep.pp.minmax_norm(adata_to_norm) + + @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_minmax(array_type, adata_to_norm): """Test for the minmax normalization method.""" - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) - adata_norm = ep.pp.minmax_norm(adata_to_norm_casted, copy=True) + adata_norm = ep.pp.minmax_norm(adata_to_norm, copy=True) num1_norm = np.array([0.0, 0.86956537, 0.9999999], dtype=np.dtype(np.float32)) num2_norm = np.array([0.0, 1.0, 0.71428573], dtype=np.float32) - assert np.array_equal(adata_norm.X[:, 0], adata_to_norm_casted.X[:, 0]) - assert np.array_equal(adata_norm.X[:, 1], adata_to_norm_casted.X[:, 1]) - assert np.array_equal(adata_norm.X[:, 2], adata_to_norm_casted.X[:, 2]) + assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0]) + assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1]) + assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2]) assert np.allclose(adata_norm.X[:, 3], num1_norm) assert np.allclose(adata_norm.X[:, 4], num2_norm) - assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True) + assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True) def test_norm_minmax_integers(adata_mini_integers_in_X): @@ -201,10 +232,9 
@@ def test_norm_minmax_integers(adata_mini_integers_in_X): @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_minmax_kwargs(array_type, adata_to_norm): - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) - adata_norm = ep.pp.minmax_norm(adata_to_norm_casted, copy=True, feature_range=(0, 2)) + adata_norm = ep.pp.minmax_norm(adata_to_norm, copy=True, feature_range=(0, 2)) num1_norm = np.array([0.0, 1.7391307, 1.9999998], dtype=np.float32) num2_norm = np.array([0.0, 2.0, 1.4285715], dtype=np.float32) @@ -234,28 +264,44 @@ def test_norm_minmax_group(array_type, adata_mini): assert np.allclose(adata_mini_norm.X[:, 2], col2_norm) +@pytest.mark.parametrize( + "array_type,expected_error", + [ + (np.array, None), + (da.array, NotImplementedError), + (sparse.csr_matrix, NotImplementedError), + ], +) +def test_norm_maxabs_array_types(adata_to_norm, array_type, expected_error): + adata_to_norm.X = array_type(adata_to_norm.X) + if expected_error: + with pytest.raises(expected_error): + ep.pp.maxabs_norm(adata_to_norm) + else: + ep.pp.maxabs_norm(adata_to_norm) + + @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_maxabs(array_type, adata_to_norm): """Test for the maxabs normalization method.""" - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) if "dask" in array_type.__name__: with pytest.raises(NotImplementedError): - adata_norm = ep.pp.maxabs_norm(adata_to_norm_casted, copy=True) + adata_norm = ep.pp.maxabs_norm(adata_to_norm, copy=True) else: - adata_norm = ep.pp.maxabs_norm(adata_to_norm_casted, copy=True) + adata_norm = ep.pp.maxabs_norm(adata_to_norm, copy=True) num1_norm = np.array([0.5964913, 0.94736844, 1.0], dtype=np.float32) num2_norm = np.array([-0.4, 1.0, 0.6], dtype=np.float32) - assert np.array_equal(adata_norm.X[:, 0], 
adata_to_norm_casted.X[:, 0]) - assert np.array_equal(adata_norm.X[:, 1], adata_to_norm_casted.X[:, 1]) - assert np.array_equal(adata_norm.X[:, 2], adata_to_norm_casted.X[:, 2]) + assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0]) + assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1]) + assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2]) assert np.allclose(adata_norm.X[:, 3], num1_norm) assert np.allclose(adata_norm.X[:, 4], num2_norm) - assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True) + assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True) def test_norm_maxabs_integers(adata_mini_integers_in_X): @@ -300,23 +346,37 @@ def test_norm_maxabs_group(array_type, adata_mini): assert np.allclose(adata_mini_norm.X[:, 2], col2_norm) +@pytest.mark.parametrize( + "array_type,expected_error", + [ + (np.array, None), + (da.array, None), + (sparse.csr_matrix, NotImplementedError), + ], +) +def test_norm_robust_scale_array_types(adata_to_norm, array_type, expected_error): + adata_to_norm.X = array_type(adata_to_norm.X) + if expected_error: + with pytest.raises(expected_error): + ep.pp.robust_scale_norm(adata_to_norm) + + @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_robust_scale(array_type, adata_to_norm): """Test for the robust_scale normalization method.""" - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) - adata_norm = ep.pp.robust_scale_norm(adata_to_norm_casted, copy=True) + adata_norm = ep.pp.robust_scale_norm(adata_to_norm, copy=True) num1_norm = np.array([-1.73913043, 0.0, 0.26086957], dtype=np.float32) num2_norm = np.array([-1.4285715, 0.5714286, 0.0], dtype=np.float32) - assert np.array_equal(adata_norm.X[:, 0], adata_to_norm_casted.X[:, 0]) - assert np.array_equal(adata_norm.X[:, 1], adata_to_norm_casted.X[:, 1]) - assert 
np.array_equal(adata_norm.X[:, 2], adata_to_norm_casted.X[:, 2]) + assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0]) + assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1]) + assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2]) assert np.allclose(adata_norm.X[:, 3], num1_norm) assert np.allclose(adata_norm.X[:, 4], num2_norm) - assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True) + assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True) def test_norm_robust_scale_integers(adata_mini_integers_in_X): @@ -326,11 +386,10 @@ def test_norm_robust_scale_integers(adata_mini_integers_in_X): @pytest.mark.parametrize("array_type", ARRAY_TYPES) -def test_norm_robust_scale_kwargs(array_type, adata_to_norm): - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) +def test_norm_robust_scale_kwargs(adata_to_norm, array_type): + adata_to_norm.X = array_type(adata_to_norm.X) - adata_norm = ep.pp.robust_scale_norm(adata_to_norm_casted, copy=True, with_scaling=False) + adata_norm = ep.pp.robust_scale_norm(adata_to_norm, copy=True, with_scaling=False) num1_norm = np.array([-2.0, 0.0, 0.2999997], dtype=np.float32) num2_norm = np.array([-5.0, 2.0, 0.0], dtype=np.float32) @@ -363,24 +422,38 @@ def test_norm_robust_scale_group(array_type, adata_mini): assert np.allclose(adata_mini_norm.X[:, 2], col2_norm) +@pytest.mark.parametrize( + "array_type,expected_error", + [ + (np.array, None), + (da.array, None), + (sparse.csr_matrix, NotImplementedError), + ], +) +def test_norm_quantile_array_types(adata_to_norm, array_type, expected_error): + adata_to_norm.X = array_type(adata_to_norm.X) + if expected_error: + with pytest.raises(expected_error): + ep.pp.quantile_norm(adata_to_norm) + + @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_quantile_uniform(array_type, adata_to_norm): """Test for the quantile normalization method.""" 
warnings.filterwarnings("ignore", category=UserWarning) - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) - adata_norm = ep.pp.quantile_norm(adata_to_norm_casted, copy=True) + adata_norm = ep.pp.quantile_norm(adata_to_norm, copy=True) num1_norm = np.array([0.0, 0.5, 1.0], dtype=np.float32) num2_norm = np.array([0.0, 1.0, 0.5], dtype=np.float32) - assert np.array_equal(adata_norm.X[:, 0], adata_to_norm_casted.X[:, 0]) - assert np.array_equal(adata_norm.X[:, 1], adata_to_norm_casted.X[:, 1]) - assert np.array_equal(adata_norm.X[:, 2], adata_to_norm_casted.X[:, 2]) + assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0]) + assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1]) + assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2]) assert np.allclose(adata_norm.X[:, 3], num1_norm) assert np.allclose(adata_norm.X[:, 4], num2_norm) - assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True) + assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True) def test_norm_quantile_integers(adata_mini_integers_in_X): @@ -406,10 +479,9 @@ def test_norm_quantile_integers(adata_mini_integers_in_X): @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_quantile_uniform_kwargs(array_type, adata_to_norm): - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) - adata_norm = ep.pp.quantile_norm(adata_to_norm_casted, copy=True, output_distribution="normal") + adata_norm = ep.pp.quantile_norm(adata_to_norm, copy=True, output_distribution="normal") num1_norm = np.array([-5.19933758, 0.0, 5.19933758], dtype=np.float32) num2_norm = np.array([-5.19933758, 5.19933758, 0.0], dtype=np.float32) @@ -442,27 +514,41 @@ def test_norm_quantile_uniform_group(array_type, adata_mini): assert 
np.allclose(adata_mini_norm.X[:, 2], col2_norm) +@pytest.mark.parametrize( + "array_type,expected_error", + [ + (np.array, None), + (da.array, None), + (sparse.csr_matrix, NotImplementedError), + ], +) +def test_norm_power_array_types(adata_to_norm, array_type, expected_error): + adata_to_norm.X = array_type(adata_to_norm.X) + if expected_error: + with pytest.raises(expected_error): + ep.pp.power_norm(adata_to_norm) + + @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_power(array_type, adata_to_norm): """Test for the power transformation normalization method.""" - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) if "dask" in array_type.__name__: with pytest.raises(NotImplementedError): - ep.pp.power_norm(adata_to_norm_casted, copy=True) + ep.pp.power_norm(adata_to_norm, copy=True) else: - adata_norm = ep.pp.power_norm(adata_to_norm_casted, copy=True) + adata_norm = ep.pp.power_norm(adata_to_norm, copy=True) num1_norm = np.array([-1.3821232, 0.43163615, 0.950487], dtype=np.float32) num2_norm = np.array([-1.340104, 1.0613203, 0.27878374], dtype=np.float32) - assert np.array_equal(adata_norm.X[:, 0], adata_to_norm_casted.X[:, 0]) - assert np.array_equal(adata_norm.X[:, 1], adata_to_norm_casted.X[:, 1]) - assert np.array_equal(adata_norm.X[:, 2], adata_to_norm_casted.X[:, 2]) + assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0]) + assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1]) + assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2]) assert np.allclose(adata_norm.X[:, 3], num1_norm, rtol=1.1) assert np.allclose(adata_norm.X[:, 4], num2_norm, rtol=1.1) - assert np.allclose(adata_norm.X[:, 5], adata_to_norm_casted.X[:, 5], equal_nan=True) + assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True) def test_norm_power_integers(adata_mini_integers_in_X): @@ -488,17 +574,16 @@ def 
test_norm_power_integers(adata_mini_integers_in_X): @pytest.mark.parametrize("array_type", ARRAY_TYPES) def test_norm_power_kwargs(array_type, adata_to_norm): - adata_to_norm_casted = adata_to_norm.copy() - adata_to_norm_casted.X = array_type(adata_to_norm_casted.X) + adata_to_norm.X = array_type(adata_to_norm.X) if "dask" in array_type.__name__: with pytest.raises(NotImplementedError): - ep.pp.power_norm(adata_to_norm_casted, copy=True) + ep.pp.power_norm(adata_to_norm, copy=True) else: with pytest.raises(ValueError): - ep.pp.power_norm(adata_to_norm_casted, copy=True, method="box-cox") + ep.pp.power_norm(adata_to_norm, copy=True, method="box-cox") - adata_norm = ep.pp.power_norm(adata_to_norm_casted, copy=True, standardize=False) + adata_norm = ep.pp.power_norm(adata_to_norm, copy=True, standardize=False) num1_norm = np.array([201.03636, 1132.8341, 1399.3877], dtype=np.float32) num2_norm = np.array([-1.8225479, 5.921072, 3.397709], dtype=np.float32) @@ -556,6 +641,21 @@ def test_norm_power_group(array_type, adata_mini): assert np.allclose(adata_mini_norm.X[:, 2], col2_norm, rtol=1e-02, atol=1e-02) +@pytest.mark.parametrize( + "array_type,expected_error", + [ + (np.array, None), + (da.array, None), + (sparse.csr_matrix, None), + ], +) +def test_norm_log_norm_array_types(adata_to_norm, array_type, expected_error): + adata_to_norm.X = array_type(adata_to_norm.X) + if expected_error: + with pytest.raises(expected_error): + ep.pp.log_norm(adata_to_norm) + + def test_norm_log1p(adata_to_norm): """Test for the log normalization method.""" # Ensure that some test data is strictly positive