Skip to content

Commit

Permalink
Make all imputation methods consistent in regard to encoding requirem…
Browse files Browse the repository at this point in the history
…ents (#827)

* Before test

* After tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Apply suggestions from code review part 1

Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net>

* @nicolassidoux
@Zethson
Apply suggestions from code review part 2

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Updated _base_check_imputation to throw exception

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Added spinner support

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* After @eroell review

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Changed spinner to Rich

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fixed missing import

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* After @eroell review

* Updated returns in imputation, rewrote miss_forest_impute

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fixed imputation returns

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net>
Co-authored-by: PRECIPOINT\nicolas.sidoux <nicolas.sidoux@precipoint.de>
  • Loading branch information
4 people authored Nov 25, 2024
1 parent f05adda commit 67fedbf
Show file tree
Hide file tree
Showing 11 changed files with 407 additions and 312 deletions.
2 changes: 1 addition & 1 deletion ehrapy/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(
figdir: str | Path = "./figures/",
cache_compression: str | None = "lzf",
max_memory=15,
n_jobs: int = 1,
n_jobs: int = -1,
logfile: str | Path | None = None,
categories_to_ignore: Iterable[str] = ("N/A", "dontknow", "no_gate", "?"),
_frameon: bool = True,
Expand Down
6 changes: 3 additions & 3 deletions ehrapy/core/_tool_available.py → ehrapy/_utils_available.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from subprocess import PIPE, Popen


def _check_module_importable(package: str) -> bool: # pragma: no cover
def _check_module_importable(package: str) -> bool:
"""Checks whether a module is installed and can be loaded.
Args:
Expand All @@ -19,7 +19,7 @@ def _check_module_importable(package: str) -> bool: # pragma: no cover
return module_available


def _shell_command_accessible(command: list[str]) -> bool: # pragma: no cover
def _shell_command_accessible(command: list[str]) -> bool:
"""Checks whether the provided command is accessible in the current shell.
Args:
Expand All @@ -29,7 +29,7 @@ def _shell_command_accessible(command: list[str]) -> bool: # pragma: no cover
True if the command is accessible, False otherwise.
"""
command_accessible = Popen(command, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True)
(commmand_stdout, command_stderr) = command_accessible.communicate()
command_accessible.communicate()
if command_accessible.returncode != 0:
return False

Expand Down
File renamed without changes.
21 changes: 21 additions & 0 deletions ehrapy/_utils_rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import functools

from rich.progress import Progress, SpinnerColumn


def spinner(message: str = "Running task"):
    """Build a decorator that shows a Rich spinner while the decorated callable runs.

    Args:
        message: Text displayed next to the spinner (rendered with ``[blue]`` markup).

    Returns:
        A decorator. The decorated function forwards all positional and keyword
        arguments to the original callable and returns its result unchanged.
    """

    def wrap(func):
        @functools.wraps(func)
        def run_with_spinner(*args, **kwargs):
            columns = ("[progress.description]{task.description}", SpinnerColumn())
            with Progress(*columns, refresh_per_second=1500) as live:
                live.add_task(f"[blue]{message}", total=1)
                # Returning from inside the ``with`` still exits the Progress context.
                return func(*args, **kwargs)

        return run_with_spinner

    return wrap
52 changes: 49 additions & 3 deletions ehrapy/anndata/anndata_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import random
from collections import OrderedDict
from string import ascii_letters
from typing import TYPE_CHECKING, NamedTuple
from typing import TYPE_CHECKING, Any, NamedTuple

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -303,7 +303,7 @@ def move_to_x(adata: AnnData, to_x: list[str] | str) -> AnnData:
return new_adata


def _get_column_indices(adata: AnnData, col_names: str | Iterable[str]) -> list[int]:
def get_column_indices(adata: AnnData, col_names: str | Iterable[str]) -> list[int]:
"""Fetches the column indices in X for a given list of column names
Args:
Expand Down Expand Up @@ -383,7 +383,7 @@ def set_numeric_vars(
if copy:
adata = adata.copy()

vars_idx = _get_column_indices(adata, vars)
vars_idx = get_column_indices(adata, vars)

adata.X[:, vars_idx] = values

Expand Down Expand Up @@ -663,3 +663,49 @@ def get_rank_features_df(

class NotEncodedError(AssertionError):
    """AssertionError subclass that, going by its name, signals data that has not been encoded.

    NOTE(review): the places that raise it are outside this excerpt; confirm the exact conditions there.
    """


def _are_ndarrays_equal(arr1: np.ndarray, arr2: np.ndarray) -> np.bool_:
    """Tell whether two arrays hold matching values at every position.

    A position where both arrays contain NaN counts as a match.

    Args:
        arr1: First array to compare.
        arr2: Second array to compare.

    Returns:
        True if every pair of corresponding entries matches, False otherwise.
    """
    # Element-wise comparison carried out with the object dtype.
    same_value = np.equal(arr1, arr2, dtype=object)
    # A NaN entry compares unequal to itself, which marks NaN positions in each array.
    both_nan = (arr1 != arr1) & (arr2 != arr2)
    return np.all(same_value | both_nan)


def _is_val_missing(data: np.ndarray) -> np.ndarray[Any, np.dtype[np.bool_]]:
    """Flag the missing entries of an AnnData matrix.

    An entry is treated as missing when it is ``None``, an empty string,
    or not equal to itself (as is the case for NaN).

    Args:
        data: The AnnData matrix to inspect.

    Returns:
        A boolean array with the same shape as ``data`` that is True wherever the entry is missing.
    """
    placeholder_mask = np.isin(data, [None, ""])
    # Self-inequality picks up NaN entries.
    nan_mask = data != data
    return placeholder_mask | nan_mask


def _to_dense_matrix(adata: AnnData, layer: str | None = None) -> np.ndarray:  # pragma: no cover
    """Fetch a layer of an AnnData object as a dense matrix.

    Args:
        adata: The AnnData object to read the layer from.
        layer: Name of the layer to fetch. When omitted, ``adata.X`` is used.

    Returns:
        The selected layer as a dense matrix. A sparse layer is converted with ``toarray()``
        and the converted matrix is returned; otherwise the stored layer object itself is returned.
    """
    from scipy.sparse import issparse

    matrix = adata.X if layer is None else adata.layers[layer]
    if issparse(matrix):
        return matrix.toarray()
    return matrix
2 changes: 1 addition & 1 deletion ehrapy/plot/_scanpy_pl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import scanpy as sc
from scanpy.plotting import DotPlot, MatrixPlot, StackedViolin

from ehrapy._doc_util import (
from ehrapy._utils_doc import (
_doc_params,
doc_adata_color_etc,
doc_common_groupby_plot_args,
Expand Down
Loading

0 comments on commit 67fedbf

Please sign in to comment.