Skip to content

Commit

Permalink
add docstring + rename to cox_ph
Browse files Browse the repository at this point in the history
  • Loading branch information
fatisati committed Jan 22, 2024
1 parent b115237 commit 101b4fa
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
23 changes: 21 additions & 2 deletions ehrapy/tools/_sa.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import anndata as ehrapy_ad
from lifelines import CoxPHFitter, KaplanMeierFitter
from lifelines.statistics import StatisticalResult, logrank_test
from scipy import stats
Expand Down Expand Up @@ -264,12 +265,30 @@ def anova_glm(result_1: GLMResultsWrapper, result_2: GLMResultsWrapper, formula_
return dataframe


def cph(ad: AnnData, duration_col: str, event_col: str, entry_col: str = None) -> KaplanMeierFitter:
def cox_ph(adata: AnnData, duration_col: str, event_col: str, entry_col: str = None) -> KaplanMeierFitter:
"""Fit the Cox’s proportional hazard for the survival function.
See https://lifelines.readthedocs.io/en/latest/fitters/regression/CoxPHFitter.html
Args:
adata: anndata object with necessary columns duration_col and event_col (see below)
duration_col: the name of the column in anndata that contains the subjects’ lifetimes.
event_col: the name of the column in anndata that contains the subjects’ death observation. If left as None, assume all individuals are uncensored.
entry_col: a column denoting when a subject entered the study, i.e. left-truncation.
Returns:
Fitted CoxPHFitter
Examples:
>>> import ehrapy as ep
>>> adata = ep.dt.mimic_2(encoded=False)
>>> # In the MIMIC-II database, `censor_flg` marks censoring or death (binary: 0 = death, 1 = censored).
>>> # CoxPHFitter expects the event column to be 1 (True) if the death was observed and 0 (False) if the event was lost (right-censored).
>>> # So we need to flip `censor_flg` before passing it to CoxPHFitter.
>>> adata[:, ['censor_flg']].X = np.where(adata[:, ['censor_flg']].X == 0, 1, 0)
>>> cph = ep.tl.cox_ph(adata, "mort_day_censored", "censor_flg")
"""
df = ad.to_df()
df = ehrapy_ad.anndata_to_df(adata)
df = df[[duration_col, event_col, entry_col]]
cph = CoxPHFitter()
cph.fit(df, duration_col, event_col, entry_col=entry_col)
Expand Down
4 changes: 2 additions & 2 deletions tests/tools/test_sa.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ def test_anova_glm(self):
assert dataframe.iloc[1, 4] == 2
assert pytest.approx(dataframe.iloc[1, 5], 0.1) == 0.103185

def test_cph(self):
def test_cox_ph(self):
adata = ep.dt.mimic_2(encoded=False)
adata[:, ["censor_flg"]].X = np.where(adata[:, ["censor_flg"]].X == 0, 1, 0)
cph = ep.tl.cph(adata, "mort_day_censored", "censor_flg")
cph = ep.tl.cox_ph(adata, "mort_day_censored", "censor_flg")

assert isinstance(cph, CoxPHFitter)
assert len(cph.durations) == 1776
Expand Down

0 comments on commit 101b4fa

Please sign in to comment.