-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #132 from mmore500/alifestd-count-nodes
Impl, test alifestd count nodes tools
- Loading branch information
Showing
17 changed files
with
732 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import pandas as pd | ||
|
||
from ._alifestd_count_leaf_nodes import alifestd_count_leaf_nodes | ||
from ._alifestd_count_root_nodes import alifestd_count_root_nodes | ||
from ._alifestd_count_unifurcations import alifestd_count_unifurcations | ||
|
||
|
||
def alifestd_calc_polytomic_index(phylogeny_df: pd.DataFrame) -> int:
    """Calculate the shortfall of nodes in phylogeny relative to the count
    expected if it were strictly bifurcating.

    Unifurcating nodes are left out of the observed node count.
    """
    leaf_count = alifestd_count_leaf_nodes(phylogeny_df)
    root_count = alifestd_count_root_nodes(phylogeny_df)
    # row count expected under strict bifurcation, clamped so it is never
    # negative
    bifurcating_size = max(2 * leaf_count - root_count, 0)
    unifurcation_count = alifestd_count_unifurcations(phylogeny_df)
    observed_size = len(phylogeny_df) - unifurcation_count
    polytomic_index = bifurcating_size - observed_size
    assert 0 <= polytomic_index < max(bifurcating_size, 1)
    return polytomic_index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import pandas as pd | ||
|
||
from ._alifestd_count_leaf_nodes import alifestd_count_leaf_nodes | ||
|
||
|
||
def alifestd_count_inner_nodes(
    phylogeny_df: pd.DataFrame,
    mutate: bool = False,
) -> int:
    """Count the rows of phylogeny that are not leaf nodes.

    The `mutate` argument is accepted but is not read by this implementation.
    """
    inner_count = len(phylogeny_df) - alifestd_count_leaf_nodes(phylogeny_df)
    assert inner_count >= 0
    return inner_count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import pandas as pd | ||
|
||
from ._alifestd_find_leaf_ids import alifestd_find_leaf_ids | ||
|
||
|
||
def alifestd_count_leaf_nodes(phylogeny_df: pd.DataFrame) -> int:
    """Count the leaf nodes contained in phylogeny."""
    leaf_ids = alifestd_find_leaf_ids(phylogeny_df)
    return len(leaf_ids)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from collections import Counter | ||
|
||
import pandas as pd | ||
|
||
from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col | ||
|
||
|
||
def alifestd_count_polytomies(phylogeny_df: pd.DataFrame) -> int:
    """Count how many inner nodes have more than two descendant nodes.

    Only supports asexual phylogenies; raises ValueError if no "ancestor_id"
    column is available after attempting to add one.
    """
    phylogeny_df = alifestd_try_add_ancestor_id_col(phylogeny_df)
    if "ancestor_id" not in phylogeny_df.columns:
        raise ValueError(
            "alifestd_count_polytomies only supports asexual phylogenies.",
        )
    # rows whose ancestor_id equals their own id are treated as roots, as in
    # alifestd_count_unifurcations; skip them so that a root is not tallied
    # as one of its own descendants
    except_roots = phylogeny_df["ancestor_id"] != phylogeny_df["id"]
    ancestor_counts = Counter(phylogeny_df.loc[except_roots, "ancestor_id"])
    return sum(v > 2 for v in ancestor_counts.values())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
def alifestd_count_root_nodes(phylogeny_df: pd.DataFrame) -> int:
    """How many root nodes are contained in phylogeny?

    A row counts as a root when the string form of its "ancestor_list" entry
    is, ignoring case, "[none]" or "[]".
    """
    is_root = (
        phylogeny_df["ancestor_list"]
        .astype(str)
        .str.lower()
        .isin(("[none]", "[]"))
    )
    # Series.sum yields a numpy integer scalar; convert to a builtin int so
    # the result matches the annotated return type
    return int(is_root.sum())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from collections import Counter | ||
|
||
import pandas as pd | ||
|
||
from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col | ||
|
||
|
||
def alifestd_count_unifurcations(phylogeny_df: pd.DataFrame) -> int:
    """Tally the inner nodes that have exactly one descendant node.

    Only supports asexual phylogenies.
    """
    phylogeny_df = alifestd_try_add_ancestor_id_col(phylogeny_df)
    if "ancestor_id" not in phylogeny_df.columns:
        raise ValueError(
            "alifestd_count_unifurcations only supports asexual phylogenies.",
        )
    # leave out rows whose ancestor_id is their own id (roots)
    is_non_root = phylogeny_df["ancestor_id"] != phylogeny_df["id"]
    non_root_ancestors = phylogeny_df.loc[is_non_root, "ancestor_id"]
    child_tallies = Counter(non_root_ancestors)
    return sum(1 for tally in child_tallies.values() if tally == 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
92 changes: 92 additions & 0 deletions
92
tests/test_hstrat/test_auxiliary_lib/test_alifestd_calc_polytomic_index.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import pandas as pd | ||
import pytest | ||
|
||
from hstrat._auxiliary_lib import ( | ||
alifestd_calc_polytomic_index, | ||
alifestd_make_empty, | ||
) | ||
|
||
|
||
def test_empty_df():
    """An empty phylogeny has polytomic index 0."""
    empty_phylogeny = alifestd_make_empty()
    assert alifestd_calc_polytomic_index(empty_phylogeny) == 0
|
||
|
||
def test_singleton_df():
    """A single-row phylogeny has polytomic index 0."""
    ids = [0]
    ancestor_lists = [[None]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_calc_polytomic_index(phylogeny_df) == 0
|
||
|
||
def test_polytomy_df1():
    """Node 0 has three children, node 1 has one; expected index is 1."""
    ids = [0, 1, 2, 3, 4]
    ancestor_lists = [[None], [0], [0], [0], [1]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_calc_polytomic_index(phylogeny_df) == 1
|
||
|
||
def test_polytomy_df2():
    """Nodes 0 and 1 each have three children; expected index is 2."""
    ids = [0, 1, 2, 3, 4, 5, 6]
    ancestor_lists = [[None], [0], [0], [0], [1], [1], [1]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_calc_polytomic_index(phylogeny_df) == 2
|
||
|
||
def test_polytomy_df3():
    """Node 0 has three children, node 1 has four; expected index is 3."""
    ids = [0, 1, 2, 3, 4, 5, 6, 7]
    ancestor_lists = [[None], [0], [0], [0], [1], [1], [1], [1]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_calc_polytomic_index(phylogeny_df) == 3
|
||
|
||
def test_multiple_trees_df1():
    """Two roots, no node with more than two children; expected index 0."""
    ids = [0, 1, 2, 3, 4, 5]
    ancestor_lists = [[None], [None], [0], [2], [2], [3]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_calc_polytomic_index(phylogeny_df) == 0
|
||
|
||
def test_multiple_trees_df2():
    """Two roots, node 0 has three children; expected index is 1."""
    ids = [0, 1, 2, 3, 4, 5]
    ancestor_lists = [[None], [None], [0], [1], [0], [0]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_calc_polytomic_index(phylogeny_df) == 1
|
||
|
||
def test_multiple_trees_df3():
    """Three roots, one of them childless; expected index is 1."""
    ids = [0, 1, 2, 3, 4, 5, 6]
    ancestor_lists = [[None], [None], [0], [1], [0], [0], [None]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_calc_polytomic_index(phylogeny_df) == 1
|
||
|
||
def test_sexual():
    """A phylogeny with a two-ancestor row is rejected with ValueError."""
    ids = [0, 1, 2, 3, 5]
    ancestor_lists = [[None], [None], [0, 1], [1], [0]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    with pytest.raises(ValueError):
        alifestd_calc_polytomic_index(phylogeny_df)
80 changes: 80 additions & 0 deletions
80
tests/test_hstrat/test_auxiliary_lib/test_alifestd_count_inner_nodes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import pandas as pd | ||
|
||
from hstrat._auxiliary_lib import ( | ||
alifestd_count_inner_nodes, | ||
alifestd_make_empty, | ||
) | ||
|
||
|
||
def test_empty_df():
    """An empty phylogeny contains no inner nodes."""
    empty_phylogeny = alifestd_make_empty()
    assert alifestd_count_inner_nodes(empty_phylogeny) == 0
|
||
|
||
def test_singleton_df():
    """A single-row phylogeny contains no inner nodes."""
    ids = [0]
    ancestor_lists = [[None]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_count_inner_nodes(phylogeny_df) == 0
|
||
|
||
def test_sexual_df1():
    """String-encoded ancestor lists with a two-ancestor row; expect 2."""
    ids = [0, 1, 2, 3, 4]
    ancestor_lists = ["[None]", "[0]", "[0]", "[1, 0]", "[1]"]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_count_inner_nodes(phylogeny_df) == 2
|
||
|
||
def test_sexual_df2():
    """List-valued ancestor lists with a two-ancestor row; expect 2."""
    ids = [0, 1, 2, 3, 4]
    ancestor_lists = [[None], [0], [0], [1, 0], [1]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_count_inner_nodes(phylogeny_df) == 2
|
||
|
||
def test_polytomy_df():
    """Node 0 has three children, node 1 has one; expect 2 inner nodes."""
    ids = [0, 1, 2, 3, 4]
    ancestor_lists = [[None], [0], [0], [0], [1]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_count_inner_nodes(phylogeny_df) == 2
|
||
|
||
def test_multiple_trees_df1():
    """Two roots, one of them childless; expect 3 inner nodes."""
    ids = [0, 1, 2, 3, 4, 5]
    ancestor_lists = [[None], [None], [0], [2], [2], [3]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_count_inner_nodes(phylogeny_df) == 3
|
||
|
||
def test_multiple_trees_df2():
    """Two roots heading single-child chains; expect 4 inner nodes."""
    ids = [0, 1, 2, 3, 4, 5]
    ancestor_lists = [[None], [None], [0], [1], [2], [3]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_count_inner_nodes(phylogeny_df) == 4
|
||
|
||
def test_strictly_bifurcating_df():
    """Nodes 0 and 1 each have exactly two children; expect 2 inner nodes."""
    ids = [0, 1, 2, 3, 4]
    ancestor_lists = [[None], [0], [0], [1], [1]]
    phylogeny_df = pd.DataFrame({"id": ids, "ancestor_list": ancestor_lists})
    assert alifestd_count_inner_nodes(phylogeny_df) == 2
Oops, something went wrong.