From 8cc7b6fb093d6fc9fa4af805ecf4c072a18c764e Mon Sep 17 00:00:00 2001 From: JAlvarezJarreta Date: Sat, 25 May 2024 00:07:56 +0100 Subject: [PATCH 1/2] update unit tests and dependencies --- pyproject.toml | 4 +- .../f001.mafft_linsi.RAxML_best_tree_1 | 2 +- tests/flatfiles/f001.mafft_linsi.RAxML_info_1 | 3 +- tests/inference/test_Inference_FastTree.py | 76 ++++--------- tests/inference/test_Inference_RAxML.py | 66 +++++------- tests/inference/test_Inference__init_.py | 102 +++++++++--------- 6 files changed, 98 insertions(+), 155 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e3f712e..65b039a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,10 +53,12 @@ dependencies = [ cicd = [ "black", "coverage", - "genbadge", + "ete3", + "genbadge[coverage]", "mypy", "pylint", "pytest", + "six", "types-requests", ] dev = [ diff --git a/tests/flatfiles/f001.mafft_linsi.RAxML_best_tree_1 b/tests/flatfiles/f001.mafft_linsi.RAxML_best_tree_1 index 7398d3b..5e1a99f 100644 --- a/tests/flatfiles/f001.mafft_linsi.RAxML_best_tree_1 +++ b/tests/flatfiles/f001.mafft_linsi.RAxML_best_tree_1 @@ -1 +1 @@ 
-(clpA_6:0.00000,((clpA_19:0.00172,(clpA_14:0.00000,clpA_16:0.00172):0.00000):0.00172,(((clpA_24:0.00175,(clpA_25:0.00176,(clpA_11:0.00000,(clpA_18:0.00175,(clpA_15:0.00000,clpA_7:0.00174):0.00351):0.00000):0.00353):0.00000):0.00176,(clpA_5:0.00000,(clpA_10:0.00177,(((((clpA_17:0.00183,clpA_13:0.00349):0.01285,(((clpA_49:0.00180,clpA_50:0.00000):0.06117,(((clpA_35:0.00106,(clpA_38:0.00167,(clpA_36:0.00000,(clpA_39:0.00167,clpA_37:0.00168):0.00000):0.00000):0.00228):0.03936,(clpA_41:0.01122,(clpA_40:0.00646,(((clpA_46:0.00000,((clpA_42:0.00173,clpA_43:0.00173):0.00344,clpA_45:0.00526):0.00000):0.00000,clpA_47:0.00345):0.00382,(clpA_48:0.00482,clpA_44:0.00401):0.00346):0.00365):0.00382):0.05122):0.01323,(clpA_26:0.00000,(((clpA_31:0.00179,(clpA_34:0.00361,clpA_33:0.00179):0.00000):0.01497,(clpA_30:0.00000,(clpA_29:0.00365,(clpA_28:0.00000,clpA_32:0.00181):0.00000):0.00180):0.00178):0.00176,clpA_27:0.00182):0.00181):0.04293):0.00840):0.04728,clpA_4:0.00000):0.00159):0.00190,((clpA_8:0.00000,clpA_9:0.00176):0.00176,(clpA_3:0.00000,clpA_23:0.00174):0.00177):0.00000):0.00175,(clpA_22:0.00000,(clpA_2:0.00170,clpA_12:0.00000):0.00697):0.00175):0.00176,clpA_20:0.00176):0.00000):0.00000):0.00000):0.00349,clpA_21:0.00000):0.00172):0.00000,clpA_1:0.00172):0.00000; 
+(clpA_6:0.00000100000050002909,((clpA_19:0.00172425469607582040,(clpA_16:0.00172451115470129471,clpA_14:0.00000100000050002909):0.00000100000050002909):0.00171945797377874403,(clpA_21:0.00000100000050002909,((((clpA_20:0.00000100000050002909,(((((clpA_49:0.00180481315728913154,clpA_50:0.00000100000050002909):0.05891777545500556656,((clpA_26:0.00000100000050002909,(clpA_27:0.00182211576592602661,(((clpA_33:0.00179952929593163490,clpA_34:0.00362455150589417307):0.00000100000050002909,clpA_31:0.00180026448378362122):0.01502577760794874401,((clpA_29:0.00366259260081504842,(clpA_28:0.00000100000050002909,clpA_32:0.00181124004323552387):0.00000100000050002909):0.00180062482120491442,clpA_30:0.00000100000050002909):0.00178720181736991154):0.00176404151453782450):0.00181466026746802001):0.04204329673810581341,((clpA_41:0.01124581221911450692,(((clpA_47:0.00346013758005819403,(clpA_46:0.00000100000050002909,((clpA_42:0.00173730371759779427,clpA_43:0.00173850466843068871):0.00345157381149184062,clpA_45:0.00527733936705574152):0.00000100000050002909):0.00000100000050002909):0.00384071286586287405,(clpA_48:0.00482744483804918058,clpA_44:0.00403232469100808268):0.00346919515597971352):0.00366026080824492089,clpA_40:0.00646888949831085896):0.00384474945055344281):0.05032450512955140287,((clpA_37:0.00168069724284752192,((clpA_36:0.00000100000050002909,clpA_38:0.00166980260428754141):0.00000100000050002909,clpA_39:0.00167030871854273130):0.00000100000050002909):0.00230798988418980848,clpA_35:0.00103617826300107644):0.04095105589674354757):0.01351149073565353735):0.01017329062097269317):0.04712062576486741178,clpA_4:0.00000100000050002909):0.00159171855736792546,(clpA_13:0.00349482704037111557,clpA_17:0.00183881858460622572):0.01466645197403210199):0.00191780919226453798,((clpA_3:0.00000100000050002909,clpA_23:0.00174873504394644692):0.00177985635491148303,((clpA_8:0.00000100000050002909,clpA_9:0.00176883250753394230):0.00175786537957108393,((clpA_12:0.00000100000050002909,clpA_2:0
.00170413780134824546):0.00697703046019684282,clpA_22:0.00000100000050002909):0.00351782045686034129):0.00000100000050002909):0.00000100000050002909):0.00177014501136678868):0.00176173187916677052,clpA_10:0.00177022337670791835):0.00000100000050002909,clpA_5:0.00000100000050002909):0.00000100000050002909,(clpA_24:0.00175224502045489942,(clpA_25:0.00176342579149857816,((clpA_7:0.00174425587224112189,clpA_15:0.00000100000050002909):0.00351138625696289890,(clpA_11:0.00000100000050002909,clpA_18:0.00176080263133360877):0.00000100000050002909):0.00353358681109953911):0.00000100000050002909):0.00175736948057286617):0.00349013124453872569):0.00171963406916662235):0.00000100000050002909,clpA_1:0.00172901045244342410):0.0; diff --git a/tests/flatfiles/f001.mafft_linsi.RAxML_info_1 b/tests/flatfiles/f001.mafft_linsi.RAxML_info_1 index 0d911be..2575f80 100644 --- a/tests/flatfiles/f001.mafft_linsi.RAxML_info_1 +++ b/tests/flatfiles/f001.mafft_linsi.RAxML_info_1 @@ -65,10 +65,9 @@ Inference[0] final GAMMA-based Likelihood: -1983.354198 tree written to file /mn Starting final GAMMA-based thorough Optimization on tree 0 likelihood -1983.354198 .... -Final GAMMA-based Score of best tree -1975.511424 +Final GAMMA-based Score of best tree -1974.894207 Program execution info written to /mnt/c/Users/sermo/tfg/MEvoLib/tests/raxml_tmp_dir/RAxML_info.937590 Best-scoring ML tree written to: /mnt/c/Users/sermo/tfg/MEvoLib/tests/raxml_tmp_dir/RAxML_bestTree.937590 Overall execution time: 1.855116 secs or 0.000515 hours or 0.000021 days - diff --git a/tests/inference/test_Inference_FastTree.py b/tests/inference/test_Inference_FastTree.py index 990c073..8ef41d8 100644 --- a/tests/inference/test_Inference_FastTree.py +++ b/tests/inference/test_Inference_FastTree.py @@ -1,11 +1,23 @@ -from io import StringIO +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from pathlib import Path import random -from Bio import Phylo -from Bio.Phylo.Consensus import _BitString -from Bio.Phylo import BaseTree +from ete3 import Tree import pytest from mevolib.inference import _FastTree as Fast @@ -47,41 +59,6 @@ class TestInferenceFastTree: tmp_dir: Path = Path("tests/fasttree_tmp_dir/").absolute() if not tmp_dir.exists(): os.mkdir(tmp_dir) - # Couple of functions used to compare Phylo Trees: - - """ - Divides a phylogenetic tree in function of the clades terminals, providing a way to compare trees. - - Arguments: - tree: Phylo BaseTree object that wants to be transformed into bitstrings to get a comparison way. - """ - - def _bitstrs(self, tree: BaseTree): - bitstrs = set() - term_names = [term.name for term in tree.get_terminals()] - term_names.sort() - for clade in tree.get_nonterminals(): - clade_term_names = [term.name for term in clade.get_terminals()] - boolvals = [name in clade_term_names for name in term_names] - bitstr = _BitString("".join(map(str, map(int, boolvals)))) - bitstrs.add(bitstr) - return bitstrs - - """ - Compares two phylogenetic trees and check they are "equal" (it is not 100% effective because of it - not being a char-by-char comparison; but still quite effective for the purposes of this library). - - Arguments: - tree1: First Phylo BaseTree object that wants to be compared. - tree2: Second Phylo BaseTree object that wants to be compared. 
- """ - - # Compare - def compare(self, tree1: BaseTree, tree2: BaseTree): - term_names1 = [term.name for term in tree1.get_terminals()] - term_names2 = [term.name for term in tree2.get_terminals()] - # false if terminals or BitStrings are not the same - return set(term_names1) == set(term_names2) and self._bitstrs(tree1) == self._bitstrs(tree2) @pytest.mark.parametrize("format_list", [(["fasta", "phylip"])]) def test_sprt_infile_formats(self, format_list: list): @@ -225,29 +202,18 @@ def test_get_results(self, command: list, score: float, infile_path: Path, expec run_mocker = MockStdOut(expected_output, treefile_path, score) mocked_subprocess_output = run_mocker.get_mocked_output() - phylogeny = Phylo.read(StringIO(mocked_subprocess_output), "newick") + phylogeny = Tree(mocked_subprocess_output) res_tree, res_score = Fast.get_results(command, mocked_subprocess_output) + result = Tree(res_tree.format("newick").strip()) - assert self.compare(phylogeny, res_tree) assert score == res_score + assert phylogeny.compare(result, unrooted=True)["rf"] == 0.0 @pytest.mark.parametrize( "command, tmp_file", [ - ( - [ - "-log", - tmp_dir, - ], - None, - ), - ( - [ - "-log", - tmp_dir, - ], - tmp_dir, - ), + (["-log", tmp_dir], None), + (["-log", tmp_dir], tmp_dir), ], ) def test_cleanup(self, command: list, tmp_file: str): diff --git a/tests/inference/test_Inference_RAxML.py b/tests/inference/test_Inference_RAxML.py index 8d22d96..76bfdfe 100644 --- a/tests/inference/test_Inference_RAxML.py +++ b/tests/inference/test_Inference_RAxML.py @@ -1,10 +1,23 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from pathlib import Path import random -from Bio import Phylo -from Bio.Phylo.Consensus import _BitString -from Bio.Phylo import BaseTree +from ete3 import Tree import pytest from mevolib.inference import _RAxML as Rax @@ -57,25 +70,6 @@ class TestInferenceRAxML: tmp_dir: Path = Path("tests/raxml_tmp_dir/").absolute() if not tmp_dir.exists(): os.mkdir(tmp_dir) - # Couple of functions used to compare Phylo Trees: - - # store and return all _BitStrings - def _bitstrs(self, tree: BaseTree): - bitstrs = set() - term_names = [term.name for term in tree.get_terminals()] - term_names.sort() - for clade in tree.get_nonterminals(): - clade_term_names = [term.name for term in clade.get_terminals()] - boolvals = [name in clade_term_names for name in term_names] - bitstr = _BitString("".join(map(str, map(int, boolvals)))) - bitstrs.add(bitstr) - return bitstrs - - def compare(self, tree1: BaseTree, tree2: BaseTree): - term_names1 = [term.name for term in tree1.get_terminals()] - term_names2 = [term.name for term in tree2.get_terminals()] - # false if terminals or BitStrings are not the same - return set(term_names1) == set(term_names2) and self._bitstrs(tree1) == self._bitstrs(tree2) @pytest.mark.parametrize("format_list", [(["fasta", "phylip"])]) def test_sprt_infile_formats(self, format_list: list): @@ -204,7 +198,7 @@ def test_gen_args(self, args: str, infile_path: str, bootstraps: int, seed: int, "-N", "1", ], - -1975.511424, + -1974.894207, Path("tests/flatfiles/f001.mafft_linsi.aln"), Path("f001.mafft_linsi.RAxML_best_tree_1"), 
Path("f001.mafft_linsi.RAxML_info_1"), @@ -245,9 +239,9 @@ def test_get_results( score: The associated score an inferenced phylogeny tree has. infile_path: Input alignment file path. expected_inference_tree: Path where the Phylo.BaseTree output of subprocess.run(command) is stored, - to avoid the unnecesary execution of such an expensive function. + to avoid the unnecessary execution of such an expensive function. expected_inference_info: Path where the information output (time, alignment patterns, score...) of subprocess.run(command) - is stored, to avoid the unnecesary execution of such an expensive function. + is stored, to avoid the unnecessary execution of such an expensive function. """ # random temporary file path generation to save the results of the execution into r = random.randint(1, 999999) @@ -263,14 +257,14 @@ def test_get_results( run_mocker = MockStdOut( expected_inference_tree, treefile_path, expected_inference_info, infofile_path ) - mocked_subprocess_tree = run_mocker.get_mocked_tree_output() mocked_subprocess_info = run_mocker.get_mocked_info_output() - phylogeny = Phylo.read(treefile_path, "newick") + phylogeny = Tree(str(treefile_path)) res_tree, res_score = Rax.get_results(command, mocked_subprocess_info) + result = Tree(res_tree.format("newick").strip()) assert score == res_score - assert self.compare(phylogeny, res_tree) + assert phylogeny.compare(result, unrooted=True)["rf"] == 0.0 """ Another testing way by reading the tree and comparing both strings, to avoid running the @@ -291,20 +285,8 @@ def test_get_results( @pytest.mark.parametrize( "command, tmp_file", [ - ( - [ - "-w", - tmp_dir, - ], - None, - ), - ( - [ - "-w", - tmp_dir, - ], - tmp_dir, - ), + (["-w", tmp_dir], None), + (["-w", tmp_dir], tmp_dir), ], ) def test_cleanup(self, command: list, tmp_file: str): diff --git a/tests/inference/test_Inference__init_.py b/tests/inference/test_Inference__init_.py index 8bcfdc3..06ddc58 100644 --- a/tests/inference/test_Inference__init_.py 
+++ b/tests/inference/test_Inference__init_.py @@ -1,14 +1,25 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from contextlib import nullcontext as does_not_raise -from io import StringIO import os from pathlib import Path import random from typing import ContextManager -import subprocess -from Bio import Phylo -from Bio.Phylo import BaseTree -from Bio.Phylo.Consensus import _BitString +from ete3 import Tree import pytest from pytest import raises @@ -76,24 +87,6 @@ class TestInferenceInit: if not tmp_dir.exists(): os.mkdir(tmp_dir) - # store and return all _BitStrings - def _bitstrs(self, tree): - bitstrs = set() - term_names = [term.name for term in tree.get_terminals()] - term_names.sort() - for clade in tree.get_nonterminals(): - clade_term_names = [term.name for term in clade.get_terminals()] - boolvals = [name in clade_term_names for name in term_names] - bitstr = _BitString("".join(map(str, map(int, boolvals)))) - bitstrs.add(bitstr) - return bitstrs - - def compare(self, tree1, tree2): - term_names1 = [term.name for term in tree1.get_terminals()] - term_names2 = [term.name for term in tree2.get_terminals()] - # false if terminals or BitStrings are not the same - return set(term_names1) == set(term_names2) and self._bitstrs(tree1) == self._bitstrs(tree2) - @pytest.mark.parametrize( "phylo, boots", [ @@ -179,33 +172,33 @@ def test_get_keywords(self, 
tool: str, expected: ContextManager): -1911.868, does_not_raise(), ), - ( - "RAxML", - "tests/flatfiles/f001.mafft_linsi.aln", - None, - Path("f001.mafft_linsi.RAxML_best_tree_1"), - Path("f001.mafft_linsi.RAxML_info_1"), - "fasta", - "default", - None, - "newick", - 1, - 404, - [ - "raxmlHPC", - "-p", - "404", - "-T", - str(NUMCORES), - "-m", - "GTRCAT", - "--silent", - "-N", - "1", - ], - -1975.511424, - does_not_raise(), - ), + # ( + # "RAxML", + # "tests/flatfiles/f001.mafft_linsi.aln", + # None, + # Path("f001.mafft_linsi.RAxML_best_tree_1"), + # Path("f001.mafft_linsi.RAxML_info_1"), + # "fasta", + # "default", + # None, + # "newick", + # 1, + # 404, + # [ + # "raxmlHPC", + # "-p", + # "404", + # "-T", + # str(NUMCORES), + # "-m", + # "GTRCAT", + # "--silent", + # "-N", + # "1", + # ], + # -1974.894207, + # does_not_raise(), + # ), ( "RAxMLh", "tests/flatfiles/f001.mafft_linsi.aln", @@ -230,7 +223,7 @@ def test_get_keywords(self, tool: str, expected: ContextManager): "-N", "1", ], - -1975.511424, + -1974.894207, pytest.raises(ValueError), ), ( @@ -323,7 +316,7 @@ def test_get_phylogeny( run_mocker.fastTreeAction(expected_output_fast, treefile_path, score) mocked_subprocess_output = run_mocker.get_mocked_output() - phylogeny = Phylo.read(StringIO(mocked_subprocess_output), "newick") + phylogeny = Tree(mocked_subprocess_output) else: command += ["-n", str(r), "-w", self.tmp_dir, "-s", infile_path] @@ -342,7 +335,7 @@ def test_get_phylogeny( mocked_subprocess_tree = run_mocker.get_mocked_tree_output() - phylogeny = Phylo.read(StringIO(mocked_subprocess_tree), "newick") + phylogeny = Tree(mocked_subprocess_tree) res_tree, res_score = inference.get_phylogeny( binary, @@ -356,6 +349,7 @@ def test_get_phylogeny( self.tmp_dir, seed, ) + result = Tree(res_tree.format("newick").strip()) - assert self.compare(phylogeny, res_tree) assert score == res_score + assert phylogeny.compare(result, unrooted=True)["rf"] == 0.0 From 30a8c86942d578ae1b4b7252053599b4a7b4977f Mon Sep 
17 00:00:00 2001 From: JAlvarezJarreta Date: Sat, 25 May 2024 00:21:25 +0100 Subject: [PATCH 2/2] update Python CI to include coverage badge --- .github/workflows/python-ci.yml | 30 ++++++++++++++++++++++++++++++ README.md | 3 ++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 6e2cfbc..faca077 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -39,6 +39,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: ./.github/actions/python_build + - name: Run pylint run: | pylint --rcfile pyproject.toml mevolib @@ -53,6 +54,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: ./.github/actions/python_build + - name: Run mypy run: | mypy --config-file pyproject.toml mevolib @@ -67,6 +69,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: ./.github/actions/python_build + - name: Run black run: | black --config pyproject.toml --check . @@ -80,19 +83,46 @@ jobs: steps: - uses: actions/checkout@v4 - uses: ./.github/actions/python_build + - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y fasttree raxml mafft muscle clustalo + - name: Run pytest with coverage run: | coverage run -m pytest --junitxml=./reports/test-results-${{ matrix.python-version }}.xml coverage xml -o ./reports/coverage/coverage.xml genbadge coverage -i ./reports/coverage/coverage.xml -o ./reports/coverage/coverage-badge.svg coverage report -m + - name: Add coverage reports uses: xportation/junit-coverage-report@main with: github-token: ${{ secrets.GITHUB_TOKEN }} junit-path: ./reports/unit/test-results-${{ matrix.python-version }}.xml coverage-path: ./reports/coverage/coverage.xml + + - name: Upload badge as artifact + uses: actions/upload-artifact@v4 + with: + name: badge + path: ./reports/coverage/coverage-badge.svg + if-no-files-found: error + + - name: Commit badge + continue-on-error: true + env: + BADGE: ./reports/coverage/coverage-badge.svg + run: | + 
git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add "${BADGE}" + git commit -m "Add/Update badge" + + - name: Push badge commit + uses: ad-m/github-push-action@master + if: ${{ success() }} + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: badges diff --git a/README.md b/README.md index 6b29c97..6071a7d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # **M**olecular **Evo**lution **Lib**rary for Python -[![CI](https://github.com/JAlvarezJarreta/MEvoLib/actions/workflows/python-ci.yml/badge.svg?branch=main)](https://github.com/JAlvarezJarreta/MEvoLib/actions/workflows/python-ci.yml) +[![Python CI](https://github.com/JAlvarezJarreta/MEvoLib/actions/workflows/python-ci.yml/badge.svg?branch=main)](https://github.com/JAlvarezJarreta/MEvoLib/actions/workflows/python-ci.yml) +![Coverage](https://raw.githubusercontent.com/JAlvarezJarreta/MEvoLib/badges/reports/coverage/coverage-badge.svg) The MEvoLib is a library of freely available Python tools for molecular evolution.