From 4d45b3229c133c6bcde2d8b3940829298e2f4317 Mon Sep 17 00:00:00 2001 From: thorstenwagner Date: Fri, 29 Sep 2023 11:56:15 +0200 Subject: [PATCH 1/3] Provide TSV as argument --- missense/missense.py | 34 ++++++++++++++++++++++++++++------ tests/test_run.py | 22 +++++++++++++++++++--- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/missense/missense.py b/missense/missense.py index 7e7b95a..262513d 100644 --- a/missense/missense.py +++ b/missense/missense.py @@ -401,11 +401,13 @@ def gen_image(pos_to_val) -> np.array: return img -def get_data_tuple(uniprot_id: str): +def get_data_tuple(uniprot_id: str, tsv_path: str = None): """ Extracts the raw data for the plot from the tsv file. """ - with open(os.path.join(tempfile.gettempdir(), "alpha.tsv"), encoding="utf-8") as f: + if tsv_path is None: + tsv_path = os.path.join(tempfile.gettempdir(), "alpha.tsv") + with open(tsv_path, encoding="utf-8") as f: doc = f.read() m = re.findall(uniprot_id.upper() + r"\t(.\d+.)\t(\d.\d+)", doc) pos_to_val = [] @@ -457,6 +459,13 @@ def create_parser() -> argparse.ArgumentParser: help="Output folder", ) + parser.add_argument( + "--tsv", + type=str, + help="You can provide the path to the tsv file if you want to skip the downloading part.", + default=None + ) + parser.add_argument( "--pdbpath", type=str, @@ -547,8 +556,17 @@ def create_modified_pdb(img: np.array, uniprot_id: str, output_path: str, pdb_pt else: out_file.write(f'{line}') -def _run(uniprot_id: str, output_path: str, pdbpath: str, maxacid: int): - download_missense_data() +def _run(uniprot_id: str, + output_path: str, + tsvpath: str, + pdbpath: str, + maxacid: int): + + if tsvpath is None or os.path.exists(tsvpath)==False: + tsvpath=None + download_missense_data() + + os.makedirs(output_path, exist_ok=True) chain = None @@ -559,7 +577,7 @@ def _run(uniprot_id: str, output_path: str, pdbpath: str, maxacid: int): print(f"Cant find chain for {uniprot_id} in {pdbpath}") sys.exit(1) - pos_to_val = 
get_data_tuple(uniprot_id) + pos_to_val = get_data_tuple(uniprot_id=uniprot_id, tsv_path=tsvpath) out_fig_pth = os.path.join(output_path, f"{uniprot_id}.pdf") img_raw_data = make_and_save_plot(pos_to_val, out_fig_pth, maxacid) @@ -570,7 +588,11 @@ def _run(uniprot_id: str, output_path: str, pdbpath: str, maxacid: int): def _main_(): args = create_parser().parse_args() - _run(args.uniprot_id, args.output_path, args.pdbpath, args.maxacid) + _run(uniprot_id=args.uniprot_id, + output_path=args.output_path, + pdbpath=args.pdbpath, + maxacid=args.maxacid, + tsvpath=args.tsv) diff --git a/tests/test_run.py b/tests/test_run.py index 3724f5f..ddf7aba 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -28,7 +28,21 @@ def test_pdf_and_and_are_generated(self): ms._run(uniprot_id="Q9UQ13", output_path=tmpdirname, maxacid=200, - pdbpath=None + pdbpath=None, + tsvpath=None + ) + + self.assertEqual(True, os.path.exists(os.path.join(tmpdirname,"Q9UQ13.pdf"))) + self.assertEqual(True, os.path.exists(os.path.join(tmpdirname, "Q9UQ13-edit.pdb"))) + + def test_pdf_and_and_are_generated_tsv_by_path(self): + + with tempfile.TemporaryDirectory() as tmpdirname: + ms._run(uniprot_id="Q9UQ13", + output_path=tmpdirname, + maxacid=200, + pdbpath=None, + tsvpath=os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13/alpha.tsv") ) self.assertEqual(True, os.path.exists(os.path.join(tmpdirname,"Q9UQ13.pdf"))) @@ -43,7 +57,8 @@ def test_pdb_check_with_reference(self): ms._run(uniprot_id="Q9UQ13", output_path=tmpdirname, maxacid=200, - pdbpath=None + pdbpath=None, + tsvpath=None ) ref_pth = os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13/Q9UQ13-edit.pdb") @@ -62,7 +77,8 @@ def test_pdb_check_with_reference_with_pdb(self): ms._run(uniprot_id="Q9UQ13", output_path=tmpdirname, maxacid=200, - pdbpath=os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13-with-pdb/7upi.pdb") + pdbpath=os.path.join(os.path.dirname(__file__), 
"../resources/tests/Q9UQ13-with-pdb/7upi.pdb"), + tsvpath=None ) ref_pth = os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13-with-pdb/Q9UQ13-edit.pdb") From 71ecec6e657d0203b8012ad3518eab6365532878 Mon Sep 17 00:00:00 2001 From: thorstenwagner Date: Fri, 29 Sep 2023 12:01:05 +0200 Subject: [PATCH 2/3] test case for non existing unitprot --- README.md | 2 +- missense/missense.py | 6 +++++- tests/test_run.py | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5baccc9..e2a2a2f 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ options: You can provide the optional argument `--pdbpath` if you want to use an experimental PDB, otherwise it will instead download the alphafold predicted PDB. -For example, to reproduce [Figure 3D](https://www.science.org/doi/10.1126/science.adg7492#F3) (the middle one) and the generate the PDB shown in [Figure 3E](https://www.science.org/doi/10.1126/science.adg7492#F3) do: +For example, to reproduce [Figure 3D](https://www.science.org/doi/10.1126/science.adg7492#F3) (the middle one) and generates the PDB shown in [Figure 3E](https://www.science.org/doi/10.1126/science.adg7492#F3) do: ``` wget https://files.rcsb.org/download/7UPI.pdb diff --git a/missense/missense.py b/missense/missense.py index 262513d..d77690c 100644 --- a/missense/missense.py +++ b/missense/missense.py @@ -462,7 +462,7 @@ def create_parser() -> argparse.ArgumentParser: parser.add_argument( "--tsv", type=str, - help="You can provide the path to the tsv file if you want to skip the downloading part.", + help="You can provide the path to the tsv file if you want to skip the download.", default=None ) @@ -579,6 +579,10 @@ def _run(uniprot_id: str, pos_to_val = get_data_tuple(uniprot_id=uniprot_id, tsv_path=tsvpath) + if len(pos_to_val) == 0: + print(f"Could not find any data in the AlphaMissense database for uniprot id {uniprot_id}") + sys.exit(1) + out_fig_pth = os.path.join(output_path, 
f"{uniprot_id}.pdf") img_raw_data = make_and_save_plot(pos_to_val, out_fig_pth, maxacid) print(f"Save plot to {out_fig_pth}") diff --git a/tests/test_run.py b/tests/test_run.py index ddf7aba..29c5d9e 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -35,6 +35,20 @@ def test_pdf_and_and_are_generated(self): self.assertEqual(True, os.path.exists(os.path.join(tmpdirname,"Q9UQ13.pdf"))) self.assertEqual(True, os.path.exists(os.path.join(tmpdirname, "Q9UQ13-edit.pdb"))) + def test_pdf_and_and_are_generated_non_existing_uniprot(self): + + shutil.copyfile(os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13/alpha.tsv"), + os.path.join(tempfile.gettempdir(), "alpha.tsv")) + + with tempfile.TemporaryDirectory() as tmpdirname: + with self.assertRaises(SystemExit): + ms._run(uniprot_id="Q9UQ13234234", + output_path=tmpdirname, + maxacid=200, + pdbpath=None, + tsvpath=None + ) + def test_pdf_and_and_are_generated_tsv_by_path(self): with tempfile.TemporaryDirectory() as tmpdirname: From b61285ec8bcfe8f6d88dfdadefcf52ad40e015ea Mon Sep 17 00:00:00 2001 From: thorstenwagner Date: Fri, 29 Sep 2023 12:57:40 +0200 Subject: [PATCH 3/3] update readme --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e2a2a2f..0c54731 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ pip install pymissense Generate usage is: ``` -usage: pymissense [-h] [--pdbpath PDBPATH] [--maxacid MAXACID] uniprot_id output_path +usage: pymissense [-h] [--tsv TSV] [--pdbpath PDBPATH] [--maxacid MAXACID] uniprot_id output_path AlphaMissense plot and pdb generator @@ -39,8 +39,9 @@ positional arguments: options: -h, --help show this help message and exit + --tsv TSV You can provide the path to the tsv file if you want to skip the download. (default: None) --pdbpath PDBPATH If defined, it will write the pathogencity as bfactor in that PDB.
If its not defined or not existing it will instead download the alphafold predicted PDB (default: None) - --maxacid MAXACID Maximum squence number to use. (default: None) + --maxacid MAXACID Maximum sequence number to use in the plot. (default: None) ``` You can provide the optional argument `--pdbpath` if you want to use an experimental PDB, otherwise it will instead download the alphafold predicted PDB.