From 8a0e84be476639f5d518dabab5c14354950a5f86 Mon Sep 17 00:00:00 2001 From: Chenghao Zhu Date: Wed, 15 Nov 2023 20:41:04 +0800 Subject: [PATCH 1/2] fix (summarizeFasta): keys in summarize table not created correctly if source combination is specified in --order-source --- moPepGen/aa/PeptidePoolSummarizer.py | 3 +- test/unit/test_peptide_pool_summarizer.py | 78 +++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 test/unit/test_peptide_pool_summarizer.py diff --git a/moPepGen/aa/PeptidePoolSummarizer.py b/moPepGen/aa/PeptidePoolSummarizer.py index 0bbca026..8e34b7a7 100644 --- a/moPepGen/aa/PeptidePoolSummarizer.py +++ b/moPepGen/aa/PeptidePoolSummarizer.py @@ -303,7 +303,8 @@ def write_summary_table(self, handle:IO): summary_keys = self.summary_table.get_keys() header = '\t'.join(['sources', *summary_keys]) handle.write(header + '\n') - sources = [it[0] for it in sorted(self.order.items(), key=lambda x:x[1])] + sources = [it[0] for it in sorted(self.order.items(), key=lambda x:x[1]) + if not isinstance(it[0], frozenset)] for i in range(len(sources)): for comb in itertools.combinations(sources, i + 1): if self.ignore_missing_source: diff --git a/test/unit/test_peptide_pool_summarizer.py b/test/unit/test_peptide_pool_summarizer.py new file mode 100644 index 00000000..201d26e8 --- /dev/null +++ b/test/unit/test_peptide_pool_summarizer.py @@ -0,0 +1,78 @@ +""" Module for testing PeptidePoolSummarizer """ +import copy +import io +from contextlib import redirect_stdout +import unittest +from test.unit import create_aa_record, create_genomic_annotation, get_tx2gene_and_coding_tx +from moPepGen.aa.PeptidePoolSummarizer import PeptidePoolSummarizer +from moPepGen.aa.PeptidePoolSplitter import LabelSourceMapping +from moPepGen.aa import VariantPeptidePool +from test.unit.test_peptide_pool_splitter import ( + GVF_CASE1, PEPTIDE_DB_CASE1, LABEL_MAP1, SOURCE_ORDER, + ANNOTATION_ATTRS, ANNOTATION_DATA, +) + +SOURCE_PARSER_MAP = { + 'gSNP': 'parseVEP', + 'gINDEL': 'parseVEP', + 'sSNV': 'parseVEP', + 'sINDEL': 'parseVEP', + 'altSplice': 'parseRMATS', + 'Fusion': 'parseSTARFusion', + 'circRNA': 'parseCIRCExplorer' +} + +class TestPeptidePoolSummarizer(unittest.TestCase): + """ Test cases for PeptidePoolSummarizer """ + def test_summarize_fasta_case1(self): + """ """ + anno = create_genomic_annotation(ANNOTATION_DATA) + tx2gene, coding_tx = get_tx2gene_and_coding_tx(anno) + peptides_data = [[ 'SSSSSSSR', 'ENST0001|SNV-1001-T-A|1' ]] + peptides = VariantPeptidePool({create_aa_record(*x) for x in peptides_data}) + label_map = LabelSourceMapping(copy.copy(LABEL_MAP1)) + summarizer = PeptidePoolSummarizer( + peptides, order=copy.copy(SOURCE_ORDER), label_map=label_map, + ) + summarizer.count_peptide_source( + tx2gene=tx2gene, + coding_tx=coding_tx, + enzyme='trypsin' + ) + self.assertEqual(set(summarizer.summary_table.data.keys()), {frozenset(['gSNP'])}) + + def test_summarize_fasta_source_comb_order(self): + """ """ + anno = create_genomic_annotation(ANNOTATION_DATA) + anno.transcripts['ENST0005'] = copy.deepcopy(anno.transcripts['ENST0002']) + anno.transcripts['ENST0005'].is_protein_coding = False + tx2gene, coding_tx = get_tx2gene_and_coding_tx(anno) + peptides_data = [ + [ + 'SSSSSSSR', + 'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|1' + ] + ] + peptides = VariantPeptidePool({create_aa_record(*x) for x in peptides_data}) + label_map = LabelSourceMapping(copy.copy(LABEL_MAP1)) + # order = copy.copy(SOURCE_ORDER) + order = { + 'altSplice': 1, + frozenset(['altSplice', 'Noncoding']): 2, + 'Noncoding': 3, + 'circRNA': 4 + } + source_parser_map = copy.deepcopy(SOURCE_PARSER_MAP) + summarizer = PeptidePoolSummarizer( + peptides, order=order, label_map=label_map, source_parser_map=source_parser_map + ) + summarizer.count_peptide_source( + tx2gene=tx2gene, + coding_tx=coding_tx, + enzyme='trypsin' + ) + self.assertEqual(set(summarizer.summary_table.data.keys()), {frozenset(['altSplice', 'Noncoding'])}) + + handle = io.StringIO() + with redirect_stdout(handle): + summarizer.write_summary_table(handle) From abb92c6d23afaf7dea2e5d4739fde1b75829ca0d Mon Sep 17 00:00:00 2001 From: Chenghao Zhu Date: Wed, 15 Nov 2023 20:54:18 +0800 Subject: [PATCH 2/2] style (test): fix styles for pylint --- test/unit/test_peptide_pool_summarizer.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/test/unit/test_peptide_pool_summarizer.py b/test/unit/test_peptide_pool_summarizer.py index 201d26e8..fb0e62c5 100644 --- a/test/unit/test_peptide_pool_summarizer.py +++ b/test/unit/test_peptide_pool_summarizer.py @@ -4,13 +4,13 @@ from contextlib import redirect_stdout import unittest from test.unit import create_aa_record, create_genomic_annotation, get_tx2gene_and_coding_tx +from test.unit.test_peptide_pool_splitter import ( + LABEL_MAP1, SOURCE_ORDER, ANNOTATION_DATA, +) from moPepGen.aa.PeptidePoolSummarizer import PeptidePoolSummarizer from moPepGen.aa.PeptidePoolSplitter import LabelSourceMapping from moPepGen.aa import VariantPeptidePool -from test.unit.test_peptide_pool_splitter import ( - GVF_CASE1, PEPTIDE_DB_CASE1, LABEL_MAP1, SOURCE_ORDER, - ANNOTATION_ATTRS, ANNOTATION_DATA, -) + SOURCE_PARSER_MAP = { 'gSNP': 'parseVEP', @@ -25,7 +25,7 @@ class TestPeptidePoolSummarizer(unittest.TestCase): """ Test cases for PeptidePoolSummarizer """ def test_summarize_fasta_case1(self): - """ """ + """ basic test """ anno = create_genomic_annotation(ANNOTATION_DATA) tx2gene, coding_tx = get_tx2gene_and_coding_tx(anno) peptides_data = [[ 'SSSSSSSR', 'ENST0001|SNV-1001-T-A|1' ]] @@ -42,7 +42,7 @@ def test_summarize_fasta_case1(self): self.assertEqual(set(summarizer.summary_table.data.keys()), {frozenset(['gSNP'])}) def test_summarize_fasta_source_comb_order(self): - """ """ + """ When source combination is present in --order-source """ anno = create_genomic_annotation(ANNOTATION_DATA) anno.transcripts['ENST0005'] = copy.deepcopy(anno.transcripts['ENST0002']) anno.transcripts['ENST0005'].is_protein_coding = False @@ -71,7 +71,10 @@ def test_summarize_fasta_source_comb_order(self): coding_tx=coding_tx, enzyme='trypsin' ) - self.assertEqual(set(summarizer.summary_table.data.keys()), {frozenset(['altSplice', 'Noncoding'])}) + self.assertEqual( + set(summarizer.summary_table.data.keys()), + {frozenset(['altSplice', 'Noncoding'])} + ) handle = io.StringIO() with redirect_stdout(handle):