From 4e8258e014ab0c25ceb8d64a4df610c7a723cbb3 Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Mon, 7 Oct 2024 10:02:01 +0200 Subject: [PATCH 1/7] update docs --- docs/gettingstarted/params.md | 36 +++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/docs/gettingstarted/params.md b/docs/gettingstarted/params.md index 1fa8a6e..419bd93 100755 --- a/docs/gettingstarted/params.md +++ b/docs/gettingstarted/params.md @@ -15,6 +15,8 @@ Thus to align all genes present in at least 98% of isolates using clustal and 10 panaroo -i *.gff -o ./results/ --clean-mode strict -a core --aligner clustal --core_threshold 0.98 -t 10 ``` +You can also output unaligned gene sequences by specifying `--aligner none`. Additionally, user @revinci has provided a separate script for generating alignments after running Panaroo, which is described [here](https://github.com/gtonkinhill/panaroo/issues/306). + #### Cluster Thresholds The Panaroo algorithm initially performs a conservative clustering step before collapsing genes into possible families. It is usually best to use the dafault parameters for this initial clustering stage. @@ -54,9 +56,11 @@ panaroo -i *.gff -o ./results/ --clean-mode strict --refind_prop_match 0.5 --sea usage: panaroo [-h] -i INPUT_FILES [INPUT_FILES ...] -o OUTPUT_DIR --clean-mode {strict,moderate,sensitive} [--remove-invalid-genes] [-c ID] [-f FAMILY_THRESHOLD] - [--len_dif_percent LEN_DIF_PERCENT] [--merge_paralogs] - [--search_radius SEARCH_RADIUS] + [--len_dif_percent LEN_DIF_PERCENT] + [--family_len_dif_percent FAMILY_LEN_DIF_PERCENT] + [--merge_paralogs] [--search_radius SEARCH_RADIUS] [--refind_prop_match REFIND_PROP_MATCH] + [--refind-mode {default,strict,off}] [--min_trailing_support MIN_TRAILING_SUPPORT] [--trailing_recursive TRAILING_RECURSIVE] [--edge_support_threshold EDGE_SUPPORT_THRESHOLD] @@ -65,9 +69,10 @@ usage: panaroo [-h] -i INPUT_FILES [INPUT_FILES ...] 
-o OUTPUT_DIR [--high_var_flag CYCLE_THRESHOLD_MIN] [--min_edge_support_sv MIN_EDGE_SUPPORT_SV] [--all_seq_in_graph] [--no_clean_edges] [-a {core,pan}] - [--aligner {prank,clustal,mafft}] [--codons] - [--core_threshold CORE] [--core_entropy_filter HC_THRESHOLD] - [-t N_CPU] [--codon-table TABLE] [--quiet] [--version] + [--aligner {prank,clustal,mafft,none}] [--codons] + [--core_threshold CORE] [--core_subset SUBSET] + [--core_entropy_filter HC_THRESHOLD] [-t N_CPU] + [--codon-table TABLE] [--quiet] [--version] panaroo: an updated pipeline for pangenome investigation @@ -125,6 +130,9 @@ Matching: (default=0.7) --len_dif_percent LEN_DIF_PERCENT length difference cutoff (default=0.98) + --family_len_dif_percent FAMILY_LEN_DIF_PERCENT + length difference cutoff at the gene family level + (default=0.0) --merge_paralogs don't split paralogs Refind: @@ -134,6 +142,20 @@ Refind: --refind_prop_match REFIND_PROP_MATCH the proportion of an accessory gene that must be found in order to consider it a match + --refind-mode {default,strict,off} + The stringency mode at which to re-find genes. + + default: + Will re-find similar gene sequences. Allows for + premature stop codons and incorrect lengths to account + for misassemblies. + + strict: + Prevents fragmented, misassembled, or potential + pseudogene sequences from being re-found. + + off: + Turns off all re-finding steps. Graph correction: --min_trailing_support MIN_TRAILING_SUPPORT @@ -170,13 +192,15 @@ Gene alignment: -a {core,pan}, --alignment {core,pan} Output alignments of core genes or all genes. Options are 'core' and 'pan'. Default: 'None' - --aligner {prank,clustal,mafft} + --aligner {prank,clustal,mafft,none} Specify an aligner. 
Options:'prank', 'clustal', and default: 'mafft' --codons Generate codon alignments by aligning sequences at the protein level --core_threshold CORE Core-genome sample threshold (default=0.95) + --core_subset SUBSET Randomly subset the core genome to these many genes + (default=all) --core_entropy_filter HC_THRESHOLD Manually set the Block Mapping and Gathering with Entropy (BMGE) filter. Can be between 0.0 and 1.0. By From affcf9913daf8846a13f1301bc05b37df615cc61 Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Mon, 3 Feb 2025 15:38:21 +1100 Subject: [PATCH 2/7] Update panaroo_test.yml --- .github/workflows/panaroo_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/panaroo_test.yml b/.github/workflows/panaroo_test.yml index 6d339bd..53ea495 100644 --- a/.github/workflows/panaroo_test.yml +++ b/.github/workflows/panaroo_test.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.8, 3.9, 3.10, 3.11] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} From 3a0f72b884a1001e54bf0732985a6a0e1448d27c Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Mon, 3 Feb 2025 15:41:20 +1100 Subject: [PATCH 3/7] Update panaroo_test.yml --- .github/workflows/panaroo_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/panaroo_test.yml b/.github/workflows/panaroo_test.yml index 53ea495..6206449 100644 --- a/.github/workflows/panaroo_test.yml +++ b/.github/workflows/panaroo_test.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, 3.10, 3.11] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} From 18eb55700cd011d1f32fa6aec060fc7573c429ed Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Mon, 3 Feb 2025 06:12:23 +0000 Subject: [PATCH 4/7] updated plot qc function 
to remove deprecated code --- panaroo/generate_qc_plots.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/panaroo/generate_qc_plots.py b/panaroo/generate_qc_plots.py index dbeea40..c1b4308 100755 --- a/panaroo/generate_qc_plots.py +++ b/panaroo/generate_qc_plots.py @@ -174,11 +174,11 @@ def plot_ngenes(input_gffs, outdir, no_plot=True): ] layout = go.Layout(autosize=True, xaxis=dict(title='', - titlefont=dict(size=18, color='black'), + title=dict(size=18, color='black'), showticklabels=False, automargin=True), yaxis=dict(title="Number of Genes", - titlefont=dict(size=18, color='black'), + title=dict(size=18, color='black'), showticklabels=True, tickfont=dict(size=10, color='black'))) From 564f021a6965c7985eeaafdac822b59777f37b8e Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Mon, 3 Feb 2025 06:18:15 +0000 Subject: [PATCH 5/7] updated plot qc function to remove deprecated code --- panaroo/generate_qc_plots.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/panaroo/generate_qc_plots.py b/panaroo/generate_qc_plots.py index c1b4308..51aba72 100755 --- a/panaroo/generate_qc_plots.py +++ b/panaroo/generate_qc_plots.py @@ -173,12 +173,10 @@ def plot_ngenes(input_gffs, outdir, no_plot=True): pointpos=-1.8) ] layout = go.Layout(autosize=True, - xaxis=dict(title='', - title=dict(size=18, color='black'), + xaxis=dict(title=dict(text='', font=dict(size=18, color='black')), showticklabels=False, automargin=True), - yaxis=dict(title="Number of Genes", - title=dict(size=18, color='black'), + yaxis=dict(title=dict(text="Number of Genes", font=dict(size=18, color='black')), showticklabels=True, tickfont=dict(size=10, color='black'))) From ccef4963d9903f192a8b49000d087ee21ba82595 Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Wed, 5 Feb 2025 03:23:19 +0000 Subject: [PATCH 6/7] fixing issues with plotly again --- panaroo/generate_qc_plots.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git
a/panaroo/generate_qc_plots.py b/panaroo/generate_qc_plots.py index 51aba72..fc30f0e 100755 --- a/panaroo/generate_qc_plots.py +++ b/panaroo/generate_qc_plots.py @@ -232,12 +232,13 @@ def plot_ncontigs(input_gffs, outdir, no_plot=False): pointpos=-1.8) ] layout = go.Layout(autosize=True, - xaxis=dict(title='', - titlefont=dict(size=18, color='black'), + xaxis=dict(title=dict(text='', + font=dict(size=18, color='black')), + dict(text='', font=dict(size=18, color='black')) showticklabels=False, automargin=True), - yaxis=dict(title="Number of Contigs", - titlefont=dict(size=18, color='black'), + yaxis=dict(title=dict(text="Number of Contigs", + font=dict(size=18, color='black')), showticklabels=True, tickfont=dict(size=10, color='black'))) @@ -323,16 +324,16 @@ def plot_mash_contam(mash_contam_file, outdir): trace = go.Scatter(x=x, y=y, mode='markers', text=text, hoverinfo="text") layout = go.Layout(autosize=True, - xaxis=dict(title='Match', - titlefont=dict(size=18, color='black'), + xaxis=dict(title=dict(text='Match', + font=dict(size=18, color='black')), showticklabels=True, tickangle=45, ticktext=tick_labels, tickvals=tickvals, automargin=True, tickfont=dict(size=8, color='black')), - yaxis=dict(title="Percentage of shared hash's", - titlefont=dict(size=18, color='black'), + yaxis=dict(title=dict(text="Percentage of shared hash's", + font=dict(size=18, color='black')), showticklabels=True, tickangle=45, tickfont=dict(size=10, color='black'))) From 9ab8b4d55e4d60cc56dc1e623a065ab5d2299be5 Mon Sep 17 00:00:00 2001 From: Gerry Tonkin-Hill Date: Wed, 5 Feb 2025 03:35:43 +0000 Subject: [PATCH 7/7] fixing issues with plotly again --- panaroo/generate_qc_plots.py | 1 - 1 file changed, 1 deletion(-) diff --git a/panaroo/generate_qc_plots.py b/panaroo/generate_qc_plots.py index fc30f0e..15c317b 100755 --- a/panaroo/generate_qc_plots.py +++ b/panaroo/generate_qc_plots.py @@ -234,7 +234,6 @@ def plot_ncontigs(input_gffs, outdir, no_plot=False): layout =
go.Layout(autosize=True, xaxis=dict(title=dict(text='', font=dict(size=18, color='black')), - dict(text='', font=dict(size=18, color='black')) showticklabels=False, automargin=True), yaxis=dict(title=dict(text="Number of Contigs",