Skip to content

Commit

Permalink
Merge pull request #327 from nzmacalasdair/devel
Browse files Browse the repository at this point in the history
Fix issue 320: KeyError with panaroo-generate-gffs
  • Loading branch information
nzmacalasdair authored Feb 18, 2025
2 parents 4dc499e + ce0c622 commit 90b7fb3
Showing 1 changed file with 18 additions and 4 deletions.
22 changes: 18 additions & 4 deletions panaroo/post_run_gff_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
from Bio import SeqIO
from Bio.Seq import Seq

from .isvalid import is_valid_folder
# Workaround so this module can also be run directly as a script: fall back to an absolute import if the relative import fails
try:
from .isvalid import is_valid_folder
except ImportError as e:
from isvalid import is_valid_folder
#from .__init__ import __version__

def get_options():
Expand Down Expand Up @@ -142,7 +146,10 @@ def process_refound_gene(refound_id, pangenome_id, parsed_gff, refound_seqs,
strand = refound_seqs[refound_id][3]

#Get additional data required for GFF annotation
gene_name = G.nodes[pangenome_id]["annotation"]
combined_gene_name = G.nodes[pangenome_id]["annotation"]
gene_name = "_".join(combined_gene_name.strip(";").split(";"))
panaroo_name = G.nodes[pangenome_id]["name"]

if G.nodes[pangenome_id]["paralog"] == 1:
has_paralog = "True"
else:
Expand All @@ -153,6 +160,7 @@ def process_refound_gene(refound_id, pangenome_id, parsed_gff, refound_seqs,
"locus_tag="+refound_id,
"name="+gene_name,
"description="+gene_description,
"panaroo_gene_cluster="+panaroo_name,
"inference=panaroo refound gene",
"has_pangenome_paralog="+has_paralog])
gff_line = [scaffold_id, "Panaroo_refound", "candidate_gene", str(start+1), str(stop), ".",
Expand Down Expand Up @@ -195,7 +203,11 @@ def create_new_gffs(isolate_index, parsed_gffs, pp_isolate_genes,
else:
original_gene_data = original_gene_data[0]
#Get various other metadata for gene required for GFF3
gene_name = G.nodes[pangenome_gene]["annotation"]
combined_gene_name = G.nodes[pangenome_gene]["annotation"]
gene_name = "_".join(combined_gene_name.strip(";").split(";"))
panaroo_name = G.nodes[pangenome_gene]["name"]


if gene_name == "":
gene_name = "No_name"
if G.nodes[pangenome_gene]["paralog"] == 1:
Expand All @@ -210,8 +222,10 @@ def create_new_gffs(isolate_index, parsed_gffs, pp_isolate_genes,
"description="+gene_description,
"pangenome_id="+str(pangenome_gene),
"panaroo_ID="+gene,
"panaroo_gene_cluster="+panaroo_name,
"eC_number="+original_gene_data.get("eC_number", str(None)),
"prepanaroo_inference="+original_gene_data["inference"],
"prepanaroo_inference="+original_gene_data.get("inference",
"Unknown_inference"),
"has_pangenome_paralog="+has_paralog])
new_gene_line = "\t".join([original_gene_data["seqid"],
"Panaroo",
Expand Down

0 comments on commit 90b7fb3

Please sign in to comment.