From 2bdf83422db382f080a16bb13afed1e463bf4bc3 Mon Sep 17 00:00:00 2001 From: ytirlet <101868688+ytirlet@users.noreply.github.com> Date: Fri, 14 Apr 2023 10:00:39 +0200 Subject: [PATCH] Update pipeline.py --- pipeline.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/pipeline.py b/pipeline.py index e1db7cb..56dd6e0 100644 --- a/pipeline.py +++ b/pipeline.py @@ -11,7 +11,6 @@ import os,sys import os.path from optparse import OptionParser -import matplotlib.pyplot as plt import pandas as pd import numpy as np import csv @@ -59,14 +58,14 @@ def main() : parser.add_option("-i", "--input", dest="input",help="Path to the folder where the genomes are") parser.add_option("-o", "--output", dest="output",help="Path to the folder where you want to put the results in") parser.add_option("--tax", dest="all_taxon",help="path of the all_taxon.tsv file") - parser.add_option("--padmet_ref", dest="path_to_padmet_ref", help="Path to the padmet_ref need for the module mpwt.") - parser.add_option("--ptsc",dest="ptsc", help="Path to scratch folder (genouest cluster).") + parser.add_option("--padmet_ref", dest="path_to_padmet_ref", help="Path to the reference database in Padmet format.") + parser.add_option("--ptsc",dest="ptsc", help="Path to root folder.") parser.add_option("--ptsi",dest="ptsi", help="Name of the singularity image.") parser.add_option("--pwy",dest="pwy_fold", help="Path to the folder with the pathways.txt files for all wanted metabolites.") parser.add_option("--strain",dest="strain", help="Path to the strains file.") parser.add_option("--annot",dest="annot",help="Annotation tool. 'prokka' by default, can choose 'eggnog' too.") parser.add_option("--egg_path",dest="egg_path",help="Path to the eggnog database, mandatory if you want to use eggnog as the annotation tool.") - parser.add_option("-r","--rename",action="store_true",dest="rename", help="Renames all the strains with abreviations.") + parser.add_option("-r","--rename",action="store_true",dest="rename", help="Renames of the strains with abreviations.") parser.add_option("-a","--asko", action="store_true", dest="asko", help="Launch the creation of the askomics files.") parser.add_option("-v","--verbose",action="store_true",dest="verbose", help="Activate verbose.") parser.add_option("-k","--keep_faa", action="store_true", dest="keep_faa", default=False, help="Keep .faa files that can be need to use other annotation software like eggNOG-mapper") @@ -125,7 +124,14 @@ def main() : for name in files : os.system('mkdir ' + output_path + 'eggnog/' + name) os.system('emapper.py -i ' + path_to_all_data + name + '/' + name +'.fasta -o ' + name + ' --cpu 40 --itype genome --data_dir ' + path_to_egg+ ' --output_dir ' + output_path + 'eggnog' + name + '/ --dbmem --genepred prodigal --override') - + + genom = path_to_all_data + name + '/' + name + '.fasta' + prot = output_path + name + '/' + name + '.emapper.genepred.fasta' + gff = output_path + name + '/' + name + '.emapper.genepred.gff' + annot = output_path + name + '/' + name + '.emapper.annotations' + out_file = output_path + name + '/' + name + '.gbk' + os.system('emapper2gbk genomes -fn ' + genom + ' -fp ' + prot + ' -g ' + gff + ' -a ' + annot + ' -o ' + out_file + ' -gt eggnog -c 5') + else : raise ValueError("The specified annotation tool is not recognized. Please retry with 'eggnog' or 'prokka'. Default is 'prokka'.") @@ -140,16 +146,20 @@ def main() : for row in lines : all_lines.append(row) to_write.append(all_lines[0]) + print(all_lines) for name in files : for row in all_lines : - rowsplit = row.split('\t') - new_row = rowsplit[0] + '\t' + rowsplit[1] + '\t' + rowsplit[2] + '\t' + print(row) + #rowsplit = row.split('\t') + new_row = row[:3] if options.rename : - new_row += forbiden(rename(rowsplit[0])) + new_row.append(forbiden(rename(row[0]))) else : - new_row += forbiden(rowsplit[0]) + new_row.append(forbiden(row[0])) if name in new_row : to_write.append(new_row) + print(new_row) + print(to_write) with open(tax_file,'w') as fo : writer = csv.writer(fo,delimiter='\t') writer.writerows(to_write)