Skip to content

Commit

Permalink
Merge pull request #9 from ANSES-Ploufragan/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
FTouzain authored Jan 20, 2025
2 parents f181dd9 + 81bc038 commit 2069216
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 8 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
0.2.3.3:
- in convert_tbl2json.py, now when no gene found in bed, deduce them from CDS and name them gene_1 .. gene_N (not ORFN
because some prot are named ORF2 prot in PCV2, for an ORF that is not the second one!). It avoids to get a bug
for PCV2 viruses for instance
- in convert_vcffile_to_readablefile2.py, NOW handle genes in reverse orientation (did not display genes/prot labels previously)
- change version 0.2.3.2 -> 0.2.3.3
0.2.3.2:
- correct max number of args from 25 to 30 for vvv2_display.py
- change version 0.2.3.1 -> 0.2.3.2
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

setuptools.setup(
name="vvv2_display", # Required
version="0.2.3.2", # Required
version="0.2.3.3", # Required
description="Viral Variant Visualizer 2 display", # Optional
long_description=long_description,
long_description_content_type="text/markdown", # Optional (see note above)
Expand Down
22 changes: 22 additions & 0 deletions src/convert_tbl2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1466,16 +1466,37 @@ def indexlist(item2find, list_or_string):
f.write("\"virus_id\": \"unkown\", ")
b_first_cds_found = False

# **********************************************
# for genes
f.write("\"genes\": {")
b_gene_found = False
for i in range(len(names)):
if types[i] == 'gene':
if b_first_cds_found:
f.write(", ")

f.write("\""+names[i]+"\": ["+str(starts[i])+", "+str(ends[i])+"]")
b_first_cds_found = True
b_gene_found = True

# ------------------------------------------
# special case of not recorded genes
# means genes not recorded, like in PCV2,
# therefore we must deduce them from proteins, calling them ORF1...ORFN
if not b_gene_found:
b_first_cds_found = False
for i in range(len(names)):
if types[i] == 'cds':
tmp_name = 'gene_'+str(i+1)
if b_first_cds_found:
f.write(", ")

f.write("\""+tmp_name+"\": ["+str(starts[i])+", "+str(ends[i])+"]")
b_first_cds_found = True
# ------------------------------------------

f.write("}")
# **********************************************

b_first_cds_found = False
# for proteins
Expand Down Expand Up @@ -1530,6 +1551,7 @@ def indexlist(item2find, list_or_string):
with open(bed_vardict_annot_f, 'w+') as f:
# no header otherwise bug in vardict
for i in range(len(names)):
# print("for vardict, treat BED contig "+names[i])
f.write("\t".join([
chrs[i],
str(starts_vardict[i]),
Expand Down
17 changes: 17 additions & 0 deletions src/convert_vcffile_to_readablefile2.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,21 @@ def find_key_genes(dico_json, genomepos):
gene = "intergene"
base_inf_allgenes = ''
base_sup_allgenes = ''

# print("find_key_genes call pos ("+str(genomepos)+")")
for typ in dico_json:
# print(f"dico_json key treated:{key}")

if typ == "genes":
for key in dico_json[typ]:
base_inf , base_sup = dico_json[typ][key][0] , dico_json[typ][key][1]

# if gene in reverse orientation, change coordinates to make base_inf < base_sup
if base_sup < base_inf:
base_tmp = base_sup
base_sup = base_inf
base_inf = base_tmp

if (base_inf <= genomepos)and(genomepos <= base_sup):
# print("GENE base_inf:"+str(base_inf)+" <= genomepos:"+str(genomepos)+" <= base_sup:"+str(base_sup))
if gene == 'intergene':
Expand All @@ -91,6 +99,7 @@ def find_key_genes(dico_json, genomepos):
# print(f"NOT( base_inf:{base_inf} <= genomepos:{genomepos} <= base_sup:{base_sup} )")
# continue
# print(f"return gene:{gene}\tbase_inf:{base_inf_allgenes}\tbase_sup:{base_sup_allgenes}")

return gene, base_inf_allgenes, base_sup_allgenes

def find_key_proteins(dico_json, genomepos, gene_res):
Expand All @@ -107,6 +116,13 @@ def find_key_proteins(dico_json, genomepos, gene_res):
if typ == "proteins":
for key in dico_json[typ]:
base_inf , base_sup = dico_json[typ][key][0] , dico_json[typ][key][1]

# if gene in reverse orientation, change coordinates to make base_inf < base_sup
if base_sup < base_inf:
base_tmp = base_sup
base_sup = base_inf
base_inf = base_tmp

if (base_inf <= genomepos)and(genomepos <= base_sup):
# print("PROT base_inf:"+str(base_inf)+" <= genomepos:"+str(genomepos)+" <= base_sup:"+str(base_sup))
if gene_res == 'intergene':
Expand Down Expand Up @@ -306,6 +322,7 @@ def is_homopolymer(lseq, rseq, ref, alt):
pos = genomeposition[i]
gene_id, base_inf, base_sup = find_key_genes(dico_json, pos)
protein_id, pbase_inf, pbase_sup = find_key_proteins(dico_json, pos, gene_id)
# print("pos gene_id protein_id:"+str(pos)+"\t"+gene_id+"\t"+protein_id)
line = write_line(temp_counts[i], pos, gene_id, protein_id, dico)
filout.write(line)

Expand Down
12 changes: 10 additions & 2 deletions src/visualize_snp_v4.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env Rscript
#
# FT; last modification November 20th 2024
# FT; last modification January 20th 2025
# AF; last modification February 16th 2019
#
# Description: This script has been written in order to generate a graph
Expand Down Expand Up @@ -226,7 +226,11 @@ genecols = append(genecols, complete_col_set)

# needed to have color labels NOT ORDERED and to choose colors
gene_id_labels = unique(density$gene_id)
# print("gene_id_labels BEFORE numbering:")
# print(gene_id_labels)
density$gene_id_num = paste( sprintf("%02d", match(density$gene_id, gene_id_labels) ), ":", density$gene_id)
# print("gene_id_labels AFTER numbering:")
# print(density$gene_id_num)

# if( b_verbose ){
# print("protein_id_labels BEFORE removing duplicates:")
Expand Down Expand Up @@ -329,7 +333,11 @@ ymm=as.numeric(unlist(ymm))

gnames_label=lapply(nrange, FUN=function(x){
glabel = ""
if( (xma[x] - xmi[x]) > min_glength){
if( (xma[x] > xmi[x]) &
( (xma[x] - xmi[x]) > min_glength) ){
glabel=gnames[x]
} else if( (xma[x] < xmi[x]) &
( (xmi[x] - xma[x]) > min_glength) ){
glabel=gnames[x]
}
glabel
Expand Down
40 changes: 37 additions & 3 deletions src/vvv2_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __main__():
b_test_vvv2_display = False # ok 2022 05 05 complet, partial tc
b_test_convert_tbl2json = False # ok 2022 04 26 complete tc,
b_test_correct_multicontig_vardict_vcf = False # ok 2022 04 29 partial tc
b_test_convert_vcffile_to_readable = False # ok 2024 03 27 complete tc,
b_test_convert_vcffile_to_readable = False # ok 2025 01 20 complete tc,
b_test_correct_covdepth_f = False # ok 2024 01 20 tc,
b_test_visualize_snp_v4 = False # ok 2024 03 27 complete tc,
b_test = False
Expand Down Expand Up @@ -175,6 +175,7 @@ def __main__():
# parser.set_defaults(b_test_visualize_snp_v4=False)
parser.set_defaults(b_verbose=False)
parser.set_defaults(var_significant_threshold=7)
parser.set_defaults(b_log_scale=True)
if var_significant_threshold is not None:
var_significant_threshold_str = str(var_significant_threshold)

Expand Down Expand Up @@ -380,6 +381,39 @@ def __main__():
print(prog_tag + " END")
# --------------------------------------------------------------

# --------------------------------------------------------------
# COMPLETE GENOME PCV2
# in files
pass_annot_f = test_dir + "/res3_vadr_pass.tbl" # from vadr results
fail_annot_f = test_dir + "/res3_vadr_fail.tbl" # from vadr results
seq_stat_f = test_dir + "/res3_vadr.seqstat" # from vadr results
vardict_vcf_f = test_dir + "/res3_vardict.vcf" # from lofreq results
cov_depth_f = test_dir + "/res3_covdepth.txt" # from samtools results
# tmp out files
json_annot_f = test_dir + "/res3_vadr.json" # from convert_tbl2json.py
contig_limits_f= test_dir + "/contig3_limits.txt"
contig_names_f = test_dir + "/contig3_names.txt"
cov_depth_corr_f= test_dir + "/res3_covdepth_corr.txt"
# final out file
png_var_f = test_dir + "/res3_vvv2.png" # from ...
cmd = ' '.join([ dir_path + "/vvv2_display.py",
"--pass_tbl_f", pass_annot_f,
"--fail_tbl_f", fail_annot_f,
"--seq_stat_f", seq_stat_f,
"--vcf_f", vardict_vcf_f,
"--contig_limits_f", contig_limits_f,
"--contig_names_f", contig_names_f,
"--cov_depth_f", cov_depth_f,
"--cov_depth_corr_f", cov_depth_corr_f,
"--png_var_f", png_var_f,
"--var_significant_threshold", var_significant_threshold_str
])
print(prog_tag + " START")
print(prog_tag + " cmd:" + cmd)
os.system(cmd)
print(prog_tag + " END")
# --------------------------------------------------------------

sys.exit()
# ------------------------------------------------------------------

Expand Down Expand Up @@ -483,8 +517,8 @@ def __main__():
# vardict_vcf_f = test_dir + "/res_vardict.vcf" # from vardict results
correct_vcf_f = test_dir + "/res_correct.vcf"
json_annot_f = test_dir + "/res_vadr.json"
snp_loc_f = test_dir + "/res_snp.txt"
snp_loc_summary_f = test_dir + "/res_snp_summary.txt"
snp_loc_f = test_dir + "/res_snp.txt"
snp_loc_summary_f = test_dir + "/res_snp_summary.txt"

if(snp_loc_f == ''):
snp_loc_f = vardict_vcf_f
Expand Down
4 changes: 2 additions & 2 deletions vvv2_display.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<tool id="vvv2_display" name="vvv2_display: Display SNP proportions and CDS of an assembly in png image" version="0.2.3.2" python_template_version="3.9">
<tool id="vvv2_display" name="vvv2_display: Display SNP proportions and CDS of an assembly in png image" version="0.2.3.3" python_template_version="3.9">
<requirements>
<requirement type="package" version="0.2.3.2">vvv2_display</requirement>
<requirement type="package" version="0.2.3.3">vvv2_display</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
vvv2_display.py -f '$vadr_fail_annotation' -p '$vadr_pass_annotation' -s '$vadr_seqstat' -n '$vardict_vcf' -r '$snp_img' -w '$var_significant_thres' -o '$cov_depth' -e '$cov_depth_corr' -t '$snp_loc' -u '$snp_loc_summary' -j '$json_annot' -k '$bed_annot' -l '$correct_vcf' -m '$contig_limits' $cov_depth_scale
Expand Down

0 comments on commit 2069216

Please sign in to comment.