Skip to content

Commit

Permalink
Fix missing vim genes (#182)
Browse files Browse the repository at this point in the history
* Replace Spades assembler with SKESA
* Replace blaVIM test data with output from SKESA
* Convert sequence naming in Skesa contigs fasta file into Spades format
* Check if reference_length is set before showing it in template
  • Loading branch information
samuell authored Sep 20, 2024
1 parent 1ab5a51 commit edb9256
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 36 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ dependencies:
- pigz>=2.4
- quast=5.0.2
- samtools=1.13
- spades=3.14.0
- trimmomatic=0.39
- r-base=4.1.1
- openjdk=11.0.9.1
- pytest=6.2.5
- pytest-cov=2.12.1
- skesa=2.5.1
17 changes: 0 additions & 17 deletions microSALT/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,6 @@ def done():
logger.debug("INFO - Execution finished!")


def validate_assembly_mode(ctx, param, value):
allowed_values = ["careful", "isolate"]
if value not in allowed_values:
raise click.BadParameter(
f"Invalid value: {value}. Allowed values are {', '.join(allowed_values)}"
)
return value


def review_sampleinfo(pfile):
"""Reviews sample info. Returns loaded json object"""

Expand Down Expand Up @@ -139,12 +130,6 @@ def root(ctx):
@click.option(
"--untrimmed", help="Use untrimmed input data", default=False, is_flag=True
)
@click.option(
"--assembly-mode",
help="Runs SPAdes in careful mode",
type=click.Choice(["careful", "isolate"]),
default="isolate",
)
@click.pass_context
def analyse(
ctx,
Expand All @@ -156,7 +141,6 @@ def analyse(
skip_update,
force_update,
untrimmed,
assembly_mode,
):
"""Sequence analysis, typing and resistance identification"""
# Run section
Expand All @@ -178,7 +162,6 @@ def analyse(
"email": email,
"skip_update": skip_update,
"trimmed": not untrimmed,
"assembly_mode": assembly_mode,
"pool": pool,
}

Expand Down
4 changes: 4 additions & 0 deletions microSALT/server/templates/typing_page.html
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,11 @@ <h4>Översikt</h4>
<h4>Assembly</h4>
{% if sample.genome_length > -1 %}
<tr><th scope="row" >Genomstorlek enligt assembly</th><td>{{'{0:,}'.format(sample.genome_length)|replace(","," ")}}</td></tr>
{% if sample.reference_length and sample.reference_length > -1 %}
<tr><th scope="row" >Referensens genomstorlek</th><td>{{'{0:,}'.format(sample.reference_length)|replace(","," ")}}</td></tr>
{% else %}
<tr><th scope="row" >Referensens genomstorlek</th><td>N/A</td></tr>
{% endif %}
<tr><th scope="row" >GC halt</th><td>{{sample.gc_percentage| round(2)}}%</td></tr>
<tr><th scope="row" >N50 (minsta contiglängd för 50% av genomet)</th><td>{{'{0:,}'.format(sample.n50)|replace(","," ")}}</td></tr>
<tr><th scope="row" >Nödvändiga contigs</th><td>{{sample.contigs}}</td></tr>
Expand Down
48 changes: 31 additions & 17 deletions microSALT/utils/job_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def __init__(self, config, log, sampleinfo={}, run_settings={}):
self.indir = os.path.abspath(run_settings.get("input", "/tmp/"))
self.trimmed = run_settings.get("trimmed", True)
self.qc_only = run_settings.get("qc_only", False)
self.assembly_mode = run_settings.get("assembly_mode")
self.pool = run_settings.get("pool", [])
self.finishdir = run_settings.get("finishdir", "")

Expand Down Expand Up @@ -180,30 +179,45 @@ def verify_fastq(self):
return sorted(verified_files)

def create_assemblysection(self):
assembly_dir = f"{self.finishdir}/assembly"
contigs_file_raw = f"{assembly_dir}/{self.name}_contigs_raw.fasta"
contigs_file = f"{assembly_dir}/{self.name}_contigs.fasta"
contigs_trimmed_file = f"{assembly_dir}/{self.name}_trimmed_contigs.fasta"

batchfile = open(self.batchfile, "a+")
# memory is actually 128 per node regardless of cores.
batchfile.write("# Spades assembly\n")
if self.trimmed:
trimline = "-s {}".format(self.concat_files["i"])
else:
trimline = ""

batchfile.write("# SKESA assembly\n")
batchfile.write(
f"spades.py --threads {self.config['slurm_header']['threads']} --{self.assembly_mode}"
f" --memory {8 * int(self.config['slurm_header']['threads'])} -o "
f"{self.finishdir}/assembly -1 {self.concat_files['f']} -2 {self.concat_files['r']} "
f"{trimline}\n"
f"mkdir -p {assembly_dir} &"
f"skesa "
f"--cores {self.config['slurm_header']['threads']} "
f"--memory {8 * int(self.config['slurm_header']['threads'])} "
f"--contigs_out {contigs_file_raw} "
f"--reads {self.concat_files['f']},{self.concat_files['r']}\n"
)

# Convert sequence naming in Skesa output into Spades format in the contigs fasta file:
# ----------------------------------------------
# Skesa format: >Contig_1_100.000
# Spades format: >NODE_1_length_150_cov_100.000
# ----------------------------------------------
# We do the change by doing the following with awk:
# 1. When the line is a header (starting with >), capture the contig number and coverage
# 2. When the line is NOT a header (not starting with >), compute the length of the line
# and then print out:
# a. A new header line in Spades format with the length included
# b. Print out the sequence line.
# Note: The match function requires GNU awk (gawk) to be able to capture groups in regexes.
batchfile.write(
"mv {0}/assembly/contigs.fasta {0}/assembly/{1}_contigs.fasta\n".format(
self.finishdir, self.name
)
"gawk " +
"'/^>/ { match($0, /Contig_([0-9]+)_([0-9\.]+)/, m) } " +
"!/^>/ { seqlen=length($0); print \">NODE_\" m[1] \"_length_\" seqlen \"_cov_\" m[2]; print $0; }' " +
f"{contigs_file_raw} > {contigs_file}\n"
)

# Keep only the 999(?) top contigs to avoid really low-quality contigs
batchfile.write(
"sed -n '/NODE_1000_/q;p' {0}/assembly/{1}_contigs.fasta > {0}/assembly/{1}_trimmed_contigs.fasta\n".format(
self.finishdir, self.name
)
f"sed -n '/NODE_1000_/q;p' {contigs_file} > {contigs_trimmed_file}\n"
)
# batchfile.write("##Input cleanup\n")
# batchfile.write("rm -r {}/trimmed\n".format(self.finishdir))
Expand Down
2 changes: 1 addition & 1 deletion microSALT/utils/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def scrape_blast(self, type="", file_list=[]):
# Identical identity and span, seperating based on contig coverage
else:
# Rightmost is worse
if float(hypo[ind].get("contig_coverage")) >= float(
if float(hypo[ind].get("contig_coverage")) > float(
hypo[targ].get("contig_coverage")
):
del hypo[targ]
Expand Down
3 changes: 3 additions & 0 deletions tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,13 @@ def test_blast_scraping(scraper, testdata_prefix, caplog):
caplog.set_level(logging.DEBUG)
scraper.scrape_blast(type='seq_type',file_list=["{}/blast_single_loci.txt".format(testdata_prefix)])
assert "candidate" in caplog.text

caplog.clear()
hits = scraper.scrape_blast(type='resistance',file_list=["{}/blast_single_resistance.txt".format(testdata_prefix)])
genes = [h["gene"] for h in hits]

assert "blaOXA-48" in genes
assert "blaVIM-4" in genes

def test_alignment_scraping(scraper, init_references, testdata_prefix):
scraper.scrape_alignment(file_list=glob.glob("{}/*.stats.*".format(testdata_prefix)))
88 changes: 88 additions & 0 deletions tests/testdata/blast_single_resistance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,91 @@ blaOXA-232_1_JX423831 minus NODE_65_length_2231_cov_99.410745 blaOXA-232_1_JX423
blaOXA-484_1_KR401105 minus NODE_65_length_2231_cov_99.410745 blaOXA-484_1_KR401105 94.368 0.0 1225 1409 2206 798 1 799
blaOXA-535_1_KX828709 minus NODE_65_length_2231_cov_99.410745 blaOXA-535_1_KX828709 84.856 0.0 804 1409 2206 798 1 799
blaOXA-436_1_KT959105 minus NODE_65_length_2231_cov_99.410745 blaOXA-436_1_KT959105 84.481 0.0 787 1409 2206 798 1 799
# BLASTN 2.12.0+
# Query: NODE_13_length_813_cov_66.2018
# Database: /tmp/beta-lactam
# Fields: subject title, subject strand, query acc.ver, subject acc.ver, % identity, evalue, bit score, q. start, q. end, s. start, s. end, alignment length
# 25 hits found
blaVIM-57_1_MH450217 plus NODE_13_length_813_cov_66.2018 blaVIM-57_1_MH450217 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-49_1_KU663374 plus NODE_13_length_813_cov_66.2018 blaVIM-49_1_KU663374 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-52_1_KX349731 plus NODE_13_length_813_cov_66.2018 blaVIM-52_1_KX349731 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-54_1_KY508061 plus NODE_13_length_813_cov_66.2018 blaVIM-54_1_KY508061 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-39_1_KF131539 plus NODE_13_length_813_cov_66.2018 blaVIM-39_1_KF131539 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-40_1_HG934765 plus NODE_13_length_813_cov_66.2018 blaVIM-40_1_HG934765 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-42_1_KP071470 plus NODE_13_length_813_cov_66.2018 blaVIM-42_1_KP071470 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-43_1_KP096412 plus NODE_13_length_813_cov_66.2018 blaVIM-43_1_KP096412 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-37_1_JX982636 plus NODE_13_length_813_cov_66.2018 blaVIM-37_1_JX982636 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-35_1_JX982634 plus NODE_13_length_813_cov_66.2018 blaVIM-35_1_JX982634 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-34_1_JX013656 plus NODE_13_length_813_cov_66.2018 blaVIM-34_1_JX013656 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-33_1_JX258134 plus NODE_13_length_813_cov_66.2018 blaVIM-33_1_JX258134 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-32_1_JN676230 plus NODE_13_length_813_cov_66.2018 blaVIM-32_1_JN676230 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-29_1_JX311308 plus NODE_13_length_813_cov_66.2018 blaVIM-29_1_JX311308 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-28_1_JF900599 plus NODE_13_length_813_cov_66.2018 blaVIM-28_1_JF900599 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-27_1_HQ858608 plus NODE_13_length_813_cov_66.2018 blaVIM-27_1_HQ858608 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-26_1_FR748153 plus NODE_13_length_813_cov_66.2018 blaVIM-26_1_FR748153 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-25_1_HM750249 plus NODE_13_length_813_cov_66.2018 blaVIM-25_1_HM750249 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-19_1_FJ499397 plus NODE_13_length_813_cov_66.2018 blaVIM-19_1_FJ499397 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-14_1_FJ445404 plus NODE_13_length_813_cov_66.2018 blaVIM-14_1_FJ445404 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-12_1_DQ143913 plus NODE_13_length_813_cov_66.2018 blaVIM-12_1_DQ143913 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-5_1_DQ023222 plus NODE_13_length_813_cov_66.2018 blaVIM-5_1_DQ023222 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-4_1_EU581706 plus NODE_13_length_813_cov_66.2018 blaVIM-4_1_EU581706 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-1_1_Y18050 plus NODE_13_length_813_cov_66.2018 blaVIM-1_1_Y18050 100.000 1.03e-19 93.5 764 813 1 50 50
blaVIM-38_1_KC469971 plus NODE_13_length_813_cov_66.2018 blaVIM-38_1_KC469971 96.000 2.22e-16 82.4 764 813 1 50 50
# BLASTN 2.12.0+
# Query: NODE_38_length_14835_cov_85.885
# Database: /tmp/beta-lactam
# Fields: subject title, subject strand, query acc.ver, subject acc.ver, % identity, evalue, bit score, q. start, q. end, s. start, s. end, alignment length
# 53 hits found
blaVIM-4_1_EU581706 minus NODE_38_length_14835_cov_85.885 blaVIM-4_1_EU581706 99.875 0.0 1474 11925 12725 801 1 801
blaVIM-54_1_KY508061 minus NODE_38_length_14835_cov_85.885 blaVIM-54_1_KY508061 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-40_1_HG934765 minus NODE_38_length_14835_cov_85.885 blaVIM-40_1_HG934765 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-43_1_KP096412 minus NODE_38_length_14835_cov_85.885 blaVIM-43_1_KP096412 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-37_1_JX982636 minus NODE_38_length_14835_cov_85.885 blaVIM-37_1_JX982636 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-28_1_JF900599 minus NODE_38_length_14835_cov_85.885 blaVIM-28_1_JF900599 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-19_1_FJ499397 minus NODE_38_length_14835_cov_85.885 blaVIM-19_1_FJ499397 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-14_1_FJ445404 minus NODE_38_length_14835_cov_85.885 blaVIM-14_1_FJ445404 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-1_1_Y18050 minus NODE_38_length_14835_cov_85.885 blaVIM-1_1_Y18050 99.750 0.0 1469 11925 12725 801 1 801
blaVIM-57_1_MH450217 minus NODE_38_length_14835_cov_85.885 blaVIM-57_1_MH450217 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-52_1_KX349731 minus NODE_38_length_14835_cov_85.885 blaVIM-52_1_KX349731 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-42_1_KP071470 minus NODE_38_length_14835_cov_85.885 blaVIM-42_1_KP071470 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-35_1_JX982634 minus NODE_38_length_14835_cov_85.885 blaVIM-35_1_JX982634 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-34_1_JX013656 minus NODE_38_length_14835_cov_85.885 blaVIM-34_1_JX013656 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-33_1_JX258134 minus NODE_38_length_14835_cov_85.885 blaVIM-33_1_JX258134 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-32_1_JN676230 minus NODE_38_length_14835_cov_85.885 blaVIM-32_1_JN676230 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-27_1_HQ858608 minus NODE_38_length_14835_cov_85.885 blaVIM-27_1_HQ858608 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-26_1_FR748153 minus NODE_38_length_14835_cov_85.885 blaVIM-26_1_FR748153 99.625 0.0 1463 11925 12725 801 1 801
blaVIM-39_1_KF131539 minus NODE_38_length_14835_cov_85.885 blaVIM-39_1_KF131539 99.501 0.0 1458 11925 12725 801 1 801
blaVIM-29_1_JX311308 minus NODE_38_length_14835_cov_85.885 blaVIM-29_1_JX311308 99.501 0.0 1458 11925 12725 801 1 801
blaVIM-5_1_DQ023222 minus NODE_38_length_14835_cov_85.885 blaVIM-5_1_DQ023222 98.002 0.0 1391 11925 12725 801 1 801
blaVIM-49_1_KU663374 minus NODE_38_length_14835_cov_85.885 blaVIM-49_1_KU663374 97.878 0.0 1386 11925 12725 801 1 801
blaVIM-12_1_DQ143913 minus NODE_38_length_14835_cov_85.885 blaVIM-12_1_DQ143913 97.878 0.0 1386 11925 12725 801 1 801
blaVIM-38_1_KC469971 minus NODE_38_length_14835_cov_85.885 blaVIM-38_1_KC469971 97.503 0.0 1369 11925 12725 801 1 801
blaVIM-25_1_HM750249 minus NODE_38_length_14835_cov_85.885 blaVIM-25_1_HM750249 96.504 0.0 1325 11925 12725 801 1 801
blaVIM-13_1_DQ365886 minus NODE_38_length_14835_cov_85.885 blaVIM-13_1_DQ365886 93.134 0.0 1175 11925 12725 801 1 801
blaVIM-47_1_KT954134 minus NODE_38_length_14835_cov_85.885 blaVIM-47_1_KT954134 92.884 0.0 1164 11925 12725 801 1 801
blaVIM-36_1_JX982635 minus NODE_38_length_14835_cov_85.885 blaVIM-36_1_JX982635 92.634 0.0 1153 11925 12725 801 1 801
blaVIM-30_1_JN129451 minus NODE_38_length_14835_cov_85.885 blaVIM-30_1_JN129451 92.634 0.0 1153 11925 12725 801 1 801
blaVIM-31_1_JN982330 minus NODE_38_length_14835_cov_85.885 blaVIM-31_1_JN982330 92.509 0.0 1147 11925 12725 801 1 801
blaVIM-6_1_AY165025 minus NODE_38_length_14835_cov_85.885 blaVIM-6_1_AY165025 92.509 0.0 1147 11925 12725 801 1 801
blaVIM-2_1_AF302086 minus NODE_38_length_14835_cov_85.885 blaVIM-2_1_AF302086 92.509 0.0 1147 11925 12725 801 1 801
blaVIM-51_1_KU746270 minus NODE_38_length_14835_cov_85.885 blaVIM-51_1_KU746270 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-41_1_KP771862 minus NODE_38_length_14835_cov_85.885 blaVIM-41_1_KP771862 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-45_1_KP681695 minus NODE_38_length_14835_cov_85.885 blaVIM-45_1_KP681695 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-46_1_KP749829 minus NODE_38_length_14835_cov_85.885 blaVIM-46_1_KP749829 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-24_1_HM855205 minus NODE_38_length_14835_cov_85.885 blaVIM-24_1_HM855205 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-23_1_GQ242167 minus NODE_38_length_14835_cov_85.885 blaVIM-23_1_GQ242167 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-20_1_GQ414736 minus NODE_38_length_14835_cov_85.885 blaVIM-20_1_GQ414736 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-17_1_EU118148 minus NODE_38_length_14835_cov_85.885 blaVIM-17_1_EU118148 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-16_1_EU419746 minus NODE_38_length_14835_cov_85.885 blaVIM-16_1_EU419746 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-15_1_EU419745 minus NODE_38_length_14835_cov_85.885 blaVIM-15_1_EU419745 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-11_1_AY605049 minus NODE_38_length_14835_cov_85.885 blaVIM-11_1_AY605049 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-10_1_AY524989 minus NODE_38_length_14835_cov_85.885 blaVIM-10_1_AY524989 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-9_1_AY524988 minus NODE_38_length_14835_cov_85.885 blaVIM-9_1_AY524988 92.385 0.0 1142 11925 12725 801 1 801
blaVIM-50_1_KU663375 minus NODE_38_length_14835_cov_85.885 blaVIM-50_1_KU663375 92.260 0.0 1136 11925 12725 801 1 801
blaVIM-44_1_KP681696 minus NODE_38_length_14835_cov_85.885 blaVIM-44_1_KP681696 92.260 0.0 1136 11925 12725 801 1 801
blaVIM-3_1_AF300454 minus NODE_38_length_14835_cov_85.885 blaVIM-3_1_AF300454 92.260 0.0 1136 11925 12725 801 1 801
blaVIM-56_1_MG834535 minus NODE_38_length_14835_cov_85.885 blaVIM-56_1_MG834535 92.621 0.0 1131 11925 12710 801 16 786
blaVIM-8_1_AY524987 minus NODE_38_length_14835_cov_85.885 blaVIM-8_1_AY524987 92.135 0.0 1131 11925 12725 801 1 801
blaVIM-48_1_KY362199 minus NODE_38_length_14835_cov_85.885 blaVIM-48_1_KY362199 92.755 0.0 1118 11953 12725 773 1 773
blaVIM-18_1_AM778091 minus NODE_38_length_14835_cov_85.885 blaVIM-18_1_AM778091 91.261 0.0 1081 11925 12725 789 1 801
blaVIM-7_1_AJ536835 minus NODE_38_length_14835_cov_85.885 blaVIM-7_1_AJ536835 80.563 3.40e-155 547 11926 12635 797 88 710

0 comments on commit edb9256

Please sign in to comment.