Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUSCO: Expose log as output and add parameter to remove intermediate files #7470

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions modules/nf-core/busco/busco/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ process BUSCO_BUSCO {
val lineage // Required: lineage for checking against, or "auto/auto_prok/auto_euk" for enabling auto-lineage
path busco_lineages_path // Recommended: busco lineages file - downloads if not set
path config_file // Optional: busco configuration file
val clean_intermediates // Optional: Remove intermediate files

output:
tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary
tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true
tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json , optional: true
tuple val(meta), path("*-busco/logs/busco.log") , emit: log , optional: true
tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true
tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true
tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins , optional: true
Expand All @@ -42,6 +44,7 @@ process BUSCO_BUSCO {
? lineage.replaceFirst('auto', '--auto-lineage').replaceAll('_', '-')
: "--lineage_dataset ${lineage}"
def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : ''
def clean_cmd = clean_intermediates ? 'rm -fr ./*-busco/*/auto_lineage ./*-busco/*/**/{miniprot,hmmer,.bbtools}_output' : ''
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose BUSCO doesn't provide a built-in option for cleaning up its intermediate files, correct?

"""
# Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
# Check for container variable initialisation script and source it.
Expand Down Expand Up @@ -85,6 +88,7 @@ process BUSCO_BUSCO {

# clean up
rm -rf "\$INPUT_SEQS"
${clean_cmd}

# Move files to avoid staging/publishing issues
mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt
Expand Down
13 changes: 13 additions & 0 deletions modules/nf-core/busco/busco/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ input:
- - config_file:
type: file
description: Path to BUSCO config file.
- - clean_intermediates:
type: boolean
description: Flag to remove intermediate files.
output:
- batch_summary:
- meta:
Expand Down Expand Up @@ -71,6 +74,16 @@ output:
type: file
description: Short Busco summary in JSON format
pattern: "short_summary.*.json"
- log:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- "*-busco/logs/busco.log":
type: file
description: BUSCO main log
pattern: "*-busco/logs/busco.log"
- full_table:
- meta:
type: map
Expand Down
19 changes: 13 additions & 6 deletions modules/nf-core/busco/busco/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ nextflow_process {
input[2] = 'bacteria_odb12' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues
input[3] = [] // Download busco lineage
input[4] = [] // No config
input[5] = true // Clean intermediates
"""
}
}
Expand Down Expand Up @@ -59,7 +60,7 @@ nextflow_process {
assert contains('fragmented_busco_sequences.tar.gz')
}

with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
with(path(process.out.log[0][1]).text) {
assert contains('DEBUG:busco.run_BUSCO')
assert contains('Results from dataset')
assert contains('how to cite BUSCO')
Expand Down Expand Up @@ -88,6 +89,7 @@ nextflow_process {
input[2] = 'bacteria_odb12'
input[3] = []
input[4] = []
input[5] = false
"""
}
}
Expand Down Expand Up @@ -143,7 +145,7 @@ nextflow_process {
assert contains('fragmented_busco_sequences.tar.gz')
}

with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
with(path(process.out.log[0][1]).text) {
assert contains('DEBUG:busco.run_BUSCO')
assert contains('Results from dataset')
assert contains('how to cite BUSCO')
Expand All @@ -170,6 +172,7 @@ nextflow_process {
input[2] = 'eukaryota_odb10'
input[3] = []
input[4] = []
input[5] = false
"""
}
}
Expand Down Expand Up @@ -205,7 +208,7 @@ nextflow_process {
assert contains('fragmented_busco_sequences.tar.gz')
}

with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
with(path(process.out.log[0][1]).text) {
assert contains('DEBUG:busco.run_BUSCO')
assert contains('Results from dataset')
assert contains('how to cite BUSCO')
Expand Down Expand Up @@ -233,6 +236,7 @@ nextflow_process {
input[2] = 'eukaryota_odb10'
input[3] = []
input[4] = []
input[5] = false
"""
}
}
Expand All @@ -245,7 +249,7 @@ nextflow_process {
process.out.versions[0]
).match()

with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
with(path(process.out.log[0][1]).text) {
assert contains('DEBUG:busco.run_BUSCO')
assert contains('Augustus did not recognize any genes')

Expand Down Expand Up @@ -276,6 +280,7 @@ nextflow_process {
input[2] = 'bacteria_odb12'
input[3] = []
input[4] = []
input[5] = false
"""
}
}
Expand Down Expand Up @@ -311,7 +316,7 @@ nextflow_process {
assert contains('fragmented_busco_sequences.tar.gz')
}

with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
with(path(process.out.log[0][1]).text) {
assert contains('DEBUG:busco.run_BUSCO')
assert contains('Results from dataset')
assert contains('how to cite BUSCO')
Expand All @@ -338,6 +343,7 @@ nextflow_process {
input[2] = 'bacteria_odb12'
input[3] = []
input[4] = []
input[5] = false
"""
}
}
Expand Down Expand Up @@ -375,7 +381,7 @@ nextflow_process {
assert contains('fragmented_busco_sequences.tar.gz')
}

with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
with(path(process.out.log[0][1]).text) {
assert contains('DEBUG:busco.run_BUSCO')
assert contains('Results from dataset')
assert contains('how to cite BUSCO')
Expand All @@ -399,6 +405,7 @@ nextflow_process {
input[2] = 'bacteria_odb12'
input[3] = []
input[4] = []
input[5] = false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any test with cleanup enabled. Is it possible to add one?

"""
}
}
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/busco/busco/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.2"
"nextflow": "24.10.4"
},
"timestamp": "2024-12-13T15:30:45.505241761"
"timestamp": "2025-02-11T12:12:14.257383199"
},
"test_busco_eukaryote_augustus": {
"content": [
Expand Down
1 change: 1 addition & 0 deletions modules/nf-core/busco/generateplot/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ nextflow_process {
input[2] = 'bacteria_odb10'
input[3] = []
input[4] = []
input[5] = false
"""
}
}
Expand Down
30 changes: 20 additions & 10 deletions subworkflows/nf-core/fasta_gxf_busco_plot/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ workflow FASTA_GXF_BUSCO_PLOT {
val_lineages // [ val(lineage) ]
val_busco_lineages_path // val(path); Optional; Set to [] if not needed
val_busco_config // val(path); Optional; Set to [] if not needed
val_busco_cleanup // val(boolean); Set to true to remove BUSCO intermediate files

main:
ch_versions = Channel.empty()
Expand All @@ -32,6 +33,7 @@ workflow FASTA_GXF_BUSCO_PLOT {
ch_config_path = val_busco_config
? Channel.of(file(val_busco_config, checkIfExists: true))
: Channel.of( [ [] ] )
ch_busco_cleanup = Channel.of([val_busco_cleanup])

// MODULE: BUSCO_BUSCO as BUSCO_ASSEMBLY
ch_busco_assembly_inputs = ch_fasta
Expand All @@ -53,13 +55,17 @@ workflow FASTA_GXF_BUSCO_PLOT {
| combine(
ch_config_path
)
| combine(
ch_busco_cleanup
)

BUSCO_ASSEMBLY(
ch_busco_assembly_inputs.map { meta, fasta, _mode, _lineage, _db, _config -> [ meta, fasta ] },
ch_busco_assembly_inputs.map { _meta, _fasta, mode, _lineage, _db, _config -> mode },
ch_busco_assembly_inputs.map { _meta, _fasta, _mode, lineage, _db, _config -> lineage },
ch_busco_assembly_inputs.map { _meta, _fasta, _mode, _lineage, db, _config -> db },
ch_busco_assembly_inputs.map { _meta, _fasta, _mode, _lineage, _db, config -> config }
ch_busco_assembly_inputs.map { meta, fasta, _mode, _lineage, _db, _config, _cleanup -> [ meta, fasta ] },
ch_busco_assembly_inputs.map { _meta, _fasta, mode, _lineage, _db, _config, _cleanup -> mode },
ch_busco_assembly_inputs.map { _meta, _fasta, _mode, lineage, _db, _config, _cleanup -> lineage },
ch_busco_assembly_inputs.map { _meta, _fasta, _mode, _lineage, db, _config, _cleanup -> db },
ch_busco_assembly_inputs.map { _meta, _fasta, _mode, _lineage, _db, config, _cleanup -> config },
ch_busco_assembly_inputs.map { _meta, _fasta, _mode, _lineage, _db, _config, cleanup -> cleanup }
)

ch_assembly_batch_summary = BUSCO_ASSEMBLY.out.batch_summary
Expand Down Expand Up @@ -123,13 +129,17 @@ workflow FASTA_GXF_BUSCO_PLOT {
| combine(
ch_config_path
)
| combine(
ch_busco_cleanup
)

BUSCO_ANNOTATION(
ch_busco_annotation_inputs.map { meta, fasta, _mode, _lineage, _db, _config -> [ meta, fasta ] },
ch_busco_annotation_inputs.map { _meta, _fasta, mode, _lineage, _db, _config -> mode },
ch_busco_annotation_inputs.map { _meta, _fasta, _mode, lineage, _db, _config -> lineage },
ch_busco_annotation_inputs.map { _meta, _fasta, _mode, _lineage, db, _config -> db },
ch_busco_annotation_inputs.map { _meta, _fasta, _mode, _lineage, _db, config -> config }
ch_busco_annotation_inputs.map { meta, fasta, _mode, _lineage, _db, _config, _cleanup -> [ meta, fasta ] },
ch_busco_annotation_inputs.map { _meta, _fasta, mode, _lineage, _db, _config, _cleanup -> mode },
ch_busco_annotation_inputs.map { _meta, _fasta, _mode, lineage, _db, _config, _cleanup -> lineage },
ch_busco_annotation_inputs.map { _meta, _fasta, _mode, _lineage, db, _config, _cleanup -> db },
ch_busco_annotation_inputs.map { _meta, _fasta, _mode, _lineage, _db, config, _cleanup -> config },
ch_busco_annotation_inputs.map { _meta, _fasta, _mode, _lineage, _db, _config, cleanup -> cleanup }
)

ch_annotation_batch_summary = BUSCO_ANNOTATION.out.batch_summary
Expand Down
5 changes: 5 additions & 0 deletions subworkflows/nf-core/fasta_gxf_busco_plot/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ input:
description: |
Path to BUSCO config. It is optional and can be set to `[]`
Structure:val(busco_config)
- val_busco_cleanup:
type: boolean
description: |
Flag to indicate if intermediate BUSCO files should be removed
Structure:val(busco_cleanup)
output:
- assembly_batch_summary:
type: file
Expand Down
2 changes: 2 additions & 0 deletions subworkflows/nf-core/fasta_gxf_busco_plot/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ nextflow_workflow {
input[3] = [ 'bacteria_odb10', 'archaea_odb10' ]
input[4] = []
input[5] = []
input[6] = false
"""
}
}
Expand Down Expand Up @@ -100,6 +101,7 @@ nextflow_workflow {
input[3] = [ 'bacteria_odb10', 'archaea_odb10' ]
input[4] = []
input[5] = []
input[6] = false
"""
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,9 @@
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.1"
"nextflow": "24.10.4"
},
"timestamp": "2024-12-11T12:47:54.913113837"
"timestamp": "2025-02-11T12:15:06.501270463"
},
"candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome": {
"content": [
Expand Down
Loading