Skip to content

Commit

Permalink
Added and fixed nf-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
CarsonJM committed Jan 4, 2024
1 parent 40b4ebf commit 2188e59
Show file tree
Hide file tree
Showing 27 changed files with 1,653 additions and 412 deletions.
2 changes: 2 additions & 0 deletions bin/combine_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def main(args=None):
results, gtdbtk_results, left_on="bin", right_on="user_genome", how="outer"
) # assuming depths for all bins are given

# sort results for reproducibility
results.sort_values(by='bin', inplace=True, ignore_index=True)
results.to_csv(args.out, sep="\t")


Expand Down
2 changes: 2 additions & 0 deletions bin/summary_busco.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ def main(args=None):
else:
df_final = df_specific.append(df_failed)

# sort output file for reproducibility
df_final.sort_values(by='GenomeBin', inplace=True)
df_final.to_csv(args.out, sep="\t", index=False)


Expand Down
7 changes: 7 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ process {
time = { check_max (16.h * task.attempt, 'time' ) }
errorStrategy = { task.exitStatus in [143,137,104,134,139,250] ? 'retry' : 'finish' }
}
// Resource requests and retry policy for the process selected by name BOWTIE2_PHIX_REMOVAL_ALIGN.
// Original note: bowtie2 returns exit code 250 when running out of memory, which is why 250
// appears in the retryable exit statuses below.
withName: BOWTIE2_PHIX_REMOVAL_ALIGN {
// CPU request is delegated to check_bowtie2_cpus (defined outside this view), called with 8 and the attempt number.
// NOTE(review): presumably this pins the CPU count when a fixed-CPU option is enabled -- confirm against the helper.
cpus = { check_bowtie2_cpus (8, task.attempt ) }
// Memory and time requests are multiplied by the attempt number, so each retry asks for more.
// NOTE(review): check_max presumably caps the request at the configured maximum -- confirm.
memory = { check_max (40.GB * task.attempt, 'memory' ) }
time = { check_max (16.h * task.attempt, 'time' ) }
// Retry when the task exits with one of the listed statuses; any other status selects the 'finish' strategy.
errorStrategy = { task.exitStatus in [143,137,104,134,139,250] ? 'retry' : 'finish' }
}
//MEGAHIT returns exit code 250 when running out of memory
withName: MEGAHIT {
cpus = { check_megahit_cpus (8, task.attempt ) }
Expand Down
12 changes: 7 additions & 5 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@ params {
centrifuge_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_cf.tar.gz"
kraken2_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_kraken.tgz"
skip_krona = true
min_length_unbinned_contigs = 1
megahit_fix_cpu_1 = true
bowtie2_fix_cpu_1 = true
metabat2_fix_cpu_1 = true
maxbin2_fix_cpu_1 = true
concoct_fix_cpu_1 = true
binning_map_mode = 'own'
min_length_unbinned_contigs = 1000000
max_unbinned_contigs = 2
busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
busco_clean = true
skip_gtdbtk = true
skip_concoct = true
megahit_fix_cpu_1 = true
spades_fix_cpus = 2
spadeshybrid_fix_cpus = 2
metabat_rng_seed = 1
}
3 changes: 1 addition & 2 deletions conf/test_adapterremoval.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.multirun.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.euk.csv'
clip_tool = 'adapterremoval'
keep_phix = true
skip_adapter_trimming = true
centrifuge_db = null
kraken2_db = null
skip_krona = true
Expand Down
54 changes: 30 additions & 24 deletions conf/test_ancient_dna.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,37 @@ params {
config_profile_description = 'Minimal test dataset to check pipeline function for ancient DNA step'

// Limit resources so that this can run on GitHub Actions
//max_cpus = 2
//max_memory = '6.GB'
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.csv'
skip_clipping = true
keep_phix = true
kraken2_db = null
centrifuge_db = null
skip_krona = true
megahit_fix_cpu_1 = true
skip_spades = false
spades_fix_cpus = 1
skip_spadeshybrid = true
ancient_dna = true
skip_quast = true
skip_prodigal = true
bowtie2_fix_cpu_1 = true
binning_map_mode = 'own'
metabat2_fix_cpu_1 = true
maxbin2_fix_cpu_1 = true
concoct_fix_cpu_1 = true
skip_binqc = true
skip_gtdbtk = true
skip_prokka = true
skip_metaeuk = true
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.csv'
skip_clipping = true
keep_phix = true
kraken2_db = null
centrifuge_db = null
skip_krona = true
megahit_fix_cpu_1 = true
spades_fix_cpus = 1
skip_spadeshybrid = true
ancient_dna = true
skip_quast = true
skip_prodigal = true
bowtie2_fix_cpu_1 = true
binning_map_mode = 'own'
metabat2_fix_cpu_1 = true
maxbin2_fix_cpu_1 = true
concoct_fix_cpu_1 = true
bcftools_view_high_variant_quality = 0
bcftools_view_medium_variant_quality = 0
bcftools_view_minimal_allelesupport = 3
refine_bins_dastool = true
refine_bins_dastool_threshold = 0
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
skip_binqc = true
skip_gtdbtk = true
skip_prokka = true
skip_metaeuk = true
}
2 changes: 1 addition & 1 deletion conf/test_bbnorm.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.multirun.csv'
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.csv'
bbnorm = true
keep_phix = true
skip_adapter_trimming = true
Expand Down
41 changes: 27 additions & 14 deletions conf/test_binrefinement.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,31 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.csv'
assembly_input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/assembly_samplesheet.csv'
centrifuge_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_cf.tar.gz"
kraken2_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_kraken.tgz"
skip_krona = true
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
skip_gtdbtk = true
refine_bins_dastool = true
refine_bins_dastool_threshold = 0
// TODO not using 'both' until #489 merged
postbinning_input = 'refined_bins_only'
busco_clean = true
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.csv'
assembly_input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/assembly_samplesheet.csv'
bbnorm = true
keep_phix = true
skip_adapter_trimming = true
centrifuge_db = null
kraken2_db = null
skip_krona = true
skip_quast = true
skip_prodigal = true
bowtie2_fix_cpu_1 = true
metabat2_fix_cpu_1 = true
maxbin2_fix_cpu_1 = true
concoct_fix_cpu_1 = true
binning_map_mode = 'own'
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
refine_bins_dastool = true
refine_bins_dastool_threshold = 0
// TODO not using 'both' until #489 merged
postbinning_input = 'refined_bins_only'
skip_busco = true
skip_binqc = true
skip_gtdbtk = true
skip_prokka = true
skip_metaeuk = true

}
31 changes: 22 additions & 9 deletions conf/test_busco_auto.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,26 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.csv'
skip_spades = true
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
skip_gtdbtk = true
skip_prokka = true
skip_prodigal = true
skip_quast = true
skip_concoct = true
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.csv'
keep_phix = true
skip_adapter_trimming = true
centrifuge_db = null
kraken2_db = null
skip_krona = true
megahit_fix_cpu_1 = true
skip_spades = true
skip_spadeshybrid = true
skip_quast = true
skip_prodigal = true
bowtie2_fix_cpu_1 = true
metabat2_fix_cpu_1 = true
maxbin2_fix_cpu_1 = true
concoct_fix_cpu_1 = true
binning_map_mode = 'own'
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
skip_concoct = true
skip_gtdbtk = true
skip_prokka = true
skip_metaeuk = true
}
6 changes: 2 additions & 4 deletions conf/test_virus_identification.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ params {

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.multirun.csv'
run_virus_identification = true
genomad_splits = 7

// For computational efficiency
keep_phix = true
skip_clipping = true
skip_adapter_trimming = true
Expand All @@ -36,6 +32,8 @@ params {
skip_spades = true
skip_spadeshybrid = true
skip_quast = true
run_virus_identification = true
genomad_splits = 7
skip_prodigal = true
skip_binning = true
skip_binqc = true
Expand Down
1 change: 0 additions & 1 deletion subworkflows/local/busco_qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ workflow BUSCO_QC {
)

emit:
summary_specific = BUSCO.out.summary_specific
summary = BUSCO_SUMMARY.out.summary
failed_bin = BUSCO.out.failed_bin.map{it[1]}
multiqc = BUSCO.out.summary_domain.mix(BUSCO.out.summary_specific).map{it[1]}
Expand Down
33 changes: 4 additions & 29 deletions workflows/mag/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,11 @@ if(params.metaeuk_db && !params.skip_metaeuk) {
ch_binning_results_unbins = Channel.empty()
ch_refined_bins = Channel.empty()
ch_refined_unbins = Channel.empty()
ch_busco_summary_specific = Channel.empty()
ch_busco_summary = Channel.empty()
ch_checkm_tsv = Channel.empty()
ch_gunc_maxcss_level_tsv = Channel.empty()
ch_quast_bin_summaries = Channel.empty()
ch_bin_summaries = Channel.empty()
ch_cat_tax_classification_names = Channel.empty()
ch_gtdbtk_summaries = Channel.empty()
ch_prokka_faa = Channel.empty()
Expand Down Expand Up @@ -881,7 +882,6 @@ workflow MAG {
ch_input_bins_for_qc
)
ch_busco_summary = BUSCO_QC.out.summary
ch_busco_summary_specific = BUSCO_QC.out.summary_specific
ch_versions = ch_versions.mix(BUSCO_QC.out.versions.first())
// process information if BUSCO analysis failed for individual bins due to no matching genes
BUSCO_QC.out
Expand Down Expand Up @@ -1000,6 +1000,7 @@ workflow MAG {
ch_quast_bins_summary.ifEmpty([]),
ch_gtdbtk_summary.ifEmpty([])
)
ch_bin_summaries = BIN_SUMMARY.out.summary
}

/*
Expand Down Expand Up @@ -1120,39 +1121,13 @@ workflow MAG {

emit:
short_reads = ch_short_reads
// fastqc_raw = FASTQC_RAW.out.json
// adapterremoval_se = ADAPTERREMOVAL_SE.out.singles_truncated
// adapterremoval_pe = ADAPTERREMOVAL_PE.out.paired_truncated
// host_rm = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads
// phix_rm = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads
// fastqc_trimmed = FASTQC_TRIMMED.out.json
// cat_fastq = CAT_FASTQ.out.reads
// seqtk = SEQTK_MERGEPE.out.reads
// bbmap = BBMAP_BBNORM.out.fastq
// nanoplot_raw = NANOPLOT_RAW.out.txt
// porechop = PORECHOP.out.reads
// nanolyse = NANOLYSE.out.reads
// filtlong = FILTLONG.out.reads
// nanoplot_filtered = NANOPLOT_FILTERED.out.txt
// kraken2 = KRAKEN2.out.report
// centrifuge = CENTRIFUGE.out.report
assemblies = ch_assemblies
// megahit = ch_megahit_assemblies
// spades = ch_spades_assemblies
// spadeshybrid = ch_spadeshybrid_assemblies
// quast_contigs = QUAST.out.report
prodigal = ch_prodigal_gene_annotations
genomad = ch_genomad_virus_summary
bins = ch_binning_results_bins
unbins = ch_binning_results_unbins
refined_bins = ch_refined_bins
refined_unbins = ch_refined_unbins
busco = ch_busco_summary_specific
checkm = ch_checkm_tsv
gunc = ch_gunc_maxcss_level_tsv
quast_bins = ch_quast_bin_summaries
bin_summary = ch_bin_summaries
cat = ch_cat_tax_classification_names
gtdbtk = ch_gtdbtk_summaries
prokka = ch_prokka_faa
metaeuk = ch_metaeuk_easypredict_faa
versions = ch_versions
Expand Down
21 changes: 21 additions & 0 deletions workflows/mag/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// nf-test suite for the MAG workflow defined in ../main.nf.
nextflow_workflow {

    name "Test workflow: MAG"
    script "../main.nf"
    workflow "MAG"

    // Tags attached to this suite.
    tag "workflows"
    tag "mag"
    tag "mag_test"

    // NOTE(review): "paramters" is a typo for "parameters". The name is kept verbatim
    // because it presumably keys the stored snapshot -- confirm before renaming.
    test("Default paramters") {

        then {
            assertAll(
                // The workflow run must succeed.
                { assert workflow.success },
                // The workflow outputs must match the recorded snapshot.
                { assert snapshot(workflow.out).match() }
            )
        }
    }
}
Loading

0 comments on commit 2188e59

Please sign in to comment.