From 328740c9f07114e50bc89875baa8bc68385db063 Mon Sep 17 00:00:00 2001
From: gregorysprenger <42686628+gregorysprenger@users.noreply.github.com>
Date: Thu, 31 Aug 2023 12:13:53 -0400
Subject: [PATCH 01/49] handle local kraken2 db
---
modules/local/kraken2_db_preparation.nf | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/modules/local/kraken2_db_preparation.nf b/modules/local/kraken2_db_preparation.nf
index ebc9f912..78287f48 100644
--- a/modules/local/kraken2_db_preparation.nf
+++ b/modules/local/kraken2_db_preparation.nf
@@ -14,10 +14,21 @@ process KRAKEN2_DB_PREPARATION {
script:
"""
- mkdir db_tmp
- tar -xf "${db}" -C db_tmp
- mkdir database
- mv `find db_tmp/ -name "*.k2d"` database/
+ if [[ -d ${db} ]]; then
+ if [[ ${db} != database ]]; then
+ ln -sr ${db} database
+ fi
+
+ # Make sure {hash,opts,taxo}.k2d are found in directory input; abort with a clear message otherwise
+ if [[ \$(find database/ -name "*.k2d" | wc -l) -lt 3 ]]; then
+ echo "ERROR: Kraken2 requires '{hash,opts,taxo}.k2d' files." >&2; exit 1
+ fi
+ else
+ mkdir db_tmp
+ tar -xf "${db}" -C db_tmp
+ mkdir database
+ mv `find db_tmp/ -name "*.k2d"` database/
+ fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
From 7f8909b7f2ca4be68fae1a073fcbd2d6d3d2179c Mon Sep 17 00:00:00 2001
From: gregorysprenger <42686628+gregorysprenger@users.noreply.github.com>
Date: Thu, 31 Aug 2023 12:14:37 -0400
Subject: [PATCH 02/49] handle local centrifuge db
---
modules/local/centrifuge_db_preparation.nf | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/modules/local/centrifuge_db_preparation.nf b/modules/local/centrifuge_db_preparation.nf
index ecfbdb7b..b4926aff 100644
--- a/modules/local/centrifuge_db_preparation.nf
+++ b/modules/local/centrifuge_db_preparation.nf
@@ -14,7 +14,11 @@ process CENTRIFUGE_DB_PREPARATION {
script:
"""
- tar -xf "${db}"
+ if [[ -d ${db} ]]; then
+ ln -srf `find ${db}/ -type f -name "*.cf"` \${PWD}
+ else
+ tar -xf "${db}"
+ fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
From 118271c0d8eb66cf256336f6ad5ae49fc4b4da2e Mon Sep 17 00:00:00 2001
From: gregorysprenger <42686628+gregorysprenger@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:31:58 -0400
Subject: [PATCH 03/49] use busco_db and handle local and downloaded tar.gz
files
---
modules/local/busco.nf | 21 +++++++------
modules/local/busco_db_preparation.nf | 5 ++-
modules/local/busco_summary.nf | 5 +--
subworkflows/local/busco_qc.nf | 45 +++++++++++++++++++++------
subworkflows/local/gtdbtk.nf | 2 +-
workflows/mag.nf | 18 +++--------
6 files changed, 57 insertions(+), 39 deletions(-)
diff --git a/modules/local/busco.nf b/modules/local/busco.nf
index 9cf4b4ce..d009c6ba 100644
--- a/modules/local/busco.nf
+++ b/modules/local/busco.nf
@@ -8,8 +8,7 @@ process BUSCO {
input:
tuple val(meta), path(bin)
- path(db)
- path(download_folder)
+ tuple val(db_meta), path(db)
output:
tuple val(meta), path("short_summary.domain.*.${bin}.txt") , optional:true , emit: summary_domain
@@ -25,17 +24,19 @@ process BUSCO {
script:
def cp_augustus_config = workflow.profile.toString().indexOf("conda") != -1 ? "N" : "Y"
- def lineage_dataset_provided = params.busco_reference ? "Y" : "N"
+ def lineage_dataset_provided = "${db_meta.lineage}"
def busco_clean = params.busco_clean ? "Y" : "N"
- def p = "--auto-lineage"
- if (params.busco_reference){
- p = "--lineage_dataset dataset/${db}"
+ def p = ""
+ if ( "${lineage_dataset_provided}" == "Y" ) {
+ p += "--lineage_dataset dataset/${db}"
} else {
- if (params.busco_auto_lineage_prok)
- p = "--auto-lineage-prok"
- if (params.busco_download_path)
- p += " --offline --download_path ${download_folder}"
+ p = "--offline --download_path ${db}"
+ if (params.busco_auto_lineage_prok) {
+ p += " --auto-lineage-prok"
+ } else {
+ p += " --auto-lineage"
+ }
}
"""
run_busco.sh "${p}" "${cp_augustus_config}" "${db}" "${bin}" ${task.cpus} "${lineage_dataset_provided}" "${busco_clean}"
diff --git a/modules/local/busco_db_preparation.nf b/modules/local/busco_db_preparation.nf
index b78133a5..a56a5af6 100644
--- a/modules/local/busco_db_preparation.nf
+++ b/modules/local/busco_db_preparation.nf
@@ -10,9 +10,8 @@ process BUSCO_DB_PREPARATION {
path database
output:
- path "buscodb/*" , emit: db
- path database , emit: database
- path "versions.yml" , emit: versions
+ tuple val("${database.toString().replace(".tar.gz", "")}"), path("buscodb/*"), emit: db
+ path "versions.yml" , emit: versions
script:
"""
diff --git a/modules/local/busco_summary.nf b/modules/local/busco_summary.nf
index c4023276..9ed758e2 100644
--- a/modules/local/busco_summary.nf
+++ b/modules/local/busco_summary.nf
@@ -15,11 +15,12 @@ process BUSCO_SUMMARY {
path "versions.yml" , emit: versions
script:
- def auto = params.busco_reference ? "" : "-a"
+ def reference = "${params.busco_db.toString().contains('odb10')}"
+ def auto = reference ? "" : "-a"
def ss = summaries_specific.sort().size() > 0 ? "-ss ${summaries_specific}" : ""
def sd = summaries_domain.sort().size() > 0 ? "-sd ${summaries_domain}" : ""
def f = ""
- if (!params.busco_reference && failed_bins.sort().size() > 0)
+ if ("${reference}" == "false" && failed_bins.sort().size() > 0) // compare as strings: an interpolated string never equals boolean false
f = "-f ${failed_bins}"
"""
summary_busco.py $auto $ss $sd $f -o busco_summary.tsv
diff --git a/subworkflows/local/busco_qc.nf b/subworkflows/local/busco_qc.nf
index 6165be47..eb93703a 100644
--- a/subworkflows/local/busco_qc.nf
+++ b/subworkflows/local/busco_qc.nf
@@ -9,23 +9,48 @@ include { BUSCO_SUMMARY } from '../../modules/local/busco_summ
workflow BUSCO_QC {
take:
- busco_db_file // channel: path
- busco_download_folder // channel: path
- bins // channel: [ val(meta), path(bin) ]
+ busco_db // channel: path
+ bins // channel: [ val(meta), path(bin) ]
main:
- if (params.busco_reference){
- BUSCO_DB_PREPARATION ( busco_db_file )
- ch_busco_db = BUSCO_DB_PREPARATION.out.db
+ if ( busco_db.extension == 'gz' ) {
+ // Expects to be tar.gz!
+ BUSCO_DB_PREPARATION ( busco_db )
+
+ ch_db_for_busco = BUSCO_DB_PREPARATION.out.db
+ .map{
+ meta, db ->
+ def meta_new = [:]
+ meta_new['id'] = meta
+ meta_new['lineage'] = 'Y'
+ [ meta_new, db ]
+ }
+ } else if ( busco_db.isDirectory() ) {
+ // Set meta to match expected channel cardinality for BUSCO
+ ch_db_for_busco = Channel
+ .of(busco_db)
+ .map{
+ db ->
+ def meta = [:]
+ meta['id'] = db.toString().split('/').last()
+ if ("${meta['id'].toString().contains('odb10')}" == true) {
+ meta['lineage'] = 'Y'
+ } else {
+ meta['lineage'] = 'N'
+ }
+ [ meta, db ]
+ }
+ .collect()
} else {
- ch_busco_db = Channel.empty()
+ error("Unsupported object given to --busco_db, database must be supplied as either a directory or a .tar.gz file!")
}
+
BUSCO (
bins,
- ch_busco_db.collect().ifEmpty([]),
- busco_download_folder.collect().ifEmpty([])
+ ch_db_for_busco
)
- if (params.save_busco_reference){
+
+ if (params.save_busco_db){
// publish files downloaded by Busco
ch_downloads = BUSCO.out.busco_downloads.groupTuple().map{lin,downloads -> downloads[0]}.toSortedList().flatten()
BUSCO_SAVE_DOWNLOAD ( ch_downloads )
diff --git a/subworkflows/local/gtdbtk.nf b/subworkflows/local/gtdbtk.nf
index 21823962..f8020aa3 100644
--- a/subworkflows/local/gtdbtk.nf
+++ b/subworkflows/local/gtdbtk.nf
@@ -24,7 +24,7 @@ workflow GTDBTK {
def completeness = -1
def contamination = -1
def missing, duplicated
- if (params.busco_reference) {
+ if (params.busco_db.toString().contains('odb10')) {
missing = row.'%Missing (specific)' // TODO or just take '%Complete'?
duplicated = row.'%Complete and duplicated (specific)'
} else {
diff --git a/workflows/mag.nf b/workflows/mag.nf
index 11be8584..09e4119e 100644
--- a/workflows/mag.nf
+++ b/workflows/mag.nf
@@ -31,7 +31,7 @@ log.info logo + paramsSummaryLog(workflow) + citation
WorkflowMag.initialise(params, log, hybrid)
// Check input path parameters to see if they exist
-def checkPathParamList = [ params.input, params.multiqc_config, params.phix_reference, params.host_fasta, params.centrifuge_db, params.kraken2_db, params.cat_db, params.krona_db, params.gtdb_db, params.lambda_reference, params.busco_reference ]
+def checkPathParamList = [ params.input, params.multiqc_config, params.phix_reference, params.host_fasta, params.centrifuge_db, params.kraken2_db, params.cat_db, params.krona_db, params.gtdb_db, params.lambda_reference, params.busco_db ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
/*
@@ -145,17 +145,10 @@ if ( params.host_genome ) {
ch_host_fasta = Channel.empty()
}
-if(params.busco_reference){
- ch_busco_db_file = Channel
- .value(file( "${params.busco_reference}" ))
+if(params.busco_db){
+ ch_busco_db = file( "${params.busco_db}", checkIfExists: true )
} else {
- ch_busco_db_file = Channel.empty()
-}
-if (params.busco_download_path) {
- ch_busco_download_folder = Channel
- .value(file( "${params.busco_download_path}" ))
-} else {
- ch_busco_download_folder = Channel.empty()
+ ch_busco_db = []
}
if(params.checkm_db) {
@@ -805,8 +798,7 @@ workflow MAG {
*/
BUSCO_QC (
- ch_busco_db_file,
- ch_busco_download_folder,
+ ch_busco_db,
ch_input_bins_for_qc
)
ch_busco_summary = BUSCO_QC.out.summary
From d4b6258399503c8b996e3de1c7bf453ce04e6b05 Mon Sep 17 00:00:00 2001
From: gregorysprenger <42686628+gregorysprenger@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:37:58 -0400
Subject: [PATCH 04/49] update information about kraken, centrifuge, and busco
databases
---
conf/test.config | 2 +-
conf/test_adapterremoval.config | 2 +-
conf/test_ancient_dna.config | 2 +-
conf/test_bbnorm.config | 2 +-
conf/test_binrefinement.config | 2 +-
conf/test_full.config | 2 +-
conf/test_host_rm.config | 2 +-
conf/test_hybrid.config | 4 ++--
docs/output.md | 8 ++++----
docs/usage.md | 2 +-
lib/WorkflowMag.groovy | 13 ++-----------
nextflow.config | 5 ++---
nextflow_schema.json | 19 +++++++------------
13 files changed, 25 insertions(+), 40 deletions(-)
diff --git a/conf/test.config b/conf/test.config
index 348b95d5..9c93278f 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -26,7 +26,7 @@ params {
skip_krona = true
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
- busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
+ busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
busco_clean = true
skip_gtdbtk = true
skip_concoct = true
diff --git a/conf/test_adapterremoval.config b/conf/test_adapterremoval.config
index 92d51aec..d8bd581a 100644
--- a/conf/test_adapterremoval.config
+++ b/conf/test_adapterremoval.config
@@ -27,7 +27,7 @@ params {
skip_krona = true
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
- busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
+ busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
skip_gtdbtk = true
clip_tool = 'adapterremoval'
skip_concoct = true
diff --git a/conf/test_ancient_dna.config b/conf/test_ancient_dna.config
index 325362fc..ea672651 100644
--- a/conf/test_ancient_dna.config
+++ b/conf/test_ancient_dna.config
@@ -26,7 +26,7 @@ params {
skip_krona = true
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
- busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
+ busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
skip_gtdbtk = true
ancient_dna = true
binning_map_mode = 'own'
diff --git a/conf/test_bbnorm.config b/conf/test_bbnorm.config
index 5f481adf..55d48a8b 100644
--- a/conf/test_bbnorm.config
+++ b/conf/test_bbnorm.config
@@ -32,7 +32,7 @@ params {
skip_krona = true
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
- busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
+ busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
busco_clean = true
skip_gtdbtk = true
bbnorm = true
diff --git a/conf/test_binrefinement.config b/conf/test_binrefinement.config
index 85dda8db..bc1796d5 100644
--- a/conf/test_binrefinement.config
+++ b/conf/test_binrefinement.config
@@ -27,7 +27,7 @@ params {
skip_krona = true
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
- busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
+ busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
skip_gtdbtk = true
refine_bins_dastool = true
refine_bins_dastool_threshold = 0
diff --git a/conf/test_full.config b/conf/test_full.config
index 4917332e..ea694247 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -28,7 +28,7 @@ params {
spades_fix_cpus = 10
spadeshybrid_fix_cpus = 10
megahit_fix_cpu_1 = true
- // available options to enable reproducibility for BUSCO (--busco_download_path or --busco_reference) not used here
+ // available options to enable reproducibility for BUSCO (--busco_db) not used here
// to allow detection of possible problems in automated lineage selection mode using public databases
// test CAT with official taxonomic ranks only
diff --git a/conf/test_host_rm.config b/conf/test_host_rm.config
index b3487c6b..7af3bcd4 100644
--- a/conf/test_host_rm.config
+++ b/conf/test_host_rm.config
@@ -24,7 +24,7 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.host_rm.csv'
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
- busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
+ busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
skip_gtdbtk = true
skip_concoct = true
}
diff --git a/conf/test_hybrid.config b/conf/test_hybrid.config
index bc22d3d2..0600c88c 100644
--- a/conf/test_hybrid.config
+++ b/conf/test_hybrid.config
@@ -23,7 +23,7 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.hybrid.csv'
min_length_unbinned_contigs = 1
max_unbinned_contigs = 2
- busco_reference = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
- skip_gtdbtk = true
+ busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz"
+ skip_gtdbtk = true
skip_concoct = true
}
diff --git a/docs/output.md b/docs/output.md
index 902d0f12..8c56a713 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -484,7 +484,7 @@ For each bin or refined bin the median sequencing depth is computed based on the
#### BUSCO
-[BUSCO](https://busco.ezlab.org/) is a tool used to assess the completeness of a genome assembly. It is run on all the genome bins and high quality contigs obtained by the applied binning and/or binning refinement methods (depending on the `--postbinning_input` parameter). By default, BUSCO is run in automated lineage selection mode in which it first tries to select the domain and then a more specific lineage based on phylogenetic placement. If available, result files for both the selected domain lineage and the selected more specific lineage are placed in the output directory. If a lineage dataset is specified already with `--busco_reference`, only results for this specific lineage will be generated.
+[BUSCO](https://busco.ezlab.org/) is a tool used to assess the completeness of a genome assembly. It is run on all the genome bins and high quality contigs obtained by the applied binning and/or binning refinement methods (depending on the `--postbinning_input` parameter). By default, BUSCO is run in automated lineage selection mode in which it first tries to select the domain and then a more specific lineage based on phylogenetic placement. If available, result files for both the selected domain lineage and the selected more specific lineage are placed in the output directory. If a lineage dataset is specified already with `--busco_db`, only results for this specific lineage will be generated.
Output files
@@ -493,21 +493,21 @@ For each bin or refined bin the median sequencing depth is computed based on the
- `[assembler]-[bin]_busco.log`: Log file containing the standard output of BUSCO.
- `[assembler]-[bin]_busco.err`: File containing potential error messages returned from BUSCO.
- `short_summary.domain.[lineage].[assembler]-[bin].txt`: BUSCO summary of the results for the selected domain when run in automated lineage selection mode. Not available for bins for which a viral lineage was selected.
- - `short_summary.specific_lineage.[lineage].[assembler]-[bin].txt`: BUSCO summary of the results in case a more specific lineage than the domain could be selected or for the lineage provided via `--busco_reference`.
+ - `short_summary.specific_lineage.[lineage].[assembler]-[bin].txt`: BUSCO summary of the results in case a more specific lineage than the domain could be selected or for the lineage provided via `--busco_db`.
- `[assembler]-[bin]_buscos.[lineage].fna.gz`: Nucleotide sequence of all identified BUSCOs for used lineages (domain or specific).
- `[assembler]-[bin]_buscos.[lineage].faa.gz`: Aminoacid sequence of all identified BUSCOs for used lineages (domain or specific).
- `[assembler]-[bin]_prodigal.gff`: Genes predicted with Prodigal.
-If the parameter `--save_busco_reference` is set, additionally the used BUSCO lineage datasets are stored in the output directory.
+If the parameter `--save_busco_db` is set, additionally the used BUSCO lineage datasets are stored in the output directory.
Output files
- `GenomeBinning/QC/BUSCO/`
- `busco_downloads/`: All files and lineage datasets downloaded by BUSCO when run in automated lineage selection mode. (Can currently not be used to reproduce analysis, see the [nf-core/mag website documentation](https://nf-co.re/mag/usage#reproducibility) how to achieve reproducible BUSCO results).
- - `reference/*.tar.gz`: BUSCO reference lineage dataset that was provided via `--busco_reference`.
+ - `reference/*.tar.gz`: BUSCO reference lineage dataset that was provided via `--busco_db`.
diff --git a/docs/usage.md b/docs/usage.md
index f61f5621..45b8fa6c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -187,7 +187,7 @@ You can fix this by using the prameter `--megahit_fix_cpu_1`. In both cases, do
MetaBAT2 is run by default with a fixed seed within this pipeline, thus producing reproducible results.
-To allow also reproducible bin QC with BUSCO, run BUSCO providing already downloaded lineage datasets with `--busco_download_path` (BUSCO will be run using automated lineage selection in offline mode) or provide a specific lineage dataset via `--busco_reference` and use the parameter `--save_busco_reference`. This may be useful since BUSCO datasets are frequently updated and old versions do not always remain (easily) accessible.
+To also allow reproducible bin QC with BUSCO, provide a local directory of already downloaded and unpacked lineage datasets via `--busco_db` (BUSCO will then be run using automated lineage selection in offline mode), or provide a specific lineage dataset via `--busco_db` and use the parameter `--save_busco_db`. This may be useful since BUSCO datasets are frequently updated and old versions do not always remain (easily) accessible.
For the taxonomic bin classification with [CAT](https://github.com/dutilh/CAT), when running the pipeline with `--cat_db_generate` the parameter `--save_cat_db` can be used to also save the generated database to allow reproducibility in future runs. Note that when specifying a pre-built database with `--cat_db`, currently the database can not be saved.
diff --git a/lib/WorkflowMag.groovy b/lib/WorkflowMag.groovy
index 51822e4e..c290ecbf 100755
--- a/lib/WorkflowMag.groovy
+++ b/lib/WorkflowMag.groovy
@@ -102,22 +102,13 @@ class WorkflowMag {
Nextflow.error('Both --skip_binqc and --binqc_tool \'checkm\' are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool.')
}
if (params.skip_binqc) {
- if (params.busco_reference) {
- Nextflow.error('Both --skip_binqc and --busco_reference are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_reference.')
- }
- if (params.busco_download_path) {
- Nextflow.error('Both --skip_binqc and --busco_download_path are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_download_path.')
+ if (params.busco_db) {
+ Nextflow.error('Both --skip_binqc and --busco_db are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_db.')
}
if (params.busco_auto_lineage_prok) {
Nextflow.error('Both --skip_binqc and --busco_auto_lineage_prok are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_auto_lineage_prok.')
}
}
- if (params.busco_reference && params.busco_download_path) {
- Nextflow.error('Both --busco_reference and --busco_download_path are specified! Invalid combination, please specify either --busco_reference or --busco_download_path.')
- }
- if (params.busco_auto_lineage_prok && params.busco_reference) {
- Nextflow.error('Both --busco_auto_lineage_prok and --busco_reference are specified! Invalid combination, please specify either --busco_auto_lineage_prok or --busco_reference.')
- }
if (params.skip_binqc && !params.skip_gtdbtk) {
log.warn '--skip_binqc is specified, but --skip_gtdbtk is explictly set to run! GTDB-tk will be omitted because GTDB-tk bin classification requires bin filtering based on BUSCO or CheckM QC results to avoid GTDB-tk errors.'
diff --git a/nextflow.config b/nextflow.config
index 0ac8d964..329f70ad 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -121,10 +121,9 @@ params {
// Bin QC
skip_binqc = false
binqc_tool = 'busco'
- busco_reference = null
- busco_download_path = null
+ busco_db = null
busco_auto_lineage_prok = false
- save_busco_reference = false
+ save_busco_db = false
busco_clean = false
checkm_download_url = "https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz"
checkm_db = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5dbd2a26..85fce7c1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -481,12 +481,12 @@
"centrifuge_db": {
"type": "string",
"description": "Database for taxonomic binning with centrifuge.",
- "help_text": "E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz."
+ "help_text": "Local directory containing `*.cf` files or path to download compressed tar archive. E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz."
},
"kraken2_db": {
"type": "string",
"description": "Database for taxonomic binning with kraken2.",
- "help_text": "The database file must be a compressed tar archive that contains at least the three files `hash.k2d`, `opts.k2d` and `taxo.k2d`. E.g. ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken_8GB_202003.tgz."
+ "help_text": "Local directory or compressed tar archive that contains at least the three files `hash.k2d`, `opts.k2d` and `taxo.k2d`. E.g. ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken_8GB_202003.tgz."
},
"krona_db": {
"type": "string",
@@ -757,23 +757,18 @@
"description": "Specify which tool for bin quality-control validation to use.",
"enum": ["busco", "checkm"]
},
- "busco_reference": {
+ "busco_db": {
"type": "string",
- "description": "Download path for BUSCO lineage dataset, instead of using automated lineage selection.",
- "help_text": "E.g. https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz. Available databases are listed here: https://busco-data.ezlab.org/v5/data/lineages/."
- },
- "busco_download_path": {
- "type": "string",
- "description": "Path to local folder containing already downloaded and unpacked lineage datasets.",
- "help_text": "If provided, BUSCO analysis will be run in offline mode. Data can be downloaded from https://busco-data.ezlab.org/v5/data/ (files still need to be unpacked manually). Run in combination with automated lineage selection."
+ "description": "Download path for BUSCO lineage dataset or path to local directory containing already downloaded and unpacked lineage datasets.",
+ "help_text": "E.g. https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2020-03-06.tar.gz or '/path/to/buscodb' (files still need to be unpacked manually). Available databases are listed here: https://busco-data.ezlab.org/v5/data/lineages/."
},
"busco_auto_lineage_prok": {
"type": "boolean",
"description": "Run BUSCO with automated lineage selection, but ignoring eukaryotes (saves runtime)."
},
- "save_busco_reference": {
+ "save_busco_db": {
"type": "boolean",
- "description": "Save the used BUSCO lineage datasets provided via --busco_reference or downloaded when not using --busco_reference or --busco_download_path.",
+ "description": "Save the used BUSCO lineage datasets provided via `--busco_db`.",
"help_text": "Useful to allow reproducibility, as BUSCO datasets are frequently updated and old versions do not always remain accessible."
},
"busco_clean": {
From 4304cf6947529b1542f15a2c7497ed9b17632747 Mon Sep 17 00:00:00 2001
From: gregorysprenger <42686628+gregorysprenger@users.noreply.github.com>
Date: Fri, 15 Sep 2023 12:52:06 -0400
Subject: [PATCH 05/49] update parameter handling
---
modules/local/busco.nf | 13 +++++--------
modules/local/busco_summary.nf | 2 +-
2 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/modules/local/busco.nf b/modules/local/busco.nf
index d009c6ba..ab1c5917 100644
--- a/modules/local/busco.nf
+++ b/modules/local/busco.nf
@@ -27,16 +27,13 @@ process BUSCO {
def lineage_dataset_provided = "${db_meta.lineage}"
def busco_clean = params.busco_clean ? "Y" : "N"
- def p = ""
+ def p = params.busco_auto_lineage_prok ? "--auto-lineage-prok" : "--auto-lineage"
if ( "${lineage_dataset_provided}" == "Y" ) {
- p += "--lineage_dataset dataset/${db}"
+ p = "--lineage_dataset dataset/${db}"
+ } else if ( "${lineage_dataset_provided}" == "N" ) {
+ p += " --offline --download_path ${db}" // leading space keeps this separate from the auto-lineage flag already in p
} else {
- p = "--offline --download_path ${db}"
- if (params.busco_auto_lineage_prok) {
- p += " --auto-lineage-prok"
- } else {
- p += " --auto-lineage"
- }
+ lineage_dataset_provided = ""
}
"""
run_busco.sh "${p}" "${cp_augustus_config}" "${db}" "${bin}" ${task.cpus} "${lineage_dataset_provided}" "${busco_clean}"
diff --git a/modules/local/busco_summary.nf b/modules/local/busco_summary.nf
index 9ed758e2..e5015387 100644
--- a/modules/local/busco_summary.nf
+++ b/modules/local/busco_summary.nf
@@ -15,7 +15,7 @@ process BUSCO_SUMMARY {
path "versions.yml" , emit: versions
script:
- def reference = "${params.busco_db.toString().contains('odb10')}"
+ def reference = params.busco_db.toString().contains('odb10')
def auto = reference ? "" : "-a"
def ss = summaries_specific.sort().size() > 0 ? "-ss ${summaries_specific}" : ""
def sd = summaries_domain.sort().size() > 0 ? "-sd ${summaries_domain}" : ""
From 190129e48ea836d72adf2e06abc9a6324ce11290 Mon Sep 17 00:00:00 2001
From: gregorysprenger <42686628+gregorysprenger@users.noreply.github.com>
Date: Fri, 15 Sep 2023 12:53:43 -0400
Subject: [PATCH 06/49] set busco_db input as a path channel
---
workflows/mag.nf | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/workflows/mag.nf b/workflows/mag.nf
index 09e4119e..061a3734 100644
--- a/workflows/mag.nf
+++ b/workflows/mag.nf
@@ -145,8 +145,8 @@ if ( params.host_genome ) {
ch_host_fasta = Channel.empty()
}
-if(params.busco_db){
- ch_busco_db = file( "${params.busco_db}", checkIfExists: true )
+if (params.busco_db) {
+ ch_busco_db = file(params.busco_db, checkIfExists: true)
} else {
ch_busco_db = []
}
From 1961247b6fb73deda49038e2dfdfac134025e060 Mon Sep 17 00:00:00 2001
From: gregorysprenger <42686628+gregorysprenger@users.noreply.github.com>
Date: Fri, 15 Sep 2023 12:58:17 -0400
Subject: [PATCH 07/49] handle busco_db inputs and allow for busco to auto
download lineages
---
subworkflows/local/busco_qc.nf | 63 +++++++++++++++++++---------------
1 file changed, 36 insertions(+), 27 deletions(-)
diff --git a/subworkflows/local/busco_qc.nf b/subworkflows/local/busco_qc.nf
index eb93703a..4c00436b 100644
--- a/subworkflows/local/busco_qc.nf
+++ b/subworkflows/local/busco_qc.nf
@@ -13,36 +13,45 @@ workflow BUSCO_QC {
bins // channel: [ val(meta), path(bin) ]
main:
- if ( busco_db.extension == 'gz' ) {
- // Expects to be tar.gz!
- BUSCO_DB_PREPARATION ( busco_db )
+ if ( !busco_db.isEmpty() ) {
+ if ( busco_db.extension == "gz" ) {
+ // Expects to be tar.gz!
+ BUSCO_DB_PREPARATION ( busco_db )
- ch_db_for_busco = BUSCO_DB_PREPARATION.out.db
- .map{
- meta, db ->
- def meta_new = [:]
- meta_new['id'] = meta
- meta_new['lineage'] = 'Y'
- [ meta_new, db ]
- }
- } else if ( busco_db.isDirectory() ) {
- // Set meta to match expected channel cardinality for BUSCO
- ch_db_for_busco = Channel
- .of(busco_db)
- .map{
- db ->
- def meta = [:]
- meta['id'] = db.toString().split('/').last()
- if ("${meta['id'].toString().contains('odb10')}" == true) {
- meta['lineage'] = 'Y'
- } else {
- meta['lineage'] = 'N'
+ ch_db_for_busco = BUSCO_DB_PREPARATION.out.db
+ .map{
+ meta, db ->
+ def meta_new = [:]
+ meta_new['id'] = meta
+ meta_new['lineage'] = 'Y'
+ [ meta_new, db ]
+ }
+ } else if ( busco_db.isDirectory() ) {
+ // Set meta to match expected channel cardinality for BUSCO
+ ch_db_for_busco = Channel.of(busco_db) // busco_db is a plain file object here, so wrap it in a channel before applying operators
+ .map{
+ db ->
+ def meta = [:]
+ meta['id'] = db.toString().split('/').last()
+ if (meta['id'].toString().contains('odb10')) { // direct boolean test; an interpolated string never equals true
+ meta['lineage'] = 'Y'
+ } else {
+ meta['lineage'] = 'N'
+ }
+ [ meta, db ]
+ }
- [ meta, db ]
- }
- .collect()
+ .collect()
+ }
} else {
- error("Unsupported object given to --busco_db, database must be supplied as either a directory or a .tar.gz file!")
+ // Set BUSCO database to empty to allow for --auto-lineage
+ ch_db_for_busco = Channel.of([])
+ .map{
+ empty_db ->
+ def meta = [:]
+ meta['lineage'] = ""
+ [ meta, [] ]
+ }
+ .collect()
}
BUSCO (
From b6d3e784f3aea973cecf37c6ea7173c5d2021455 Mon Sep 17 00:00:00 2001
From: nf-core-bot
Date: Mon, 25 Sep 2023 15:16:30 +0000
Subject: [PATCH 08/49] Template update for nf-core/tools version 2.10
---
.devcontainer/devcontainer.json | 1 +
.github/CONTRIBUTING.md | 4 +-
.github/workflows/linting.yml | 2 +-
.github/workflows/release-announcments.yml | 68 +++++++++
CHANGELOG.md | 2 +-
CITATIONS.md | 2 +-
CODE_OF_CONDUCT.md | 133 ++++++++++++++----
README.md | 21 +--
assets/multiqc_config.yml | 6 +-
conf/modules.config | 9 ++
docs/output.md | 5 +-
docs/usage.md | 16 ++-
lib/NfcoreTemplate.groovy | 16 +++
lib/WorkflowMag.groovy | 2 +-
main.nf | 3 +
modules.json | 6 +-
.../custom/dumpsoftwareversions/main.nf | 2 +-
modules/nf-core/fastqc/main.nf | 8 +-
modules/nf-core/multiqc/main.nf | 2 +-
nextflow.config | 9 +-
nextflow_schema.json | 15 --
workflows/mag.nf | 1 +
22 files changed, 255 insertions(+), 78 deletions(-)
create mode 100644 .github/workflows/release-announcments.yml
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index ea27a584..4ecfbfe3 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -2,6 +2,7 @@
"name": "nfcore",
"image": "nfcore/gitpod:latest",
"remoteUser": "gitpod",
+ "runArgs": ["--privileged"],
// Configure tool-specific properties.
"customizations": {
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 2ac5f2d9..eef260f4 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -9,7 +9,9 @@ Please use the pre-filled template to save time.
However, don't be put off by this template - other more general issues and suggestions are welcome!
Contributions to the code are even more welcome ;)
-> If you need help using or modifying nf-core/mag then the best place to ask is on the nf-core Slack [#mag](https://nfcore.slack.com/channels/mag) channel ([join our Slack here](https://nf-co.re/join/slack)).
+:::info
+If you need help using or modifying nf-core/mag then the best place to ask is on the nf-core Slack [#mag](https://nfcore.slack.com/channels/mag) channel ([join our Slack here](https://nf-co.re/join/slack)).
+:::
## Contribution workflow
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 888cb4bc..b8bdd214 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -78,7 +78,7 @@ jobs:
- uses: actions/setup-python@v4
with:
- python-version: "3.8"
+ python-version: "3.11"
architecture: "x64"
- name: Install dependencies
diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml
new file mode 100644
index 00000000..6ad33927
--- /dev/null
+++ b/.github/workflows/release-announcments.yml
@@ -0,0 +1,68 @@
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+ release:
+ types: [published]
+ workflow_dispatch:
+
+jobs:
+ toot:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: rzr/fediverse-action@master
+ with:
+ access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+ host: "mstdn.science" # custom host if not "mastodon.social" (default)
+ # GitHub event payload
+ # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+ message: |
+ Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+ Please see the changelog: ${{ github.event.release.html_url }}
+
+ send-tweet:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/setup-python@v4
+ with:
+ python-version: "3.10"
+ - name: Install dependencies
+ run: pip install tweepy==4.14.0
+ - name: Send tweet
+ shell: python
+ run: |
+ import os
+ import tweepy
+
+ client = tweepy.Client(
+ access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
+ access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
+ consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
+ consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
+ )
+ tweet = os.getenv("TWEET")
+ client.create_tweet(text=tweet)
+ env:
+ TWEET: |
+ Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+ Please see the changelog: ${{ github.event.release.html_url }}
+ TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
+ TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
+ TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+ TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+ bsky-post:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: zentered/bluesky-post-action@v0.0.2
+ with:
+ post: |
+ Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+ Please see the changelog: ${{ github.event.release.html_url }}
+ env:
+ BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
+ BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
+ #
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3aa0b93d..70b6673d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v2.3.1dev - [date]
+## v2.4.0 - [date]
Initial release of nf-core/mag, created with the [nf-core](https://nf-co.re/) template.
diff --git a/CITATIONS.md b/CITATIONS.md
index 49d0bcbc..0e8ba8c7 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -12,7 +12,7 @@
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
- > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.
+ > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index f4fd052f..c089ec78 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,18 +1,20 @@
-# Code of Conduct at nf-core (v1.0)
+# Code of Conduct at nf-core (v1.4)
## Our Pledge
-In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of:
+In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of:
- Age
+- Ability
- Body size
+- Caste
- Familial status
- Gender identity and expression
- Geographical location
- Level of experience
- Nationality and national origins
- Native language
-- Physical and neurological ability
+- Neurodiversity
- Race or ethnicity
- Religion
- Sexual identity and orientation
@@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a
## Preamble
-> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply.
+:::note
+This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply.
+:::
-An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva.
+An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about).
+
+Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer.
nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals.
-We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc.
+We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc.
-Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities.
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities.
-We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC.
+We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC.
-Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re
+Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re.
## Our Responsibilities
-The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
+Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behaviour and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
-The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
-Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC.
+Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC.
-## When are where does this Code of Conduct apply?
+## When and where does this Code of Conduct apply?
-Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference:
+Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference):
- Communicating with an official project email address.
- Communicating with community members within the nf-core Slack channel.
- Participating in hackathons organised by nf-core (both online and in-person events).
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence.
-- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc.
+- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace.
+- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc.
- Representing nf-core on social media. This includes both official and personal accounts.
## nf-core cares 😊
-nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order):
+nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order):
- Ask for consent before sharing another community member’s personal information (including photographs) on social media.
- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity.
-- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !)
+- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !)
- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.)
- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can)
- Focus on what is best for the team and the community. (When in doubt, ask)
-- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn.
+- Accept feedback, yet be unafraid to question, deliberate, and learn.
- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!)
-- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**)
+- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**)
- Take breaks when you feel like you need them.
-- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.)
+- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack)
## nf-core frowns on 😕
-The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces.
+The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces:
- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom.
- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online.
- Spamming or trolling of individuals on social media.
-- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention.
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience.
+- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention.
+- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience.
### Online Trolling
-The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately.
+The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately.
-All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls.
+All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls.
-## Procedures for Reporting CoC violations
+## Procedures for reporting CoC violations
If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible.
-You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s).
+You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team.
+
+Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course.
+
+All reports will be handled with the utmost discretion and confidentiality.
+
+You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include:
+
+- Your contact information.
+- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct.
+- The behaviour that was in violation and the circumstances surrounding the incident.
+- The approximate time of the behaviour (if different than the time the report was made).
+- Other people involved in the incident, if applicable.
+- If you believe the incident is ongoing.
+- If there is a publicly available record (e.g. mailing list record, a screenshot).
+- Any additional information.
+
+After you file a report, one or more members of our Safety Team will contact you to follow up on your report.
+
+## Who will read and handle reports
+
+All reports will be read and handled by the members of the Safety Team at nf-core.
+
+If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups.
+
+To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with.
+
+## Reviewing reports
+
+After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is an immediate threat to participants’ safety.
+
+The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action.
+
+In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information.
-Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course.
+Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report.
-All reports will be handled with utmost discretion and confidentially.
+## Confidentiality
+
+All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse.
+
+We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved.
+
+## Enforcement
+
+Actions taken by nf-core’s Safety Team may include, but are not limited to:
+
+- Asking anyone to stop a behaviour.
+- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently.
+- Removing access to gather.town and Slack, either temporarily or permanently.
+- Communicating to all participants to reinforce our expectations for conduct and remind them what constitutes unacceptable behaviour; this may be public for practical reasons.
+- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident.
+- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently.
+- No action.
## Attribution and Acknowledgements
@@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially.
## Changelog
-### v1.0 - March 12th, 2021
+### v1.4 - February 8th, 2022
+
+- Included a new member of the Safety Team. Corrected a typographical error in the text.
+
+### v1.3 - December 10th, 2021
+
+- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text.
+
+### v1.2 - November 12th, 2021
+
+- Removed information specific to reporting CoC violations at the Hackathon in October 2021.
+
+### v1.1 - October 14th, 2021
+
+- Updated with names of new Safety Officers and specific information for the hackathon in October 2021.
+
+### v1.0 - March 15th, 2021
- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC.
diff --git a/README.md b/README.md
index f7a63bd0..c6f2cc49 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
# ![nf-core/mag](docs/images/nf-core-mag_logo_light.png#gh-light-mode-only) ![nf-core/mag](docs/images/nf-core-mag_logo_dark.png#gh-dark-mode-only)
-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![GitHub Actions CI Status](https://github.com/nf-core/mag/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/mag/actions?query=workflow%3A%22nf-core+CI%22)
+[![GitHub Actions Linting Status](https://github.com/nf-core/mag/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/mag/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
@@ -29,10 +30,11 @@
## Usage
-> **Note**
-> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
-> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
-> with `-profile test` before running the workflow on actual data.
+:::note
+If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
+to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
+with `-profile test` before running the workflow on actual data.
+:::