From 7f1d4b01f26e026042e08b7e744a01a596c1fd44 Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Tue, 5 Mar 2019 13:49:20 +0000
Subject: [PATCH 01/18] Check for fastq.gz in design

---
 bin/get_autosomes.py   |  4 ++--
 bin/reformat_design.py |  7 ++++++-
 environment.yml        | 28 ++++++++++++++--------------
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/bin/get_autosomes.py b/bin/get_autosomes.py
index 7330fbfa..466ade75 100755
--- a/bin/get_autosomes.py
+++ b/bin/get_autosomes.py
@@ -2,7 +2,7 @@
 
 #######################################################################
 #######################################################################
-## Created on January 23rd 2019 to get autosomes from FAI file
+## Created on February 26th 2019 to render methods
 #######################################################################
 #######################################################################
 
@@ -21,7 +21,7 @@ argParser = argparse.ArgumentParser(description=Description, epilog=Epilog)
 
 ## REQUIRED PARAMETERS
-argParser.add_argument('FAI_FILE', help="FAI input file.")
+argParser.add_argument('METHODS', help="FAI input file.")
 argParser.add_argument('OUT_FILE', help="Output file containing one chromosome per line.")
 
 args = argParser.parse_args()

diff --git a/bin/reformat_design.py b/bin/reformat_design.py
index d4e30ab1..4fbca5da 100755
--- a/bin/reformat_design.py
+++ b/bin/reformat_design.py
@@ -70,8 +70,13 @@ def reformat_design(DesignFileIn,DesignFileOut):
             print "{}: Replicate id not an integer!\nLine: '{}'".format(ERROR_STR,line.strip())
             sys.exit(1)
 
-        ## CHECK FASTQ FILES EXIST PER SAMPLE
         for fastq in fastQFiles:
+            ## CHECK FASTQ FILE EXTENSION
+            if fastq[-9:] != '.fastq.gz':
+                print "{}: FastQ file has incorrect extension (has to be '.fastq.gz') - {}\nLine: '{}'".format(ERROR_STR,fastq,line.strip())
+                sys.exit(1)
+
+            ## CHECK FASTQ FILES EXIST PER SAMPLE
             if fastq[:4] not in ['http']:
                 if not os.path.exists(fastq):
                     print "{}: FastQ file does not exist - {}\nLine: '{}'".format(ERROR_STR,fastq,line.strip())

diff --git a/environment.yml b/environment.yml
index 7abc202a..1d8625f6 100755
--- a/environment.yml
+++ b/environment.yml
@@ -16,19 +16,19 @@ dependencies:
   - conda-forge::r-upsetr=1.3.3
 
   ## bioconda packages
-  - fastqc=0.11.8
-  - trim-galore=0.5.0
-  - bwa=0.7.17
-  - samtools=1.9
-  - picard=2.18.26
-  - bamtools=2.5.1
-  - pysam=0.15.2
-  - bedtools=2.27.1
-  - ucsc-bedgraphtobigwig=377
-  - macs2=2.1.2
-  - homer=4.9.1
-  - ataqv=1.0.0
-  - subread=1.6.3
-  - multiqc=1.7
+  - bioconda::fastqc=0.11.8
+  - bioconda::trim-galore=0.5.0
+  - bioconda::bwa=0.7.17
+  - bioconda::samtools=1.9
+  - bioconda::picard=2.18.26
+  - bioconda::bamtools=2.5.1
+  - bioconda::pysam=0.15.2
+  - bioconda::bedtools=2.27.1
+  - bioconda::ucsc-bedgraphtobigwig=377
+  - bioconda::macs2=2.1.2
+  - bioconda::homer=4.9.1
+  - bioconda::ataqv=1.0.0
+  - bioconda::subread=1.6.3
+  - bioconda::multiqc=1.7
   - bioconda::bioconductor-deseq2=1.20.0
   - bioconda::bioconductor-vsn=3.46.0

From c19aeb358774e62af7bb7faf445447311ef8b654 Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Tue, 5 Mar 2019 13:54:01 +0000
Subject: [PATCH 02/18] Changed back due to unplanned edits

---
 bin/get_autosomes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 mode change 100755 => 100644 bin/get_autosomes.py

diff --git a/bin/get_autosomes.py b/bin/get_autosomes.py
old mode 100755
new mode 100644
index 466ade75..7330fbfa
--- a/bin/get_autosomes.py
+++ b/bin/get_autosomes.py
@@ -2,7 +2,7 @@
 
 #######################################################################
####################################################################### -## Created on February 26th 2019 to render methods +## Created on January 23rd 2019 to get autosomes from FAI file ####################################################################### ####################################################################### @@ -21,7 +21,7 @@ argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -argParser.add_argument('METHODS', help="FAI input file.") +argParser.add_argument('FAI_FILE', help="FAI input file.") argParser.add_argument('OUT_FILE', help="Output file containing one chromosome per line.") args = argParser.parse_args() From ed24c83eaa03b3b79231e92018be0ed28aa64a1d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 5 Mar 2019 17:02:27 +0000 Subject: [PATCH 03/18] Changed environment.yml --- bin/get_autosomes.py | 2 +- environment.yml | 50 ++++++++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/bin/get_autosomes.py b/bin/get_autosomes.py index 7330fbfa..a8603b6b 100644 --- a/bin/get_autosomes.py +++ b/bin/get_autosomes.py @@ -2,7 +2,7 @@ ####################################################################### ####################################################################### -## Created on January 23rd 2019 to get autosomes from FAI file +## Created on January 23rd 2019 to get autosomes from samtools FAI file ####################################################################### ####################################################################### diff --git a/environment.yml b/environment.yml index 1d8625f6..6eadb3e6 100755 --- a/environment.yml +++ b/environment.yml @@ -5,30 +5,30 @@ channels: - defaults dependencies: ## conda-forge packages - - conda-forge::r-base=3.4.1 - - conda-forge::r-optparse=1.6.0 - - conda-forge::r-rcolorbrewer=1.1_2 - - conda-forge::r-ggplot2=3.1.0 - - conda-forge::r-reshape2=1.4.3 - - conda-forge::r-scales=1.0.0 - - conda-forge::r-pheatmap=1.0.10 - - conda-forge::r-lattice=0.20_35 - - conda-forge::r-upsetr=1.3.3 + - r-base=3.4.1 + - r-optparse=1.6.0 + - r-rcolorbrewer=1.1_2 + - r-ggplot2=3.1.0 + - r-reshape2=1.4.3 + - r-scales=1.0.0 + - r-pheatmap=1.0.10 + - r-lattice=0.20_35 + - r-upsetr=1.3.3 ## bioconda packages - - bioconda::fastqc=0.11.8 - - bioconda::trim-galore=0.5.0 - - bioconda::bwa=0.7.17 - - bioconda::samtools=1.9 - - bioconda::picard=2.18.26 - - bioconda::bamtools=2.5.1 - - bioconda::pysam=0.15.2 - - bioconda::bedtools=2.27.1 - - bioconda::ucsc-bedgraphtobigwig=377 - - bioconda::macs2=2.1.2 - - bioconda::homer=4.9.1 - - bioconda::ataqv=1.0.0 - - bioconda::subread=1.6.3 - - bioconda::multiqc=1.7 - - bioconda::bioconductor-deseq2=1.20.0 - - bioconda::bioconductor-vsn=3.46.0 + - fastqc=0.11.8 + - trim-galore=0.5.0 + - bwa=0.7.17 + - samtools=1.9 + - picard=2.18.26 + - bamtools=2.5.1 + - pysam=0.15.2 + - bedtools=2.27.1 + - ucsc-bedgraphtobigwig=377 + - macs2=2.1.2 + - homer=4.9.1 + - ataqv=1.0.0 + - subread=1.6.3 + - multiqc=1.7 + - bioconductor-deseq2=1.20.0 + - bioconductor-vsn=3.46.0 From eb52ad55b07cc1b312868b44c20fa908848d372e Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 5 Mar 2019 21:15:31 +0000 Subject: [PATCH 04/18] Test transfer --- test.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test.txt diff --git a/test.txt b/test.txt new file mode 100644 index 00000000..e69de29b From a5fbdaf25586776ca3cffa4d93d3f3519d398350 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 5 Mar 2019 21:16:20 +0000 
Subject: [PATCH 05/18] Test transfer --- test.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test.txt diff --git a/test.txt b/test.txt deleted file mode 100644 index e69de29b..00000000 From cde13ccef1a004d117fe682808c4cbcd6f96550a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 14 Mar 2019 10:07:45 +0000 Subject: [PATCH 06/18] Pass linting with tools 1.5 --- .travis.yml | 2 +- README.md | 2 -- Singularity | 18 ---------------- conf/base.config | 2 -- environment.yml | 54 ++++++++++++++++++++++++------------------------ nextflow.config | 15 ++++++-------- 6 files changed, 34 insertions(+), 59 deletions(-) delete mode 100755 Singularity diff --git a/.travis.yml b/.travis.yml index f0ac276c..03ae6bfe 100755 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/atacseq:dev # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/atacseq:dev nfcore/atacseq:latest + - docker tag nfcore/atacseq:dev nfcore/atacseq:dev install: # Install Nextflow diff --git a/README.md b/README.md index 0e7c644c..52920085 100755 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/atacseq.svg)](https://hub.docker.com/r/nfcore/atacseq) -![Singularity Container available]( -https://img.shields.io/badge/singularity-available-7E4C74.svg) ### Introduction diff --git a/Singularity b/Singularity deleted file mode 100755 index dc97a499..00000000 --- a/Singularity +++ /dev/null @@ -1,18 +0,0 @@ -From:nfcore/base -Bootstrap:docker - -%labels - MAINTAINER Harshil Patel - DESCRIPTION Singularity image containing all requirements for nf-core/atacseq pipeline - VERSION 1.0dev - -%environment - PATH=/opt/conda/envs/nf-core-atacseq-1.0dev/bin:$PATH - export PATH - -%files - environment.yml / - -%post - /opt/conda/bin/conda env create -f /environment.yml - /opt/conda/bin/conda clean -a diff --git a/conf/base.config b/conf/base.config index eb8f5044..7c029aa1 100755 --- a/conf/base.config +++ b/conf/base.config @@ -19,8 +19,6 @@ params { process { - container = params.container - cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 2.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } diff --git a/environment.yml b/environment.yml index 6eadb3e6..36e7fabf 100755 --- a/environment.yml +++ b/environment.yml @@ -4,31 +4,31 @@ channels: - bioconda - defaults dependencies: - ## conda-forge packages - - r-base=3.4.1 - - r-optparse=1.6.0 - - r-rcolorbrewer=1.1_2 - - r-ggplot2=3.1.0 - - r-reshape2=1.4.3 - - r-scales=1.0.0 - - r-pheatmap=1.0.10 - - r-lattice=0.20_35 - - r-upsetr=1.3.3 + ## conda-forge packages + - r-base=3.4.1 + - r-optparse=1.6.0 + - r-rcolorbrewer=1.1_2 + - r-ggplot2=3.1.0 + - r-reshape2=1.4.3 + - r-scales=1.0.0 + - r-pheatmap=1.0.10 + - r-lattice=0.20_35 + - r-upsetr=1.3.3 - ## bioconda packages - - fastqc=0.11.8 - - trim-galore=0.5.0 - - bwa=0.7.17 - - samtools=1.9 - - picard=2.18.26 - - bamtools=2.5.1 - - pysam=0.15.2 - - bedtools=2.27.1 - - ucsc-bedgraphtobigwig=377 - - macs2=2.1.2 - - homer=4.9.1 - - ataqv=1.0.0 - - subread=1.6.3 - - multiqc=1.7 - - bioconductor-deseq2=1.20.0 - - bioconductor-vsn=3.46.0 + ## bioconda packages + - fastqc=0.11.8 + - trim-galore=0.5.0 + - bwa=0.7.17 + - samtools=1.9 + - picard=2.18.26 + - bamtools=2.5.1 + - 
pysam=0.15.2 + - bedtools=2.27.1 + - ucsc-bedgraphtobigwig=377 + - macs2=2.1.2 + - homer=4.9.1 + - ataqv=1.0.0 + - subread=1.6.3 + - multiqc=1.7 + - bioconductor-deseq2=1.20.0 + - bioconductor-vsn=3.46.0 diff --git a/nextflow.config b/nextflow.config index d7fe8d20..f786d731 100755 --- a/nextflow.config +++ b/nextflow.config @@ -11,8 +11,6 @@ // Global default params, used in configs params { - container = 'nfcore/atacseq:latest' // Container slug. Stable releases should specify release tag! - // Options: Generic genome = false singleEnd = false @@ -69,6 +67,10 @@ params { } +// Container slug. Stable releases should specify release tag! +// Developmental code should specify :dev +process.container = 'nfcore/atacseq:dev' + // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -83,13 +85,8 @@ profiles { awsbatch { includeConfig 'conf/awsbatch.config' } conda { process.conda = "$baseDir/environment.yml" } debug { process.beforeScript = 'echo $HOSTNAME' } - docker { - docker.enabled = true - process.container = params.container - } - singularity { - singularity.enabled = true - } + docker { docker.enabled = true } + singularity { singularity.enabled = true } test { includeConfig 'conf/test.config' } } From 9d23f069babd83f081018a080b3b679957b00d37 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 4 Apr 2019 14:01:51 +0100 Subject: [PATCH 07/18] Major template changes in line with tools v1.5 --- .travis.yml | 7 +- CODE_OF_CONDUCT.md | 2 +- Dockerfile | 4 +- LICENSE | 2 + README.md | 6 +- assets/bamtools_filter_pe.json | 2 +- assets/bamtools_filter_se.json | 2 +- assets/sendmail_template.txt | 6 +- conf/awsbatch.config | 11 ++- conf/base.config | 21 ++--- docs/configuration/adding_your_own.md | 4 +- docs/configuration/local.md | 11 ++- docs/installation.md | 52 ++++++------ docs/troubleshooting.md | 2 +- docs/usage.md | 98 ++++++++++----------- environment.yml | 2 + main.nf | 118 ++++++++++++++++++-------- nextflow.config | 6 +- 18 files changed, 205 insertions(+), 151 deletions(-) diff --git a/.travis.yml b/.travis.yml index 03ae6bfe..b00e5355 100755 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/atacseq:dev # Fake the tag locally so that the pipeline runs properly + # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - docker tag nfcore/atacseq:dev nfcore/atacseq:dev install: @@ -21,10 +22,12 @@ install: - wget -qO- get.nextflow.io | bash - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow # Install nf-core/tools - - pip install --upgrade pip # Get rid of dependency resolve issues from older pip versions + - pip install --upgrade pip - pip install nf-core # Reset - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests + # Install markdownlint-cli + - sudo apt-get install npm && npm install -g markdownlint-cli env: - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work @@ -33,5 +36,7 @@ env: script: # Lint the pipeline code - nf-core lint ${TRAVIS_BUILD_DIR} + # Lint the documentation + - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml # Run the pipeline with the test profile - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 21096193..8e7ae058 100755 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project 
spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [`Slack`](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. diff --git a/Dockerfile b/Dockerfile index a8dda009..3ffeca97 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,6 @@ FROM nfcore/base - LABEL authors="harshil.patel@crick.ac.uk" \ - maintainer="harshil.patel@crick.ac.uk" \ - description="Docker image containing all requirements for the nfcore/atacseq pipeline" + description="Docker image containing all requirements for the nfcore/atacseq pipeline" COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a diff --git a/LICENSE b/LICENSE index 9cf10627..515e402d 100755 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,7 @@ MIT License +Copyright (c) Harshil Patel + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights diff --git a/README.md b/README.md index 52920085..716882ea 100755 --- a/README.md +++ b/README.md @@ -71,8 +71,6 @@ The pipeline was originally written by the [The Bioinformatics & Biostatistics G The pipeline was developed by [Harshil Patel](mailto:harshil.patel@crick.ac.uk). -The [nf-core/rnaseq](https://github.com/nf-core/rnaseq) and [nf-core/chipseq](https://github.com/nf-core/chipseq) pipelines developed by Phil Ewels were used as a template for this pipeline. Many thanks to Phil for all of his help and advice, and the team at SciLifeLab. +The [nf-core/rnaseq](https://github.com/nf-core/rnaseq) and [nf-core/chipseq](https://github.com/nf-core/chipseq) pipelines developed by Phil Ewels were initially used as a template for this pipeline. Many thanks to Phil for all of his help and advice, and the team at SciLifeLab. -Many thanks to other who have helped out along the way too, including (but not limited to): -[@pditommaso](https://github.com/pditommaso), -[@apeltzer](https://github.com/apeltzer), [@sven1103](https://github.com/sven1103), [@micans](https://github.com/micans). +Many thanks to other who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@sven1103](https://github.com/sven1103), [@MaxUlysse](https://github.com/MaxUlysse), [@micans](https://github.com/micans), [@pditommaso](https://github.com/pditommaso). 
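The Dockerfile above simply layers the pipeline's `environment.yml` onto `nfcore/base`. As a rough local exercise of it — assuming Docker is installed, the command is run from the repository root, and reusing the same `:dev` tag as the `.travis.yml` change earlier in this patch (the commands below are illustrative, not part of the patch):

```bash
# Build the pipeline image from the repository root; the Dockerfile copies
# environment.yml into the image and resolves it with conda
docker build -t nfcore/atacseq:dev .

# Spot-check that the conda environment was baked in as expected
docker run --rm nfcore/atacseq:dev conda list
```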
diff --git a/assets/bamtools_filter_pe.json b/assets/bamtools_filter_pe.json index c22b6fca..34e2c70e 100755 --- a/assets/bamtools_filter_pe.json +++ b/assets/bamtools_filter_pe.json @@ -9,7 +9,7 @@ }, { "id" : "mismatch", - "tag" : "NM:<=4" + "tag" : "NM:<=3" }, { "id" : "cigar", diff --git a/assets/bamtools_filter_se.json b/assets/bamtools_filter_se.json index 8c4e3834..9311b7f1 100755 --- a/assets/bamtools_filter_se.json +++ b/assets/bamtools_filter_se.json @@ -1,7 +1,7 @@ { "filters" : [ { "id" : "mismatch", - "tag" : "NM:<=4" + "tag" : "NM:<=3" }, { "id" : "cigar", diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index fd1cd739..cd34644e 100755 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -1,11 +1,11 @@ To: $email Subject: $subject Mime-Version: 1.0 -Content-Type: multipart/related;boundary="nfmimeboundary" +Content-Type: multipart/related;boundary="nfcoremimeboundary" ---nfmimeboundary +--nfcoremimeboundary Content-Type: text/html; charset=utf-8 $email_html ---nfmimeboundary-- +--nfcoremimeboundary-- diff --git a/conf/awsbatch.config b/conf/awsbatch.config index 79078c7b..14af5866 100755 --- a/conf/awsbatch.config +++ b/conf/awsbatch.config @@ -1,10 +1,15 @@ /* * ------------------------------------------------- - * Nextflow config file for AWS Batch + * Nextflow config file for running on AWS batch * ------------------------------------------------- - * Imported under the 'awsbatch' Nextflow profile in nextflow.config - * Uses docker for software depedencies automagically, so not specified here. + * Base config needed for running with -profile awsbatch */ +params { + config_profile_name = 'AWSBATCH' + config_profile_description = 'AWSBATCH Cloud Profile' + config_profile_contact = 'Alexander Peltzer (@apeltzer)' + config_profile_url = 'https://aws.amazon.com/de/batch/' +} aws.region = params.awsregion process.executor = 'awsbatch' diff --git a/conf/base.config b/conf/base.config index 7c029aa1..4ac857c3 100755 --- a/conf/base.config +++ b/conf/base.config @@ -9,22 +9,14 @@ * run on the logged in environment. */ -// Defaults config params, may be overwritten by later configs -params { - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' -} - process { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 2.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } - maxRetries = 2 + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + maxRetries = 1 maxErrors = '-1' // Process-specific resource requirements @@ -44,4 +36,13 @@ process { withName:get_software_versions { cache = false } + +} + +// Defaults config params, may be overwritten by later configs +params { + max_memory = 128.GB + max_cpus = 16 + max_time = 240.h + igenomes_base = 's3://ngi-igenomes/igenomes/' } diff --git a/docs/configuration/adding_your_own.md b/docs/configuration/adding_your_own.md index 7551ea49..c84edaad 100755 --- a/docs/configuration/adding_your_own.md +++ b/docs/configuration/adding_your_own.md @@ -57,7 +57,7 @@ To specify singularity usage in your pipeline config file, add the following: ```nextflow singularity.enabled = true -process.container = "shub://nf-core/atacseq" +process.container = "nf-core/atacseq" ``` If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. 
@@ -66,7 +66,7 @@ Instead, you'll have to do this yourself manually first, transfer the image file First, pull the image file where you have an internet connection: ```bash -singularity pull --name nf-core-atacseq.simg shub://nf-core/atacseq +singularity pull --name nf-core-atacseq.simg nf-core/atacseq ``` Then transfer this file and point the config file to the image: diff --git a/docs/configuration/local.md b/docs/configuration/local.md index 952b5723..81382053 100644 --- a/docs/configuration/local.md +++ b/docs/configuration/local.md @@ -3,27 +3,26 @@ If running the pipeline in a local environment, we highly recommend using either Docker or Singularity. ## Docker -Docker is a great way to run nf-core/atacseq, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. +Docker is a great way to run `nf-core/atacseq`, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required. The nf-core/atacseq profile comes with a configuration profile for docker, making it very easy to use. This also comes with the required presets to use the AWS iGenomes resource, meaning that if using common reference genomes you just specify the reference ID and it will be automatically downloaded from AWS S3. +Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required. The `nf-core/atacseq` profile comes with a configuration profile for docker, making it very easy to use. First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) Then, simply run the analysis pipeline: + ```bash nextflow run nf-core/atacseq -profile docker --genome '' --design '' ``` -Nextflow will recognise `nf-core/atacseq` and download the pipeline from GitHub. The `-profile docker` configuration lists the [nf-core/atacseq](https://hub.docker.com/r/nf-core/atacseq/) image that we have created and is hosted at dockerhub, and this is downloaded. - -For more information about how to work with reference genomes, see [`docs/configuration/reference_genomes.md`](reference_genomes.md). +Nextflow will recognise `nf-core/atacseq` and download the pipeline from GitHub. The `-profile docker` configuration lists the [nf-core/atacseq](https://hub.docker.com/r/nfcore/atacseq/) image that we have created and is hosted at dockerhub, and this is downloaded. ### Pipeline versions The public docker images are tagged with the same version numbers as the code, which you can use to ensure reproducibility. When running the pipeline, specify the pipeline version with `-r`, for example `-r 1.0`. This uses pipeline code and docker image from this tagged version. ## Singularity image -Many HPC environments are not able to run Docker due to security issues. [Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. Even better, it can use create images directly from dockerhub. +Many HPC environments are not able to run Docker due to security issues. [Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. Even better, it can use images created directly from dockerhub. 
To use the singularity image for a single run, use `-with-singularity`. This will download the docker container from dockerhub and create a singularity image for you dynamically. diff --git a/docs/installation.md b/docs/installation.md index c0ce8fb8..fafea453 100755 --- a/docs/installation.md +++ b/docs/installation.md @@ -2,18 +2,19 @@ To start using the nf-core/atacseq pipeline, follow the steps below: -1. [Install Nextflow](#1-install-nextflow) -2. [Install the pipeline](#2-install-the-pipeline) - * [Automatic](#21-automatic) - * [Offline](#22-offline) - * [Development](#23-development) -3. [Pipeline configuration](#3-pipeline-configuration) - * [Software deps: Docker and Singularity](#31-software-deps-docker-and-singularity) - * [Software deps: Bioconda](#32-software-deps-bioconda) - * [Configuration profiles](#33-configuration-profiles) -4. [Reference genomes](#4-reference-genomes) - -## 1) Install NextFlow +* [Install NextFlow](#install-nextflow) +* [Install the pipeline](#install-the-pipeline) + * [Automatic](#automatic) + * [Offline](#offline) + * [Development](#development) +* [Pipeline configuration](#pipeline-configuration) + * [Docker](#docker) + * [Singularity](#singularity) + * [Conda](#conda) + * [Configuration profiles](#configuration-profiles) +* [Reference genomes](#reference-genomes) + +## Install NextFlow Nextflow runs on most POSIX systems (Linux, Mac OSX etc). It can be installed by running the following commands: ```bash @@ -31,12 +32,12 @@ mv nextflow ~/bin/ See [nextflow.io](https://www.nextflow.io/) for further instructions on how to install and configure Nextflow. -## 2) Install the pipeline +## Install the pipeline -#### 2.1) Automatic +### Automatic This pipeline itself needs no installation - NextFlow will automatically fetch it from GitHub if `nf-core/atacseq` is specified as the pipeline name. -#### 2.2) Offline +### Offline The above method requires an internet connection so that Nextflow can download the pipeline files. If you're running on a system that has no internet connection, you'll need to download and transfer the pipeline files manually: ```bash @@ -53,12 +54,15 @@ To stop nextflow from looking for updates online, you can tell it to run in offl export NXF_OFFLINE='TRUE' ``` -#### 2.3) Development +### Development If you would like to make changes to the pipeline, it's best to make a fork on GitHub and then clone the files. Once cloned you can run the pipeline directly as above. -## 3) Pipeline configuration -By default, the pipeline loads a basic server configuration [`conf/base.config`](../conf/base.config). This uses a number of sensible defaults for process requirements and is suitable for running on a simple (if powerful!) basic server. + +## Pipeline configuration +By default, the pipeline loads a basic server configuration [`conf/base.config`](../conf/base.config) +This uses a number of sensible defaults for process requirements and is suitable for running +on a simple (if powerful!) local server. Be warned of two important points about this default configuration: @@ -67,13 +71,13 @@ Be warned of two important points about this default configuration: * See the [nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for information about running with other hardware backends. Most job scheduler systems are natively supported. 2. Nextflow will expect all software to be installed and available on the `PATH` * It's expected to use an additional config profile for docker, singularity or conda support. See below. 
- -#### 3.1) Software deps: Docker + +### Docker First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/atacseq). -#### 3.1) Software deps: Singularity +### Singularity If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative. The process is very similar: running the pipeline with the option `-profile singularity` tells Nextflow to enable singularity for this run. An image containing all of the software requirements will be automatically fetched and used from singularity hub. @@ -91,16 +95,16 @@ nextflow run /path/to/nf-core-atacseq -with-singularity nf-core-atacseq.simg Remember to pull updated versions of the singularity image if you update the pipeline. -#### 3.2) Software deps: conda +### Conda If you're not able to use Docker _or_ Singularity, you can instead use conda to manage the software requirements. This is slower and less reproducible than the above, but is still better than having to install all requirements yourself! The pipeline ships with a conda environment file and nextflow has built-in support for this. To use it first ensure that you have conda installed (we recommend [miniconda](https://conda.io/miniconda.html)), then follow the same pattern as above and use the flag `-profile conda` -#### 3.3) Configuration profiles +### Configuration profiles See [`docs/configuration/adding_your_own.md`](configuration/adding_your_own.md) -## 4) Reference genomes +## Reference genomes See [`docs/configuration/reference_genomes.md`](configuration/reference_genomes.md) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 5a498c3c..0056d472 100755 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -1,7 +1,7 @@ # nf-core/atacseq: Troubleshooting ## Extra resources and getting help -If you still have an issue with running the pipeline then feel free to contact us. +If you still have an issue with running the pipeline then feel free to contact us in the `atacseq` channel on [`Slack`](https://nf-core-invite.herokuapp.com/). Have a look at the [pipeline website](https://github.com/nf-core/atacseq) to find out how. If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [google group](https://groups.google.com/forum/#!forum/nextflow). 
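Pulling the installation, offline, and profile documentation above together, a hypothetical end-to-end offline setup could look like the sketch below; the `/my-pipelines/` path, the image name, and the reference/design file names are placeholders rather than values taken from this patch:

```bash
# While online: fetch a pipeline snapshot and pre-build the Singularity image
wget https://github.com/nf-core/atacseq/archive/master.zip
unzip master.zip -d /my-pipelines/
singularity pull --name nf-core-atacseq.simg docker://nfcore/atacseq:dev

# On the offline machine: stop Nextflow looking for remote updates, then run
export NXF_OFFLINE='TRUE'
nextflow run /my-pipelines/atacseq-master -with-singularity nf-core-atacseq.simg \
    --fasta genome.fa --gtf genes.gtf --design design.csv
```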
diff --git a/docs/usage.md b/docs/usage.md index 2fc26e97..58223a97 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,65 +2,61 @@ ## Table of contents -* [Introduction](#general-nextflow-info) +* [Table of contents](#table-of-contents) +* [Introduction](#introduction) * [Running the pipeline](#running-the-pipeline) -* [Updating the pipeline](#updating-the-pipeline) -* [Reproducibility](#reproducibility) + * [Updating the pipeline](#updating-the-pipeline) + * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) - * [`-profile`](#-profile-single-dash) - * [`awsbatch`](#awsbatch) - * [`conda`](#conda) - * [`docker`](#docker) - * [`singularity`](#singularity) - * [`test`](#test) - * [`--design`](#--design) + * [`-profile`](#-profile) + * [`--design`](#--design) * [Generic arguments](#generic-arguments) - * [`--singleEnd`](#--singleEnd) - * [`--narrowPeak`](#--narrowPeak) - * [`--fragment_size`](#--fragment_size) + * [`--singleEnd`](#--singleend) + * [`--narrowPeak`](#--narrowpeak) + * [`--fragment_size`](#--fragment_size) * [Reference genomes](#reference-genomes) - * [`--genome`](#--genome) - * [`--fasta`](#--fasta) - * [`--gtf`](#--gtf) - * [`--bwa_index`](#--bwa_index) - * [`--gene_bed`](#--gene_bed) - * [`--tss_bed`](#--tss_bed) - * [`--mito_name`](#--mito_name) - * [`--macs_gsize`](#--macs_gsize) - * [`--blacklist`](#--blacklist) - * [`--saveGenomeIndex`](#--saveGenomeIndex) - * [`--igenomesIgnore`](#--igenomesignore) + * [`--genome` (using iGenomes)](#--genome-using-igenomes) + * [`--fasta`](#--fasta) + * [`--gtf`](#--gtf) + * [`--bwa_index`](#--bwa_index) + * [`--gene_bed`](#--gene_bed) + * [`--tss_bed`](#--tss_bed) + * [`--mito_name`](#--mito_name) + * [`--macs_gsize`](#--macs_gsize) + * [`--blacklist`](#--blacklist) + * [`--saveGenomeIndex`](#--savegenomeindex) + * [`--igenomesIgnore`](#--igenomesignore) * [Adapter trimming](#adapter-trimming) - * [`--skipTrimming`](#--skipTrimming) - * [`--saveTrimmed`](#--saveTrimmed) + * [`--skipTrimming`](#--skiptrimming) + * [`--saveTrimmed`](#--savetrimmed) * [Alignments](#alignments) - * [`--keepMito`](#--keepMito) - * [`--keepDups`](#--keepDups) - * [`--keepMultiMap`](#--keepMultiMap) - * [`--skipMergeReplicates`](#--skipMergeReplicates) - * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates) + * [`--keepMito`](#--keepmito) + * [`--keepDups`](#--keepdups) + * [`--keepMultiMap`](#--keepmultimap) + * [`--skipMergeReplicates`](#--skipmergereplicates) + * [`--saveAlignedIntermediates`](#--savealignedintermediates) * [Job resources](#job-resources) -* [Automatic resubmission](#automatic-resubmission) -* [Custom resource requests](#custom-resource-requests) -* [AWS batch specific parameters](#aws-batch-specific-parameters) - * [`-awsbatch`](#-awsbatch) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) + * [Automatic resubmission](#automatic-resubmission) + * [Custom resource requests](#custom-resource-requests) +* [AWS Batch specific parameters](#aws-batch-specific-parameters) + * [`--awsqueue`](#--awsqueue) + * [`--awsregion`](#--awsregion) * [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`-name`](#-name-single-dash) - * [`-resume`](#-resume-single-dash) - * [`-c`](#-c-single-dash) - * [`--custom_config_version`](#--custom_config_version) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_email`](#--plaintext_email) - * [`--monochrome_logs`](#--monochrome_logs) - 
* [`--multiqc_config`](#--multiqc_config) - -## General Nextflow info + * [`--outdir`](#--outdir) + * [`--email`](#--email) + * [`-name`](#-name) + * [`-resume`](#-resume) + * [`-c`](#-c) + * [`--custom_config_version`](#--custom_config_version) + * [`--custom_config_base`](#--custom_config_base) + * [`--max_memory`](#--max_memory) + * [`--max_time`](#--max_time) + * [`--max_cpus`](#--max_cpus) + * [`--plaintext_email`](#--plaintext_email) + * [`--monochrome_logs`](#--monochrome_logs) + * [`--multiqc_config`](#--multiqc_config) + +## Introduction Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): diff --git a/environment.yml b/environment.yml index 36e7fabf..9ff2f873 100755 --- a/environment.yml +++ b/environment.yml @@ -1,3 +1,5 @@ +# You can use this file to create a conda environment for this pipeline: +# conda env create -f environment.yml name: nf-core-atacseq-1.0dev channels: - conda-forge diff --git a/main.nf b/main.nf index 35a6fdae..6d65c8ae 100755 --- a/main.nf +++ b/main.nf @@ -67,6 +67,7 @@ def helpMessage() { Other --outdir The output directory where the results will be saved --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic AWSBatch @@ -247,8 +248,6 @@ if (workflow.profile == 'awsbatch') { // Header log info log.info nfcoreHeader() def summary = [:] -summary['Pipeline Name'] = 'nf-core/atacseq' -summary['Pipeline Version'] = workflow.manifest.version summary['Run Name'] = custom_runName ?: workflow.runName summary['Genome'] = params.genome ?: 'Not supplied' summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' @@ -278,31 +277,31 @@ summary['Merge Replicates'] = params.skipMergeReplicates ? 'No' : 'Yes summary['Save Genome Index'] = params.saveGenomeIndex ? 'Yes' : 'No' summary['Save Trimmed'] = params.saveTrimmed ? 'Yes' : 'No' summary['Save Intermeds'] = params.saveAlignedIntermediates ? 
'Yes' : 'No' -summary['Max Memory'] = params.max_memory -summary['Max CPUs'] = params.max_cpus -summary['Max Time'] = params.max_time -summary['Output Dir'] = params.outdir -summary['Working Dir'] = workflow.workDir -summary['Container Engine'] = workflow.containerEngine -if (workflow.containerEngine) summary['Container'] = workflow.container -summary['Current Home'] = "$HOME" -summary['Current User'] = "$USER" -summary['Current Path'] = "$PWD" -summary['Working Dir'] = workflow.workDir -summary['Output Dir'] = params.outdir -summary['Script Dir'] = workflow.projectDir +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" +summary['Output Dir'] = params.outdir +summary['Launch Dir'] = workflow.launchDir +summary['Working Dir'] = workflow.workDir +summary['Script Dir'] = workflow.projectDir +summary['User'] = workflow.userName +if (workflow.profile == 'awsbatch'){ + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue +} summary['Config Profile'] = workflow.profile if (params.config_profile_description) summary['Config Description'] = params.config_profile_description if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if (workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue +if(params.email) { + summary['E-mail Address'] = params.email + summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize } -if (params.email) summary['E-mail Address'] = params.email log.info summary.collect { k,v -> "${k.padRight(21)}: $v" }.join("\n") log.info "\033[2m----------------------------------------------------\033[0m" +// Check the hostnames against configured profiles +checkHostname() + // Show a big warning message if we're not running MACS if (!params.macs_gsize){ def warnstring = params.genome ? 
"supported for '${params.genome}'" : 'supplied' @@ -1772,8 +1771,8 @@ workflow.onComplete { // Set up the e-mail variables def subject = "[nf-core/atacseq] Successful: $workflow.runName" - if (!workflow.success){ - subject = "[nf-core/atacseq] FAILED: $workflow.runName" + if(!workflow.success){ + subject = "[nf-core/atacseq] FAILED: $workflow.runName" } def email_fields = [:] email_fields['version'] = workflow.manifest.version @@ -1791,13 +1790,28 @@ workflow.onComplete { email_fields['summary']['Date Completed'] = workflow.complete email_fields['summary']['Pipeline script file path'] = workflow.scriptFile email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository + if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId + if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + if(workflow.container) email_fields['summary']['Docker image'] = workflow.container email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList){ + log.warn "[nf-core/atacseq] Found multiple reports from process 'multiqc', will use only one" + mqc_report = mqc_report[0] + } + } + } catch (all) { + log.warn "[nf-core/atacseq] Could not attach MultiQC report to summary email" + } + // Render the TXT template def engine = new groovy.text.GStringTemplateEngine() def tf = new File("$baseDir/assets/email_template.txt") @@ -1810,7 +1824,7 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir" ] + def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() @@ -1818,28 +1832,37 @@ workflow.onComplete { // Send the HTML e-mail if (params.email) { try { - if (params.plaintext_email){ throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/atacseq] Sent summary e-mail to $params.email (sendmail)" + if( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "[nf-core/atacseq] Sent summary e-mail to $params.email (sendmail)" } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, params.email ].execute() << email_txt - log.info 
"[nf-core/atacseq] Sent summary e-mail to $params.email (mail)" + // Catch failures and try with plaintext + [ 'mail', '-s', subject, params.email ].execute() << email_txt + log.info "[nf-core/atacseq] Sent summary e-mail to $params.email (mail)" } } // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/Documentation/" ) - if (!output_d.exists()) { - output_d.mkdirs() + def output_d = new File( "${params.outdir}/pipeline_info/" ) + if( !output_d.exists() ) { + output_d.mkdirs() } def output_hf = new File( output_d, "pipeline_report.html" ) output_hf.withWriter { w -> w << email_html } def output_tf = new File( output_d, "pipeline_report.txt" ) output_tf.withWriter { w -> w << email_txt } - log.info "[nf-core/atacseq] Pipeline Complete" + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + if(workflow.success){ + log.info "${c_purple}[nf-core/atacseq]${c_green} Pipeline complete${c_reset}" + } else { + checkHostname() + log.info "${c_purple}[nf-core/atacseq]${c_red} Pipeline completed with errors${c_reset}" + } } @@ -1869,11 +1892,32 @@ def nfcoreHeader(){ ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/atacseq v${workflow.manifest.version}${c_reset} + ${c_purple} {{ cookiecutter.name }} v${workflow.manifest.version}${c_reset} ${c_dim}----------------------------------------------------${c_reset} """.stripIndent() } +def checkHostname(){ + def c_reset = params.monochrome_logs ? '' : "\033[0m" + def c_white = params.monochrome_logs ? '' : "\033[0;37m" + def c_red = params.monochrome_logs ? '' : "\033[1;91m" + def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m" + if(params.hostnames){ + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if(hostname.contains(hname) && !workflow.profile.contains(prof)){ + log.error "====================================================\n" + + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + + " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + + "============================================================" + } + } + } + } +} + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ diff --git a/nextflow.config b/nextflow.config index f786d731..03087bd4 100755 --- a/nextflow.config +++ b/nextflow.config @@ -3,9 +3,6 @@ * nf-core/atacseq Nextflow config file * ------------------------------------------------- * Default config options for all environments. - * Cluster-specific config options should be saved - * in the conf folder and imported under a profile - * name here. 
 */
 
 // Global default params, used in configs
@@ -38,10 +35,12 @@ params {
 
   // Options: Other
   outdir = './results'
+  igenomes_base = "./iGenomes"
   email = false
   plaintext_email = false
   monochrome_logs = false
   name = false
+  maxMultiqcEmailFileSize = 25.MB
 
   // Options: AWSBatch
   awsqueue = false
@@ -59,6 +58,7 @@ params {
 
   // Options: Default
   help = false
+  hostnames = false
   tracedir = "${params.outdir}/pipeline_info"
   clusterOptions = false
   igenomesIgnore = false

From 254edd25e5f7bf9c2473a00bb4c9eecbc3a1669e Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Fri, 5 Apr 2019 13:18:24 +0100
Subject: [PATCH 08/18] Bugfixes and update email template like rnaseq

---
 assets/email_template.html         |   4 +-
 assets/email_template.txt          |  20 ---
 assets/multiqc/multiqc_config.yaml |   4 +-
 assets/sendmail_template.txt       |  42 ++++++
 bin/get_autosomes.py               |   0
 bin/reformat_design.py             |   4 +-
 bin/scrape_software_versions.py    |  56 ++++---
 docs/usage.md                      |  10 +-
 environment.yml                    |   6 +-
 main.nf                            | 226 ++++++++++++++---------------
 10 files changed, 202 insertions(+), 170 deletions(-)
 mode change 100644 => 100755 bin/get_autosomes.py

diff --git a/assets/email_template.html b/assets/email_template.html
index 1c4f6946..ecab0e13 100755
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -5,12 +5,14 @@
   <meta charset="utf-8">
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
   <meta name="viewport" content="width=device-width, initial-scale=1">
 
-  <meta name="description" content="nf-core/atacseq: ATACSeq peak-calling and differential analysis pipeline.">
+  <title>nf-core/atacseq Pipeline Report</title>
 </head>
 <body>
 <div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto;">
 
+<img src="cid:nfcoreatacseqlogo">
+
 <h1>nf-core/atacseq v${version}</h1>
 <h2>Run Name: $runName</h2>
 
diff --git a/assets/email_template.txt b/assets/email_template.txt
index 9a4dd027..91d3a7a6 100755
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -16,32 +16,12 @@ ${errorReport}
 """
 } %>
-
-<% if (!success){
-    out << """####################################################
-## nf-core/atacseq execution completed unsuccessfully! ##
-####################################################
-The exit status of the task that caused the workflow execution to fail was: $exitStatus.
-The full error message was:
-
-${errorReport}
-"""
-} else {
-    out << "## nf-core/atacseq execution completed successfully! ##"
-}
-%>
-
-
-
-
 The workflow was completed at $dateComplete (duration: $duration)
 
 The command used to launch the workflow was as follows:
 
   $commandLine
-
-
 Pipeline Configuration:
 -----------------------
 <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %>
diff --git a/assets/multiqc/multiqc_config.yaml b/assets/multiqc/multiqc_config.yaml
index 6e3dcf8c..d2f44b25 100755
--- a/assets/multiqc/multiqc_config.yaml
+++ b/assets/multiqc/multiqc_config.yaml
@@ -5,6 +5,8 @@ report_comment: >
 
 skip_generalstats: true
 
+export_plots: true
+
 fn_clean_exts:
   - 'fastq.gz'
   - '_trimmed'
@@ -98,7 +100,7 @@ report_section_order:
     order: -1800
   mrep_deseq2_clustering:
     order: -1900
-  nf-core-atacseq-software-versions:
+  software_versions:
     order: -2000
   nf-core-atacseq-summary:
     order: -2100
diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt
index cd34644e..3777e08a 100755
--- a/assets/sendmail_template.txt
+++ b/assets/sendmail_template.txt
@@ -8,4 +8,46 @@ Content-Type: text/html; charset=utf-8
 
 $email_html
 
+--nfcoremimeboundary
+Content-Type: image/png;name="nfcore-atacseq_logo.png"
+Content-Transfer-Encoding: base64
+Content-ID: <nfcoreatacseqlogo>
+Content-Disposition: inline; filename="nfcore-atacseq_logo.png"
+
+<% out << new File("$baseDir/assets/nfcore-atacseq_logo.png").
+    bytes.
+    encodeBase64().
+    toString().
+    tokenize( '\n' )*.
+    toList()*.
+    collate( 76 )*.
+    collect { it.join() }.
+    flatten().
+    join( '\n' ) %>
+
+<%
+if (mqcFile){
+def mqcFileObj = new File("$mqcFile")
+if (mqcFileObj.length() < mqcMaxSize){
+out << """
+--nfcoremimeboundary
+Content-Type: text/html; name=\"multiqc_report\"
+Content-Transfer-Encoding: base64
+Content-ID: <mqcreport>
+Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\"
+
+${mqcFileObj.
+    bytes.
+    encodeBase64().
+    toString().
+    tokenize( '\n' )*.
+    toList()*.
+    collate( 76 )*.
+    collect { it.join() }.
+    flatten().
+    join( '\n' )}
+"""
+}}
+%>
+
 --nfcoremimeboundary--
diff --git a/bin/get_autosomes.py b/bin/get_autosomes.py
old mode 100644
new mode 100755
diff --git a/bin/reformat_design.py b/bin/reformat_design.py
index 4fbca5da..cf7aafa2 100755
--- a/bin/reformat_design.py
+++ b/bin/reformat_design.py
@@ -72,8 +72,8 @@ def reformat_design(DesignFileIn,DesignFileOut):
 
         for fastq in fastQFiles:
             ## CHECK FASTQ FILE EXTENSION
-            if fastq[-9:] != '.fastq.gz':
-                print "{}: FastQ file has incorrect extension (has to be '.fastq.gz') - {}\nLine: '{}'".format(ERROR_STR,fastq,line.strip())
+            if fastq[-9:] != '.fastq.gz' and fastq[-6:] != '.fq.gz':
+                print "{}: FastQ file has incorrect extension (has to be '.fastq.gz' or '.fq.gz') - {}\nLine: '{}'".format(ERROR_STR,fastq,line.strip())
                 sys.exit(1)
 
             ## CHECK FASTQ FILES EXIST PER SAMPLE
diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index f9126e96..5c14829f 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -23,22 +23,22 @@
 }
 
 results = OrderedDict()
-results['nf-core/atacseq'] = 'NA'
-results['Nextflow'] = 'NA'
-results['FastQC'] = 'NA'
-results['Trim Galore!'] = 'NA'
-results['BWA'] = 'NA'
-results['Samtools'] = 'NA'
-results['BEDTools'] = 'NA'
-results['BamTools'] = 'NA'
-results['Picard'] = 'NA'
-results['R'] = 'NA'
-results['Pysam'] = 'NA'
-results['MACS2'] = 'NA'
-results['HOMER'] = 'NA'
-results['ataqv'] = 'NA'
-results['featureCounts'] = 'NA'
-results['MultiQC'] = 'NA'
+results['nf-core/atacseq'] = 'N/A'
+results['Nextflow'] = 'N/A'
+results['FastQC'] = 'N/A'
+results['Trim Galore!'] = 'N/A'
+results['BWA'] = 'N/A'
+results['Samtools'] = 'N/A'
+results['BEDTools'] = 'N/A'
+results['BamTools'] = 'N/A'
+results['Picard'] = 'N/A'
+results['R'] = 'N/A'
+results['Pysam'] = 'N/A'
+results['MACS2'] = 'N/A'
+results['HOMER'] = False
+results['ataqv'] = 'N/A'
+results['featureCounts'] = 'N/A'
+results['MultiQC'] = 'N/A'
 
 # Search each file using its regex
 for k, v in regexes.items():
@@ -48,6 +48,26 @@
     if match:
         results[k] = "v{}".format(match.group(1))
 
-# Dump to TSV
+# Strip HOMER
+for k in results:
+    if not results[k]:
+        del(results[k])
+
+# Dump to YAML
+print ('''
+id: 'software_versions'
+section_name: 'nf-core/atacseq Software Versions'
+section_href: 'https://github.com/nf-core/atacseq'
+plot_type: 'html'
+description: 'are collected at run time from the software output.'
+data: |
+    <dl class="dl-horizontal">
+''')
 for k,v in results.items():
-    print("{}\t{}".format(k,v))
+    print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k,v))
+print ("    </dl>")
+
+# Write out regexes as csv file:
+with open('software_versions.csv', 'w') as f:
+    for k,v in results.items():
+        f.write("{}\t{}\n".format(k,v))
diff --git a/docs/usage.md b/docs/usage.md
index 58223a97..0d6fa616 100755
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -127,7 +127,7 @@ You will need to create a design file with information about the samples in your
 
 #### Multiple replicates
 
-The `group` identifier is the same when you have multiple replicates from the same experimental group, just increment the `replicate` identifier appropriately. Below is an example for a single experimental group in triplicate:
+The `group` identifier is the same when you have multiple replicates from the same experimental group, just increment the `replicate` identifier appropriately. The first replicate value for any given sample must be 1. Below is an example for a single experimental group in triplicate:
 
 ```bash
 group,replicate,fastq_1,fastq_2
 control,1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
 control,2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
 control,3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
@@ -138,12 +138,14 @@ control,3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
 
 #### Multiple runs of the same library
 
-The `group` and `replicate` identifiers are the same when you have resequenced the same sample more than once (e.g. to increase sequencing depth). The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. Below is an example for the same sample sequenced twice:
+The `group` and `replicate` identifiers are the same when you have re-sequenced the same sample more than once (e.g. to increase sequencing depth). The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. Below is an example for two samples sequenced across multiple lanes:
 
 ```bash
 group,replicate,fastq_1,fastq_2
 control,1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
 control,1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
+treatment,1,AEG588A4_S4_L003_R1_001.fastq.gz,AEG588A4_S4_L003_R2_001.fastq.gz
+treatment,1,AEG588A4_S4_L004_R1_001.fastq.gz,AEG588A4_S4_L004_R2_001.fastq.gz
 ```
 
 #### Full design
@@ -165,8 +167,8 @@ treatment,3,AEG588A6_S6_L004_R1_001.fastq.gz,AEG588A6_S6_L004_R2_001.fastq.gz
 
 |-------------|-------------------------------------------------------------------------------------------------------------|
 | `group`     | Group identifier for sample. This will be identical for replicate samples from the same experimental group. |
 | `replicate` | Integer representing replicate number. Must start from `1..`. |
-| `fastq_1`   | Full path to FastQ file for read 1. File has to be zipped and have the extension ".fastq.gz". |
-| `fastq_2`   | Full path to FastQ file for read 2. File has to be zipped and have the extension ".fastq.gz". |
+| `fastq_1`   | Full path to FastQ file for read 1. File has to be zipped and have the extension ".fastq.gz" or ".fq.gz". |
+| `fastq_2`   | Full path to FastQ file for read 2. File has to be zipped and have the extension ".fastq.gz" or ".fq.gz". |
 
 Example design files have been provided with the pipeline for [paired-end](../assets/design_pe.csv) and [single-end](../assets/design_se.csv) data.
diff --git a/environment.yml b/environment.yml index 9ff2f873..629d7509 100755 --- a/environment.yml +++ b/environment.yml @@ -22,15 +22,15 @@ dependencies: - trim-galore=0.5.0 - bwa=0.7.17 - samtools=1.9 - - picard=2.18.26 + - picard=2.19.0 - bamtools=2.5.1 - pysam=0.15.2 - - bedtools=2.27.1 + - bedtools=2.28.0 - ucsc-bedgraphtobigwig=377 - macs2=2.1.2 - homer=4.9.1 - ataqv=1.0.0 - - subread=1.6.3 + - subread=1.6.4 - multiqc=1.7 - bioconductor-deseq2=1.20.0 - bioconductor-vsn=3.46.0 diff --git a/main.nf b/main.nf index 6d65c8ae..1add504c 100755 --- a/main.nf +++ b/main.nf @@ -248,53 +248,53 @@ if (workflow.profile == 'awsbatch') { // Header log info log.info nfcoreHeader() def summary = [:] -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Genome'] = params.genome ?: 'Not supplied' -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Design File'] = params.design -if (params.bwa_index) summary['BWA Index'] = params.bwa_index ?: 'Not supplied' -summary['Fasta Ref'] = params.fasta -summary['GTF File'] = params.gtf -summary['Gene BED File'] = params.gene_bed ?: 'Not supplied' -summary['TSS BED File'] = params.tss_bed ?: 'Not supplied' +summary['Run Name'] = custom_runName ?: workflow.runName +summary['Genome'] = params.genome ?: 'Not supplied' +summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' +summary['Design File'] = params.design +if (params.bwa_index) summary['BWA Index'] = params.bwa_index ?: 'Not supplied' +summary['Fasta Ref'] = params.fasta +summary['GTF File'] = params.gtf +summary['Gene BED File'] = params.gene_bed ?: 'Not supplied' +summary['TSS BED File'] = params.tss_bed ?: 'Not supplied' if (params.blacklist) summary['Blacklist BED'] = params.blacklist -summary['Mitochondrial Contig'] = params.mito_name ?: 'Not supplied' -summary['MACS Genome Size'] = params.macs_gsize ?: 'Not supplied' -if (params.macs_gsize) summary['MACS Narrow Peaks'] = params.narrowPeak ? 'Yes' : 'No' +summary['Mitochondrial Contig'] = params.mito_name ?: 'Not supplied' +summary['MACS Genome Size'] = params.macs_gsize ?: 'Not supplied' +if (params.macs_gsize) summary['MACS Narrow Peaks'] = params.narrowPeak ? 'Yes' : 'No' if (params.skipTrimming){ - summary['Trimming Step'] = 'Skipped' + summary['Trimming Step'] = 'Skipped' } else { - summary['Trim R1'] = "$params.clip_r1 bp" - summary['Trim R2'] = "$params.clip_r2 bp" - summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" - summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" + summary['Trim R1'] = "$params.clip_r1 bp" + summary['Trim R2'] = "$params.clip_r2 bp" + summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" + summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" } -summary['Fragment Size'] = "$params.fragment_size bp" -summary['Keep Mitochondrial'] = params.keepMito ? 'Yes' : 'No' -summary['Keep Duplicates'] = params.keepDups ? 'Yes' : 'No' -summary['Keep Multi-mapped'] = params.keepMultiMap ? 'Yes' : 'No' -summary['Merge Replicates'] = params.skipMergeReplicates ? 'No' : 'Yes' -summary['Save Genome Index'] = params.saveGenomeIndex ? 'Yes' : 'No' -summary['Save Trimmed'] = params.saveTrimmed ? 'Yes' : 'No' -summary['Save Intermeds'] = params.saveAlignedIntermediates ? 'Yes' : 'No' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +summary['Fragment Size'] = "$params.fragment_size bp" +summary['Keep Mitochondrial'] = params.keepMito ? 'Yes' : 'No' +summary['Keep Duplicates'] = params.keepDups ? 
'Yes' : 'No' +summary['Keep Multi-mapped'] = params.keepMultiMap ? 'Yes' : 'No' +summary['Merge Replicates'] = params.skipMergeReplicates ? 'No' : 'Yes' +summary['Save Genome Index'] = params.saveGenomeIndex ? 'Yes' : 'No' +summary['Save Trimmed'] = params.saveTrimmed ? 'Yes' : 'No' +summary['Save Intermeds'] = params.saveAlignedIntermediates ? 'Yes' : 'No' +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output Dir'] = params.outdir -summary['Launch Dir'] = workflow.launchDir -summary['Working Dir'] = workflow.workDir -summary['Script Dir'] = workflow.projectDir -summary['User'] = workflow.userName +summary['Output Dir'] = params.outdir +summary['Launch Dir'] = workflow.launchDir +summary['Working Dir'] = workflow.workDir +summary['Script Dir'] = workflow.projectDir +summary['User'] = workflow.userName if (workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue } -summary['Config Profile'] = workflow.profile +summary['Config Profile'] = workflow.profile if (params.config_profile_description) summary['Config Description'] = params.config_profile_description if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if (params.config_profile_url) summary['Config URL'] = params.config_profile_url if(params.email) { - summary['E-mail Address'] = params.email - summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize + summary['E-mail Address'] = params.email + summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize } log.info summary.collect { k,v -> "${k.padRight(21)}: $v" }.join("\n") log.info "\033[2m----------------------------------------------------\033[0m" @@ -1244,7 +1244,7 @@ process merge_replicate { file "*.{idxstats,stats}" into mrep_stats_mqc file "*.txt" into mrep_metrics_mqc - when: !skipMergeReplicates && replicates_exist + when: !params.skipMergeReplicates && replicates_exist script: prefix="${name}.mRp.clN" @@ -1320,7 +1320,7 @@ process merge_replicate_bigwig { file "*.bigWig" into mrep_bigwig_igv file "*.txt" into mrep_bigwig_scale - when: !skipMergeReplicates && replicates_exist + when: !params.skipMergeReplicates && replicates_exist script: prefix="${name}.mRp.clN" @@ -1360,7 +1360,7 @@ process merge_replicate_macs { mrep_macs_peaks_igv file "*_mqc.tsv" into mrep_macs_peak_mqc - when: !skipMergeReplicates && replicates_exist && params.macs_gsize + when: !params.skipMergeReplicates && replicates_exist && params.macs_gsize script: prefix="${name}.mRp.clN" @@ -1399,7 +1399,7 @@ process merge_replicate_macs_annotate { output: file "*.txt" into mrep_macs_annotate - when: !skipMergeReplicates && replicates_exist && params.macs_gsize + when: !params.skipMergeReplicates && replicates_exist && params.macs_gsize script: prefix="${name}.mRp.clN" @@ -1428,7 +1428,7 @@ process merge_replicate_macs_qc { file "*.{txt,pdf}" into mrep_macs_qc file "*.tsv" into mrep_macs_qc_mqc - when: !skipMergeReplicates && replicates_exist && params.macs_gsize + when: !params.skipMergeReplicates && replicates_exist && params.macs_gsize script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ suffix='mRp.clN' @@ -1464,7 +1464,7 @@ process merge_replicate_macs_consensus { file "*.saf" into mrep_macs_consensus_saf file 
"*.intersect.{txt,plot.pdf}" into mrep_macs_consensus_intersect - when: !skipMergeReplicates && replicates_exist && params.macs_gsize && multiple_samples + when: !params.skipMergeReplicates && replicates_exist && params.macs_gsize && multiple_samples script: // scripts are bundled with the pipeline, in nf-core/atacseq/bin/ suffix='mRp.clN' @@ -1508,7 +1508,7 @@ process merge_replicate_macs_consensus_annotate { output: file "*.annotatePeaks.txt" into mrep_macs_consensus_annotate - when: !skipMergeReplicates && replicates_exist && params.macs_gsize && multiple_samples + when: !params.skipMergeReplicates && replicates_exist && params.macs_gsize && multiple_samples script: prefix="consensus_peaks.mRp.clN" @@ -1546,7 +1546,7 @@ process merge_replicate_macs_consensus_deseq { file "*vs*/*.bed" into mrep_macs_consensus_deseq_comp_bed_igv file "*.tsv" into mrep_macs_consensus_deseq_mqc - when: !skipMergeReplicates && replicates_exist && params.macs_gsize && multiple_samples + when: !params.skipMergeReplicates && replicates_exist && params.macs_gsize && multiple_samples script: prefix="consensus_peaks.mRp.clN" @@ -1570,6 +1570,45 @@ process merge_replicate_macs_consensus_deseq { """ } +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +/* -- -- */ +/* -- IGV -- */ +/* -- -- */ +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + +/* + * STEP 6 - Create IGV session file + */ +process igv { + publishDir "${params.outdir}/igv", mode: 'copy' + + input: + file fasta from fasta_igv.collect() + + file ('bwa/mergedLibrary/bigwig/*') from mlib_bigwig_igv.collect() + file ('bwa/mergedLibrary/macs/*') from mlib_macs_peaks_igv.collect{it[1]}.ifEmpty([]) + file ('bwa/mergedLibrary/macs/consensus/*') from mlib_macs_consensus_bed_igv.collect().ifEmpty([]) + file ('bwa/mergedLibrary/macs/consensus/deseq2/*') from mlib_macs_consensus_deseq_comp_bed_igv.collect().ifEmpty([]) + + file ('bwa/mergedReplicate/bigwig/*') from mrep_bigwig_igv.collect().ifEmpty([]) + file ('bwa/mergedReplicate/macs/*') from mrep_macs_peaks_igv.collect{it[1]}.ifEmpty([]) + file ('bwa/mergedReplicate/macs/consensus/*') from mrep_macs_consensus_bed_igv.collect().ifEmpty([]) + file ('bwa/mergedReplicate/macs/consensus/deseq2/*') from mrep_macs_consensus_deseq_comp_bed_igv.collect().ifEmpty([]) + + output: + file "*.{txt,xml}" into igv_session + + script: // scripts are bundled with the pipeline, in nf-core/atacseq/bin/ + outdir_abspath = new File(params.outdir).getCanonicalPath().toString() + """ + igv_get_files.sh ./ mLb $outdir_abspath > igv_files.txt + igv_get_files.sh ./ mRp $outdir_abspath >> igv_files.txt + igv_files_to_session.py igv_session.xml igv_files.txt ${outdir_abspath}/reference_genome/${fasta.getName()} + """ +} + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ @@ -1582,11 +1621,15 @@ process merge_replicate_macs_consensus_deseq { * Parse software version numbers */ process get_software_versions { - publishDir "${params.outdir}/Documentation", mode: 'copy' + publishDir "${params.outdir}/pipeline_info", mode: 'copy', + saveAs: {filename -> + if (filename.indexOf(".csv") > 0) filename + else null + } output: - file "software_versions.txt" into software_versions_mqc, - 
                                  software_versions_methods
+    file 'software_versions_mqc.yaml' into software_versions_mqc
+    file "software_versions.csv"

     script:
     """
@@ -1606,32 +1649,10 @@ process get_software_versions {
     echo \$(ataqv --version 2>&1) > v_ataqv.txt
     echo \$(featureCounts -v 2>&1) > v_featurecounts.txt
     multiqc --version > v_multiqc.txt
-    scrape_software_versions.py > software_versions.txt
+    scrape_software_versions.py &> software_versions_mqc.yaml
     """
 }

-def create_software_summary(software_version_file) {
-
-    def yaml_file = workDir.resolve('software_summary_mqc.yaml')
-    yaml_file.text = """
-    id: 'nf-core-atacseq-software-versions'
-    section_name: 'nf-core/atacseq Software Versions'
-    section_href: 'https://github.com/nf-core/atacseq'
-    plot_type: 'html'
-    description: 'are collected at run time from the software output.'
-    data: |
-        <dl class=\\"dl-horizontal\\">
-${software_version_file.splitCsv(sep: '\t')
-                        .map{ it -> "<dt>${it[0]}</dt><dd>${it[1] != 'NA' ? it[1] : 'N/A'}</dd>" }
-                        .collect()
-                        .get()
-                        .join("\n") }
-        </dl>
-    """.stripIndent()
-
-    return yaml_file
-}
-
 def create_workflow_summary(summary) {

     def yaml_file = workDir.resolve('workflow_summary_mqc.yaml')
@@ -1651,7 +1672,7 @@ ${summary.collect { k,v -> "<dt>$k</dt><dd><samp>${v ?: '<span style=\\"color:#999999;\\">N/A</a>'}</samp></dd>" }.join("\n")}
-    igv_get_files.sh ./ mLb $outdir_abspath > igv_files.txt
-    igv_get_files.sh ./ mRp $outdir_abspath >> igv_files.txt
-    igv_files_to_session.py igv_session.xml igv_files.txt ${outdir_abspath}/reference_genome/${fasta.getName()}
-    """
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 /* --                                                                     -- */
 /* --                                                                     -- */
@@ -1772,7 +1755,7 @@ workflow.onComplete {
     // Set up the e-mail variables
     def subject = "[nf-core/atacseq] Successful: $workflow.runName"
     if(!workflow.success){
-      subject = "[nf-core/atacseq] FAILED: $workflow.runName"
+        subject = "[nf-core/atacseq] FAILED: $workflow.runName"
     }
     def email_fields = [:]
     email_fields['version'] = workflow.manifest.version
@@ -1794,6 +1777,7 @@ workflow.onComplete {
     if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
     if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
     if(workflow.container) email_fields['summary']['Docker image'] = workflow.container
+    email_fields['skipped_poor_alignment'] = skipped_poor_alignment
     email_fields['summary']['Nextflow Version'] = workflow.nextflow.version
     email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
     email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
@@ -1832,14 +1816,14 @@ workflow.onComplete {
     // Send the HTML e-mail
     if (params.email) {
         try {
-          if( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') }
-          // Try to send HTML e-mail using sendmail
-          [ 'sendmail', '-t' ].execute() << sendmail_html
-          log.info "[nf-core/atacseq] Sent summary e-mail to $params.email (sendmail)"
+            if( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') }
+            // Try to send HTML e-mail using sendmail
+            [ 'sendmail', '-t' ].execute() << sendmail_html
+            log.info "[nf-core/atacseq] Sent summary e-mail to $params.email (sendmail)"
         } catch (all) {
-          // Catch failures and try with plaintext
-          [ 'mail', '-s', subject, params.email ].execute() << email_txt
-          log.info "[nf-core/atacseq] Sent summary e-mail to $params.email (mail)"
+            // Catch failures and try with plaintext
+            [ 'mail', '-s', subject, params.email ].execute() << email_txt
+            log.info "[nf-core/atacseq] Sent summary e-mail to $params.email (mail)"
         }
     }

@@ -1858,7 +1842,7 @@ workflow.onComplete {
     c_green = params.monochrome_logs ? '' : "\033[0;32m";
     c_red = params.monochrome_logs ? '' : "\033[0;31m";
     if(workflow.success){
-        log.info "${c_purple}[nf-core/atacseq]${c_green} Pipeline complete${c_reset}"
+        log.info "${c_purple}[nf-core/atacseq]${c_green} Pipeline completed successfully${c_reset}"
     } else {
         checkHostname()
         log.info "${c_purple}[nf-core/atacseq]${c_red} Pipeline completed with errors${c_reset}"
@@ -1892,7 +1876,7 @@ def nfcoreHeader(){
     ${c_blue}  |\\ | |__  __ /  ` /  \\ |__) |__        ${c_yellow}}  {${c_reset}
     ${c_blue}  | \\| |       \\__, \\__/ |  \\ |___     ${c_green}\\`-._,-`-,${c_reset}
                                             ${c_green}`._,._,\'${c_reset}
-    ${c_purple}  {{ cookiecutter.name }} v${workflow.manifest.version}${c_reset}
+    ${c_purple}  nf-core/atacseq v${workflow.manifest.version}${c_reset}
     ${c_dim}----------------------------------------------------${c_reset}
     """.stripIndent()
 }

From f5efff11a881bfdbcba8d33692c8240e05cfc6b8 Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Fri, 5 Apr 2019 14:03:49 +0100
Subject: [PATCH 09/18] Fix to handle more than single-digit replicates

---
 assets/bamtools_filter_pe.json |  2 +-
 assets/bamtools_filter_se.json |  2 +-
 main.nf                        | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/assets/bamtools_filter_pe.json b/assets/bamtools_filter_pe.json
index 34e2c70e..c22b6fca 100755
--- a/assets/bamtools_filter_pe.json
+++ b/assets/bamtools_filter_pe.json
@@ -9,7 +9,7 @@
         },
         {
             "id" : "mismatch",
-            "tag" : "NM:<=3"
+            "tag" : "NM:<=4"
         },
         {
             "id" : "cigar",
diff --git a/assets/bamtools_filter_se.json b/assets/bamtools_filter_se.json
index 9311b7f1..8c4e3834 100755
--- a/assets/bamtools_filter_se.json
+++ b/assets/bamtools_filter_se.json
@@ -1,7 +1,7 @@
 { "filters" : [
         {
             "id" : "mismatch",
-            "tag" : "NM:<=3"
+            "tag" : "NM:<=4"
         },
         {
             "id" : "cigar",
diff --git a/main.nf b/main.nf
index 1add504c..fd01377c 100755
--- a/main.nf
+++ b/main.nf
@@ -358,13 +358,13 @@ if (params.singleEnd) {
 }

 // Boolean value for replicates existing in design
-replicates_exist = design_replicates_exist.map { it -> it[0][-4].toInteger() }
+replicates_exist = design_replicates_exist.map { it -> it[0].split('_')[-2].replaceAll('R','').toInteger() }
                                           .flatten()
                                           .max()
                                           .val > 1

 // Boolean value for multiple samples existing in design
-multiple_samples = design_multiple_samples.map { it -> it[0][0..-7] }
+multiple_samples = design_multiple_samples.map { it -> it[0].split('_')[0..-3].join('_') }
                                           .flatten()
                                           .unique()
                                           .count()
@@ -594,7 +594,7 @@ process bwa_mem {

     script:
     prefix="${name}.Lb"
-    rg="\'@RG\\tID:${name}\\tSM:${name.toString().subSequence(0, name.length() - 3)}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'"
+    rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'"
     """
     bwa mem -t $task.cpus -M -R $rg ${index}/${bwa_base} $reads | samtools view -@ $task.cpus -b -h -F 0x0100 -O BAM -o ${prefix}.bam -
     """
@@ -644,7 +644,7 @@ process sort_bam {
 /*
  * STEP 4.1 Merge BAM files for all libraries from same replicate
 */
-sort_bam_mlib.map { it -> [ it[0].toString().subSequence(0, it[0].length() - 3), it[1] ] }
+sort_bam_mlib.map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] }
              .groupTuple(by: [0])
              .map { it -> [ it[0], it[1].flatten() ] }
              .set { sort_bam_mlib }
@@ -1215,7 +1215,7 @@ process merge_library_ataqv_mkarv {
 /*
  * STEP 5.1 Merge library BAM files across all replicates
 */
-mlib_rm_orphan_bam_mrep.map { it -> [ it[0].toString().subSequence(0, it[0].length() - 3), it[1] ] }
+mlib_rm_orphan_bam_mrep.map { it -> [ it[0].split('_')[0..-3].join('_'), it[1] ] }
                        .groupTuple(by: [0])
                        .map { it -> [ it[0], it[1].flatten() ] }
                        .set { mlib_rm_orphan_bam_mrep }
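
The main.nf hunks above trade fixed character offsets (`it[0][-4]`, `subSequence(...)`) for field-based parsing with `split('_')`. A small Groovy sketch of the difference — illustrative only, not pipeline code, and assuming sample ids of the form `<group>_R<replicate>_T<run>` as implied by the expressions above:

```groovy
// Illustrative only: why character offsets fail for multi-digit replicate ids.
def name = 'control_R12_T1'

// Old scheme: a fixed offset from the end of the string sees one digit at most.
assert name[-4] == '2'                                   // loses the leading '1' of 'R12'

// New scheme: split into fields and address them whole.
def parts = name.split('_')
assert parts[-2].replaceAll('R', '').toInteger() == 12   // full replicate id
assert parts[0..-2].join('_') == 'control_R12'           // id with the run suffix stripped
assert parts[0..-3].join('_') == 'control'               // id with run and replicate stripped
```
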
From 1c30bfa369eba80526e905a59c33fd33b14293f5 Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Fri, 5 Apr 2019 14:28:40 +0100
Subject: [PATCH 10/18] Fix to handle more than single-digit replicates

---
 main.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/main.nf b/main.nf
index fd01377c..c4f4c391 100755
--- a/main.nf
+++ b/main.nf
@@ -294,7 +294,7 @@ if (params.config_profile_contact)     summary['Config Contact']     = params.co
 if (params.config_profile_url)         summary['Config URL']         = params.config_profile_url
 if(params.email) {
     summary['E-mail Address']  = params.email
-    summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize
+    summary['MultiQC Max Size'] = params.maxMultiqcEmailFileSize
 }
 log.info summary.collect { k,v -> "${k.padRight(21)}: $v" }.join("\n")
 log.info "\033[2m----------------------------------------------------\033[0m"
@@ -1215,7 +1215,7 @@ process merge_library_ataqv_mkarv {
 /*
  * STEP 5.1 Merge library BAM files across all replicates
 */
-mlib_rm_orphan_bam_mrep.map { it -> [ it[0].split('_')[0..-3].join('_'), it[1] ] }
+mlib_rm_orphan_bam_mrep.map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] }
                        .groupTuple(by: [0])
                        .map { it -> [ it[0], it[1].flatten() ] }
                        .set { mlib_rm_orphan_bam_mrep }

From 177d5d04a2d871c89dc9a8204e762e8e25c37925 Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Fri, 5 Apr 2019 15:17:39 +0100
Subject: [PATCH 11/18] Forgot to sync .github directory

---
 .github/CONTRIBUTING.md                       | 32 +++++++++++++++----
 .github/ISSUE_TEMPLATE/bug_report.md          | 31 ++++++++++++++++++
 .github/ISSUE_TEMPLATE/feature_request.md     | 16 ++++++++++
 ...ll_request.md => PULL_REQUEST_TEMPLATE.md} |  2 +-
 .github/bug_report.md                         | 29 -----------------
 .github/feature_request.md                    | 16 ----------
 .github/markdownlint.yml                      |  9 ++++++
 7 files changed, 83 insertions(+), 52 deletions(-)
 mode change 100755 => 100644 .github/CONTRIBUTING.md
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
 create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md
 rename .github/{pull_request.md => PULL_REQUEST_TEMPLATE.md} (95%)
 mode change 100755 => 100644
 delete mode 100755 .github/bug_report.md
 delete mode 100755 .github/feature_request.md
 create mode 100644 .github/markdownlint.yml

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
old mode 100755
new mode 100644
index db81055a..1a6d196b
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -1,12 +1,12 @@
-# nf-core/atacseq Contributing Guidelines
+# nf-core/atacseq: Contributing Guidelines

 Hi there! Many thanks for taking an interest in improving nf-core/atacseq.

-We try to manage the required tasks for nf-core/atacseq using GitHub issues, you probably came to this page when creating one. Please use the prefilled template to save time.
+We try to manage the required tasks for nf-core/atacseq using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time.

 However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;)

-> If you need help using nf-core/atacseq then the best place to go is the Gitter chatroom where you can ask us questions directly: https://gitter.im/nf-core/Lobby
+> If you need help using or modifying nf-core/atacseq then the best place to ask is the nf-core `atacseq` channel on [`Slack`](https://nf-core-invite.herokuapp.com/).
## Contribution workflow
If you'd like to write some code for nf-core/atacseq, the standard workflow
@@ -15,11 +15,31 @@ is as follows:

 1. Check that there isn't already an issue about your idea in the
    [nf-core/atacseq issues](https://github.com/nf-core/atacseq/issues) to avoid
    duplicating work.
-    * Feel free to add a new issue here for the same reason.
+    * If there isn't one already, please create one so that others know you're working on this
 2. Fork the [nf-core/atacseq repository](https://github.com/nf-core/atacseq) to your GitHub account
 3. Make the necessary changes / additions within your forked repository
-4. Submit a Pull Request against the master branch and wait for the code to be reviewed and merged.
+4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged.

 If you're not used to this workflow with git, you can start with some [basic docs from GitHub](https://help.github.com/articles/fork-a-repo/) or even their [excellent interactive tutorial](https://try.github.io/).

-For further information/help, please consult the [nf-core/atacseq documentation](https://github.com/nf-core/atacseq#documentation) and don't hesitate to get in touch on [Gitter](https://gitter.im/nf-core/Lobby)
+
+## Tests
+When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests.
+Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.
+
+There are typically two types of tests that run:
+
+### Lint Tests
+nf-core has a [set of guidelines](http://nf-co.re/guidelines) which all pipelines must adhere to.
+To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command.
+
+If any failures or warnings are encountered, please follow the listed URL for more documentation.
+
+### Pipeline Tests
+Each nf-core pipeline should be set up with a minimal set of test-data.
+Travis CI then runs the pipeline on this data to ensure that it exits successfully.
+If there are any failures then the automated tests fail.
+These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code.
+
+## Getting help
+For further information/help, please consult the [nf-core/atacseq documentation](https://github.com/nf-core/atacseq#documentation) and don't hesitate to get in touch on the nf-core `atacseq` channel on [`Slack`](https://nf-core-invite.herokuapp.com/).
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..e244181c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,31 @@
+Hi there!
+
+Thanks for telling us about a problem with the pipeline. Please delete this text and anything that's not relevant from the template below:
+
+#### Describe the bug
+A clear and concise description of what the bug is.
+
+#### Steps to reproduce
+Steps to reproduce the behaviour:
+1. Command line: `nextflow run ...`
+2. See error: _Please provide your error message_
+
+#### Expected behaviour
+A clear and concise description of what you expected to happen.
+
+#### System:
+ - Hardware: [e.g. HPC, Desktop, Cloud...]
+ - Executor: [e.g. slurm, local, awsbatch...]
+ - OS: [e.g. CentOS Linux, macOS, Linux Mint...]
+ - Version [e.g. 7, 10.13.6, 18.3...]
+
+#### Nextflow Installation:
+ - Version: [e.g. 0.31.0]
+
+#### Container engine:
+ - Engine: [e.g. Conda, Docker or Singularity]
+ - version: [e.g. 1.0.0]
+ - Image tag: [e.g. nfcore/atacseq:1.0.0]
+
+#### Additional context
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 00000000..1f025b77
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,16 @@
+Hi there!
+
+Thanks for suggesting a new feature for the pipeline! Please delete this text and anything that's not relevant from the template below:
+
+#### Is your feature request related to a problem? Please describe.
+A clear and concise description of what the problem is.
+Ex. I'm always frustrated when [...]
+
+#### Describe the solution you'd like
+A clear and concise description of what you want to happen.
+
+#### Describe alternatives you've considered
+A clear and concise description of any alternative solutions or features you've considered.
+
+#### Additional context
+Add any other context about the feature request here.
diff --git a/.github/pull_request.md b/.github/PULL_REQUEST_TEMPLATE.md
old mode 100755
new mode 100644
similarity index 95%
rename from .github/pull_request.md
rename to .github/PULL_REQUEST_TEMPLATE.md
index 8d1609a1..2784b969
--- a/.github/pull_request.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -5,7 +5,7 @@ Please fill in the appropriate checklist below (delete whatever is not relevant)
 ## PR checklist
  - [ ] This comment contains a description of changes (with reason)
  - [ ] If you've fixed a bug or added code that should be tested, add tests!
- - [ ] If necessary, also make a PR on the [nf-core/atacseq branch on the nf-core/test-datasets repo]( https://github.com/nf-core/test-datasets/pull/newnf-core/atacseq)
+ - [ ] If necessary, also make a PR on the [nf-core/atacseq branch on the nf-core/test-datasets repo]( https://github.com/nf-core/test-datasets/pull/new/nf-core/atacseq)
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`).
 - [ ] Make sure your code lints (`nf-core lint .`).
 - [ ] Documentation in `docs` is updated
diff --git a/.github/bug_report.md b/.github/bug_report.md
deleted file mode 100755
index d0405d12..00000000
--- a/.github/bug_report.md
+++ /dev/null
@@ -1,29 +0,0 @@
-**Describe the bug**
-A clear and concise description of what the bug is.
-
-**To Reproduce**
-Steps to reproduce the behavior:
-1. Command line '...'
-2. See error **Please provide your error message**
-
-**Expected behavior**
-A clear and concise description of what you expected to happen.
-
-**System (please complete the following information):**
- - Hardware: [e.g. HPC, Desktop, Cloud...]
 - Executor: [e.g. slurm, local, awsbatch...]
 - OS: [e.g. CentOS Linux, macOS, Linux Mint...]
 - Version [e.g. 7, 10.13.6, 18.3...]
-
-**Nextflow (please complete the following information):**
- - Version: [e.g. 0.31.0]
-
-**Container engine (please complete the following information):**
- - Engine: [e.g. Conda, Docker or Singularity]
 - version: [e.g. 1.0.0]
-
-**Container (please complete the following information):**
- - tag: [e.g. 1.0.0]
-
-**Additional context**
-Add any other context about the problem here.
diff --git a/.github/feature_request.md b/.github/feature_request.md deleted file mode 100755 index 3616d75c..00000000 --- a/.github/feature_request.md +++ /dev/null @@ -1,16 +0,0 @@ -**Is your feature request related to a problem? Please describe.** - -A clear and concise description of what the problem is. -Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** - -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** - -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** - -Add any other context about the feature request here. diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml new file mode 100644 index 00000000..e052a635 --- /dev/null +++ b/.github/markdownlint.yml @@ -0,0 +1,9 @@ +# Markdownlint configuration file +default: true, +line-length: false +no-multiple-blanks: 0 +blanks-around-headers: false +blanks-around-lists: false +header-increment: false +no-duplicate-header: + siblings_only: true From 99cda0db36450d37e08ddb0699107c5fee32c4c1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 5 Apr 2019 16:14:09 +0100 Subject: [PATCH 12/18] Fixed markdown errors --- .github/CONTRIBUTING.md | 4 ++-- CHANGELOG.md | 3 ++- CODE_OF_CONDUCT.md | 2 +- docs/configuration/reference_genomes.md | 3 ++- docs/installation.md | 2 +- docs/output.md | 30 ++++++++++++------------- docs/troubleshooting.md | 3 +-- docs/usage.md | 29 +++++++++++++++--------- 8 files changed, 43 insertions(+), 33 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1a6d196b..bf0cdf38 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,7 +6,7 @@ We try to manage the required tasks for nf-core/atacseq using GitHub issues, you However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/atacseq then the best place to ask is the nf-core `atacseq` channel on [`Slack`](https://nf-core-invite.herokuapp.com/). +> If you need help using or modifying nf-core/atacseq then the best place to ask is the nf-core `atacseq` channel on [Slack](https://nf-core-invite.herokuapp.com/). ## Contribution workflow If you'd like to write some code for nf-core/atacseq, the standard workflow @@ -42,4 +42,4 @@ If there are any failures then the automated tests fail. These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. ## Getting help -For further information/help, please consult the [nf-core/atacseq documentation](https://github.com/nf-core/atacseq#documentation) and don't hesitate to get in touch on the nf-core `atacseq` channel on [`Slack`](https://nf-core-invite.herokuapp.com/). +For further information/help, please consult the [nf-core/atacseq documentation](https://github.com/nf-core/atacseq#documentation) and don't hesitate to get in touch on the nf-core `atacseq` channel on [Slack](https://nf-core-invite.herokuapp.com/). diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dbdf2f7..553f9e34 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,4 @@ +# nf-core/atacseq: Changelog -## nf-core/atacseq version 1.0dev - +## v1.0dev - [date] Initial release of nf-core/atacseq, created with the [nf-core](http://nf-co.re/) template. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 8e7ae058..09226d0d 100755 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [`Slack`](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md index 73272533..44c70b69 100755 --- a/docs/configuration/reference_genomes.md +++ b/docs/configuration/reference_genomes.md @@ -39,11 +39,12 @@ Multiple reference index types are held together with consistent structure for m We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default. The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon. The pipeline will automatically download the required reference files when you run the pipeline. -For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/ +For more information about the AWS iGenomes, see Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource. Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location. For example: + ```nextflow params.igenomes_base = '/path/to/data/igenomes/' ``` diff --git a/docs/installation.md b/docs/installation.md index fafea453..d3e3cd32 100755 --- a/docs/installation.md +++ b/docs/installation.md @@ -75,7 +75,7 @@ Be warned of two important points about this default configuration: ### Docker First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) -Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/atacseq). +Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (). ### Singularity If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative. 
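
The installation notes above cover the bundled `docker` and `singularity` profiles. On systems where neither profile applies as-is, a small custom Nextflow config can achieve much the same effect; the sketch below is an assumption-laden example (the cache path is a placeholder and `autoMounts` may not suit every cluster), not configuration shipped with the pipeline:

```nextflow
// Hypothetical site config — adjust every path for your own system.
singularity {
    enabled    = true
    autoMounts = true
    cacheDir   = '/path/to/singularity/cache'   // placeholder, not pipeline-supplied
}
process.container = 'nfcore/atacseq'
```

Saved as, for example, `~/.nextflow/config`, this is picked up automatically on every run.
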
diff --git a/docs/output.md b/docs/output.md index f7d4cb6b..d5eaba0c 100755 --- a/docs/output.md +++ b/docs/output.md @@ -23,9 +23,9 @@ The initial QC and alignments are performed at the library-level e.g. if the sam *Output directories*: * `fastqc/` - FastQC `*.html` files for read 1 (*and read2 if paired-end*) **before** adapter trimming. - * `fastqc/zips/` - FastQC `*.zip` files for read 1 (*and read2 if paired-end*) **before** adapter trimming. + FastQC `*.html` files for read 1 (*and read2 if paired-end*) **before** adapter trimming. + * `fastqc/zips/` + FastQC `*.zip` files for read 1 (*and read2 if paired-end*) **before** adapter trimming. 2. **Adapter trimming** @@ -38,11 +38,11 @@ The initial QC and alignments are performed at the library-level e.g. if the sam *Output directories*: * `trim_galore/` If `--saveTrimmed` is specified `*.fastq.gz` files **after** adapter trimming will be placed in this directory. - * `trim_galore/logs/` + * `trim_galore/logs/` `*.log` files generated by Trim Galore!. - * `trim_galore/fastqc/` + * `trim_galore/fastqc/` FastQC `*.html` files for read 1 (*and read2 if paired-end*) **after** adapter trimming. - * `trim_galore/fastqc/zips/` + * `trim_galore/fastqc/zips/` FastQC `*.zip` files for read 1 (*and read2 if paired-end*) **after** adapter trimming. *Plots*: @@ -90,11 +90,11 @@ The library-level alignments associated with any given replicate are merged and *Output directories*: * `bwa/mergedLibrary/` Merged library-level, coordinate sorted `*.bam` files after the marking of duplicates, and filtering based on various criteria. The file suffix for the final filtered files will be `*.mLb.clN.*`. If you specify the `--saveAlignedIntermediates` parameter then two additional sets of files will be present. These represent the unfiltered alignments with duplicates marked (`*.mLb.mkD.*`), and in the case of paired-end datasets the filtered alignments before the removal of orphan read pairs (`*.mLb.flT.*`). - * `bwa/mergedLibrary/samtools_stats/` + * `bwa/mergedLibrary/samtools_stats/` SAMtools `*.flagstat`, `*.idxstats` and `*.stats` files generated from the alignment files. - * `bwa/mergedLibrary/picard_metrics/` + * `bwa/mergedLibrary/picard_metrics/` Alignment QC files from picard CollectMultipleMetrics and the metrics file from MarkDuplicates: `*_metrics` and `*.metrics.txt`, respectively. - * `bwa/mergedLibrary/picard_metrics/pdf/` + * `bwa/mergedLibrary/picard_metrics/pdf/` Alignment QC plot files in `*.pdf` format from picard CollectMultipleMetrics. *Plots*: @@ -118,7 +118,7 @@ The library-level alignments associated with any given replicate are merged and *Software*: [MACS2](https://github.com/taoliu/MACS), [HOMER](http://homer.ucsd.edu/homer/download.html), [R](https://www.r-project.org/) - *Description*: + *Description*: MACS2 is one of the most popular peak-calling algorithms for ChIPSeq data. For ATAC-seq data we are also looking for genome-wide regions of enrichment but in this case without comparison to a standard control sample (e.g. input DNA). By default, the peaks are called with the MACS2 `--broad` parameter, and this is recommended for ATAC-seq data. If, however, you would like to call narrow peaks then please provide the `--narrowPeak` parameter when running the pipeline. @@ -158,7 +158,7 @@ The library-level alignments associated with any given replicate are merged and * `bwa/mergedLibrary/macs2/consensus/` * Consensus peakset across all samples in `*.bed` format. * Consensus peakset across all samples in `*.saf` format. 
Required by featureCounts for read quantification. - * HOMER `*.annotatePeaks.txt` peak-to-gene annotation file for consensus peaks. + * HOMER `*.annotatePeaks.txt` peak-to-gene annotation file for consensus peaks. * Spreadsheet representation of consensus peakset across samples **with** gene annotation columns: `*.boolean.annotatePeaks.txt`. The columns from individual peak files are included in this file along with the ability to filter peaks based on their presence or absence in multiple replicates/conditions. * Spreadsheet representation of consensus peakset across samples **without** gene annotation columns: `*.boolean.txt`. @@ -180,7 +180,7 @@ The library-level alignments associated with any given replicate are merged and This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential accessibility. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected. By default, the peak sets are not filtered, therefore, the consensus peaks will be generated across all peaks. However, it is possible to filter the consensus peaks and the corresponding read counts based on user-defined criteria (outlined in the section above), and then to use the same scripts to re-generate the results for a more refined analysis. In future iterations of the pipeline more formal analyses such as [IDR](https://projecteuclid.org/euclid.aoas/1318514284) may be implemented to obtain reproducible and high confidence peak sets with which to perform this sort of analysis. - By default, all possible pairwise comparisons across the groups within the experiment are performed. The DESeq2 results are generated by the pipeline in various ways. You can load up the results across all of the comparisons in a single spreadsheet, or individual folders will also be created that contain the results specific to a particular comparison. For the latter, additional files will also be generated where the intervals have been pre-filtered based on a couple of standard FDR thresholds. + By default, all possible pairwise comparisons across the groups within the experiment are performed. The DESeq2 results are generated by the pipeline in various ways. You can load up the results across all of the comparisons in a single spreadsheet, or individual folders will also be created that contain the results specific to a particular comparison. For the latter, additional files will also be generated where the intervals have been pre-filtered based on a couple of standard FDR thresholds. Please see [DESeq2 output](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#differential-expression-analysis) for a description of the columns generated by DESeq2. @@ -193,7 +193,7 @@ The library-level alignments associated with any given replicate are merged and * `*.dds.rld.RData` file containing R `dds` and `rld` objects generated by DESeq2. * `R_sessionInfo.log` file containing information about R, the OS and attached or loaded packages. * `bwa/mergedLibrary/macs2/consensus//` - * `*.results.txt` spreadsheet containing comparison-specific DESeq2 output for differential accessibility results across all peaks. + * `*.results.txt` spreadsheet containing comparison-specific DESeq2 output for differential accessibility results across all peaks. 
* Subset of above file for peaks that pass FDR <= 0.01 (`*FDR0.01.results.txt`) and FDR <= 0.05 (`*FDR0.05.results.txt`). * BED files for peaks that pass FDR <= 0.01 (`*FDR0.01.results.bed`) and FDR <= 0.05 (`*FDR0.05.results.bed`). * MA, Volcano, clustering and scatterplots at FDR <= 0.01 and FDR <= 0.05: `*deseq2.plots.pdf`. @@ -247,7 +247,7 @@ You can skip this portion of the analysis by specifying the `--skipMergeReplicat The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. - For more information about how to use MultiQC reports, see http://multiqc.info + For more information about how to use MultiQC reports, see . *Output directories*: * `multiqc/` @@ -275,7 +275,7 @@ You can skip this portion of the analysis by specifying the `--skipMergeReplicat * `igv_session.xml` file. * `igv_files.txt` file containing a listing of the files used to create the IGV session, and their associated colours. - *Plots*: + *Plots*: [IGV screenshot](images/igv_screenshot.png) ## Other results diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 0056d472..b37e7197 100755 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -1,7 +1,6 @@ # nf-core/atacseq: Troubleshooting ## Extra resources and getting help -If you still have an issue with running the pipeline then feel free to contact us in the `atacseq` channel on [`Slack`](https://nf-core-invite.herokuapp.com/). -Have a look at the [pipeline website](https://github.com/nf-core/atacseq) to find out how. +If you still have an issue with running the pipeline then feel free to contact us in the `atacseq` channel on [Slack](https://nf-core-invite.herokuapp.com/). If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [google group](https://groups.google.com/forum/#!forum/nextflow). diff --git a/docs/usage.md b/docs/usage.md index 0d6fa616..d9457b62 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -67,6 +67,7 @@ NXF_OPTS='-Xms1g -Xmx4g' ## Running the pipeline The typical command for running the pipeline is as follows: + ```bash nextflow run nf-core/atacseq --design design.csv --genome GRCh37 -profile docker ``` @@ -104,19 +105,19 @@ Use this parameter to choose a configuration profile. Profiles can give configur If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. * `awsbatch` - * A generic configuration profile to be used with AWS Batch. + * A generic configuration profile to be used with AWS Batch. 
* `conda`
-    * A generic configuration profile to be used with [conda](https://conda.io/docs/)
-    * Pulls most software from [Bioconda](https://bioconda.github.io/)
+  * A generic configuration profile to be used with [conda](https://conda.io/docs/)
+  * Pulls most software from [Bioconda](https://bioconda.github.io/)
* `docker`
-    * A generic configuration profile to be used with [Docker](http://docker.com/)
-    * Pulls software from dockerhub: [`nfcore/atacseq`](http://hub.docker.com/r/nfcore/atacseq/)
+  * A generic configuration profile to be used with [Docker](http://docker.com/)
+  * Pulls software from dockerhub: [`nfcore/atacseq`](http://hub.docker.com/r/nfcore/atacseq/)
* `singularity`
-    * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/)
-    * Pulls software from dockerhub: [`nfcore/atacseq`](http://hub.docker.com/r/nfcore/atacseq/)
+  * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/)
+  * Pulls software from dockerhub: [`nfcore/atacseq`](http://hub.docker.com/r/nfcore/atacseq/)
* `test`
-    * A profile with a complete configuration for automated testing
-    * Includes links to test data so needs no other parameters
+  * A profile with a complete configuration for automated testing
+  * Includes links to test data so needs no other parameters

### `--design`
You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below.
@@ -224,48 +225,56 @@ params {

### `--fasta`
Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--saveGenomeIndex` to save BWA index for future runs.
+
```bash
--fasta '[path to FASTA reference]'
```

### `--gtf`
The full path to GTF file for annotating peaks (*mandatory* if `--genome` is not specified). Note that the GTF file should resemble the Ensembl format.
+
```bash
--gtf '[path to GTF file]'
```

### `--bwa_index`
Full path to an existing BWA index for your reference genome including the base name for the index.
+
```bash
--bwa_index '[directory containing BWA index]/genome.fa'
```

### `--gene_bed`
The full path to BED file for genome-wide gene intervals. This will be created from the GTF file if it isn't specified.
+
```bash
--gene_bed '[path to gene BED file]'
```

### `--tss_bed`
The full path to BED file for genome-wide transcription start sites. This will be created from the gene BED file if it isn't specified.
+
```bash
--tss_bed '[path to tss BED file]'
```

### `--macs_gsize`
[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. These have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. For other genomes, if this parameter isn't specified then the MACS2 peak-calling and differential analysis will be skipped.
+
```bash
--macs_gsize 2.7e9
```

### `--mito_name`
Name of mitochondrial chromosome in reference assembly. Reads aligning to this contig are filtered out if a valid identifier is provided, otherwise this step is skipped. Where possible these have been provided in the [`igenomes.config`](../conf/igenomes.config).
+ ```bash --mito_name chrM ``` ### `--blacklist` If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`blacklists`](../assets/blacklists/) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter. + ```bash --blacklist '[path to blacklisted regions]' ``` @@ -321,7 +330,7 @@ Wherever process-specific requirements are set in the pipeline, the default valu If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -If you have any questions or issues please send us a message on [`Slack`](https://nf-core-invite.herokuapp.com/). +If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). ## AWS Batch specific parameters Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. From 90a47b9cdeaa82e89f6b986f437055023556e114 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sun, 7 Apr 2019 14:29:47 +0100 Subject: [PATCH 13/18] Fix conda package conflict --- README.md | 2 +- environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 716882ea..29b7b620 100755 --- a/README.md +++ b/README.md @@ -73,4 +73,4 @@ The pipeline was developed by [Harshil Patel](mailto:harshil.patel@crick.ac.uk). The [nf-core/rnaseq](https://github.com/nf-core/rnaseq) and [nf-core/chipseq](https://github.com/nf-core/chipseq) pipelines developed by Phil Ewels were initially used as a template for this pipeline. Many thanks to Phil for all of his help and advice, and the team at SciLifeLab. -Many thanks to other who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@sven1103](https://github.com/sven1103), [@MaxUlysse](https://github.com/MaxUlysse), [@micans](https://github.com/micans), [@pditommaso](https://github.com/pditommaso). +Many thanks to others who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@sven1103](https://github.com/sven1103), [@MaxUlysse](https://github.com/MaxUlysse), [@micans](https://github.com/micans), [@pditommaso](https://github.com/pditommaso). 
diff --git a/environment.yml b/environment.yml index 629d7509..1e6a9fba 100755 --- a/environment.yml +++ b/environment.yml @@ -25,7 +25,7 @@ dependencies: - picard=2.19.0 - bamtools=2.5.1 - pysam=0.15.2 - - bedtools=2.28.0 + - bedtools=2.27.1 - ucsc-bedgraphtobigwig=377 - macs2=2.1.2 - homer=4.9.1 From 75eed7ec324989686c71873f543a4eb40957d17a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sun, 7 Apr 2019 14:40:49 +0100 Subject: [PATCH 14/18] AWSBatch fix from PR 296 from tools --- main.nf | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index c4f4c391..1403a7f7 100755 --- a/main.nf +++ b/main.nf @@ -225,16 +225,14 @@ if (params.blacklist) { /* -- AWS -- */ //////////////////////////////////////////////////// -// AWSBatch sanity checking -if (workflow.profile == 'awsbatch'){ - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!" -} - -// Check workDir/outdir paths to be S3 buckets if running on AWSBatch -// related: https://github.com/nextflow-io/nextflow/issues/813 -if (workflow.profile == 'awsbatch') { - if (!workflow.workDir.startsWith('s3:') || !params.outdir.startsWith('s3:')) exit 1, "Workdir or Outdir not on S3 - specify S3 Buckets for each to run on AWSBatch!" +if( workflow.profile == 'awsbatch') { + // AWSBatch sanity checking + if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + // related: https://github.com/nextflow-io/nextflow/issues/813 + if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." } /////////////////////////////////////////////////////////////////////////////// From 955e69bd8ff0f2f7c18bd4d0d4648cda1117e7f9 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 8 Apr 2019 05:37:15 +0100 Subject: [PATCH 15/18] Link docs moved to website --- README.md | 10 +-- docs/README.md | 10 +-- docs/configuration/adding_your_own.md | 86 ------------------ docs/configuration/local.md | 45 ---------- docs/configuration/reference_genomes.md | 50 ----------- docs/installation.md | 110 ------------------------ docs/troubleshooting.md | 6 -- 7 files changed, 10 insertions(+), 307 deletions(-) delete mode 100755 docs/configuration/adding_your_own.md delete mode 100644 docs/configuration/local.md delete mode 100755 docs/configuration/reference_genomes.md delete mode 100755 docs/installation.md delete mode 100755 docs/troubleshooting.md diff --git a/README.md b/README.md index 29b7b620..aa4461f6 100755 --- a/README.md +++ b/README.md @@ -56,14 +56,14 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool ### Documentation The nf-core/atacseq pipeline comes with documentation about the pipeline, found in the `docs/` directory: -1. [Installation](docs/installation.md) +1. [Installation](https://nf-co.re/usage/installation) 2. 
Pipeline configuration - * [Local installation](docs/configuration/local.md) - * [Adding your own system](docs/configuration/adding_your_own.md) - * [Reference genomes](docs/configuration/reference_genomes.md) + * [Local installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](docs/usage.md) 4. [Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](docs/troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) ### Credits diff --git a/docs/README.md b/docs/README.md index 2119a6b6..6ebe6ecc 100755 --- a/docs/README.md +++ b/docs/README.md @@ -2,11 +2,11 @@ The nf-core/atacseq documentation is split into the following files: -1. [Installation](installation.md) +1. [Installation](https://nf-co.re/usage/installation) 2. Pipeline configuration - * [Local installation](configuration/local.md) - * [Adding your own system](configuration/adding_your_own.md) - * [Reference genomes](configuration/reference_genomes.md) + * [Local installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](usage.md) 4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) diff --git a/docs/configuration/adding_your_own.md b/docs/configuration/adding_your_own.md deleted file mode 100755 index c84edaad..00000000 --- a/docs/configuration/adding_your_own.md +++ /dev/null @@ -1,86 +0,0 @@ -# nf-core/atacseq: Configuration for other clusters - -It is entirely possible to run this pipeline on other clusters, though you will need to set up your own config file so that the pipeline knows how to work with your cluster. - -> If you think that there are other people using the pipeline who would benefit from your configuration (eg. other common cluster setups), please let us know. We can add a new configuration and profile which can used by specifying `-profile ` when running the pipeline. The config file will then be hosted at `nf-core/configs` and will be pulled automatically before the pipeline is executed. - -If you are the only person to be running this pipeline, you can create your config file as `~/.nextflow/config` and it will be applied every time you run Nextflow. Alternatively, save the file anywhere and reference it when running the pipeline with `-c path/to/config` (see the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more). - -A basic configuration comes with the pipeline, which loads the [`conf/base.config`](../../conf/base.config) by default. This means that you only need to configure the specifics for your system and overwrite any defaults that you want to change. - -## Cluster Environment -By default, pipeline uses the `local` Nextflow executor - in other words, all jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - -To specify your cluster environment, add the following line to your config file: - -```nextflow -process.executor = 'YOUR_SYSTEM_TYPE' -``` - -Many different cluster types are supported by Nextflow. 
For more information, please see the [Nextflow documentation](https://www.nextflow.io/docs/latest/executor.html). - -Note that you may need to specify cluster options, such as a project or queue. To do so, use the `clusterOptions` config option: - -```nextflow -process { - executor = 'SLURM' - clusterOptions = '-A myproject' -} -``` - - -## Software Requirements -To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system. If possible, we _highly_ recommend using either Docker or Singularity. - -Please see the [`installation documentation`](../installation.md) for how to run using the below as a one-off. These instructions are about configuring a config file for repeated use. - -### Docker -Docker is a great way to run nf-core/atacseq, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required - nextflow will automatically fetch the [nfcore/atacseq](https://hub.docker.com/r/nfcore/atacseq/) image that we have created and is hosted at dockerhub at run time. - -To add docker support to your own config file, add the following: - -```nextflow -docker.enabled = true -process.container = "nfcore/atacseq" -``` - -Note that the dockerhub organisation name annoyingly can't have a hyphen, so is `nfcore` and not `nf-core`. - - -### Singularity image -Many HPC environments are not able to run Docker due to security issues. -[Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. - -To specify singularity usage in your pipeline config file, add the following: - -```nextflow -singularity.enabled = true -process.container = "nf-core/atacseq" -``` - -If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. -Instead, you'll have to do this yourself manually first, transfer the image file and then point to that. - -First, pull the image file where you have an internet connection: - -```bash -singularity pull --name nf-core-atacseq.simg nf-core/atacseq -``` - -Then transfer this file and point the config file to the image: - -```nextflow -singularity.enabled = true -process.container = "/path/to/nf-core-atacseq.simg" -``` - - -### Conda -If you're not able to use Docker or Singularity, you can instead use conda to manage the software requirements. -To use conda in your own config file, add the following: - -```nextflow -process.conda = "$baseDir/environment.yml" -``` diff --git a/docs/configuration/local.md b/docs/configuration/local.md deleted file mode 100644 index 81382053..00000000 --- a/docs/configuration/local.md +++ /dev/null @@ -1,45 +0,0 @@ -# nf-core/atacseq: Local Configuration - -If running the pipeline in a local environment, we highly recommend using either Docker or Singularity. - -## Docker -Docker is a great way to run `nf-core/atacseq`, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required. 
diff --git a/docs/configuration/local.md b/docs/configuration/local.md
deleted file mode 100644
index 81382053..00000000
--- a/docs/configuration/local.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# nf-core/atacseq: Local Configuration
-
-If running the pipeline in a local environment, we highly recommend using either Docker or Singularity.
-
-## Docker
-Docker is a great way to run `nf-core/atacseq`, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems.
-
-Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required. The `nf-core/atacseq` pipeline comes with a configuration profile for docker, making it very easy to use.
-
-First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/)
-
-Then, simply run the analysis pipeline:
-
-```bash
-nextflow run nf-core/atacseq -profile docker --genome '<GENOME ID>' --design '<DESIGN FILE>'
-```
-
-Nextflow will recognise `nf-core/atacseq` and download the pipeline from GitHub. The `-profile docker` configuration lists the [nf-core/atacseq](https://hub.docker.com/r/nfcore/atacseq/) image that we have created and host on dockerhub, and this is downloaded at run time.
-
-### Pipeline versions
-The public docker images are tagged with the same version numbers as the code, which you can use to ensure reproducibility. When running the pipeline, specify the pipeline version with `-r`, for example `-r 1.0`. This uses pipeline code and docker image from this tagged version.
-
-
-## Singularity image
-Many HPC environments are not able to run Docker due to security issues. [Singularity](http://singularity.lbl.gov/) is a tool very similar to Docker that is designed to run on such HPC systems. Even better, it can create images directly from those hosted on dockerhub.
-
-To use the singularity image for a single run, use `-with-singularity`. This will download the docker container from dockerhub and create a singularity image for you dynamically.
-
-If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. Instead, you'll have to do this yourself manually first, transfer the image file and then point to that.
-
-First, pull the image file where you have an internet connection:
-
-> NB: The "tag" at the end of this command corresponds to the pipeline version.
-> Here, we're pulling the docker image for version 1.0 of the nf-core/atacseq pipeline
-> Make sure that this tag corresponds to the version of the pipeline that you're using
-
-```bash
-singularity pull --name nf-core-atacseq-1.0.img docker://nfcore/atacseq:1.0
-```
-
-Then transfer this file and run the pipeline with this path:
-
-```bash
-nextflow run /path/to/nf-core-atacseq -with-singularity /path/to/nf-core-atacseq-1.0.img
-```
diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md
deleted file mode 100755
index 44c70b69..00000000
--- a/docs/configuration/reference_genomes.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# nf-core/atacseq: Reference Genomes Configuration
-
-The nf-core/atacseq pipeline needs a reference genome for alignment and annotation.
-
-These paths can be supplied on the command line at run time (see the [usage docs](../usage.md)),
-but for convenience it's often better to save these paths in a nextflow config file.
-See below for instructions on how to do this.
-Read [Adding your own system](adding_your_own.md) to find out how to set up custom config files.
-
-## Adding paths to a config file
-Specifying long paths every time you run the pipeline is a pain.
-To make this easier, the pipeline comes configured to understand reference genome keywords which correspond to preconfigured paths, meaning that you can just specify `--genome ID` when running the pipeline.
-
-Note that this genome key can also be specified in a config file if you always use the same genome.
-
-To use this system, add paths to your config file using the following template:
-
-```nextflow
-params {
-    genomes {
-        'YOUR-ID' {
-            fasta = '<PATH TO FASTA>/genome.fa'
-        }
-        'OTHER-GENOME' {
-            // [..]
-        }
-    }
-    // Optional - default genome. Ignored if --genome 'OTHER-GENOME' specified on command line
-    genome = 'YOUR-ID'
-}
-```
-
-You can add as many genomes as you like as long as they have unique IDs.
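To make the template above concrete, a filled-in entry might look like the sketch below - the genome ID and filesystem path are purely hypothetical placeholders:

```nextflow
params {
    genomes {
        'GRCh38-local' {                             // hypothetical genome ID
            fasta = '/data/refs/GRCh38/genome.fa'    // hypothetical path to the fasta file
        }
    }
    genome = 'GRCh38-local'   // default used when --genome is not given on the command line
}
```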
-## Illumina iGenomes
-To make the use of reference genomes easier, Illumina has developed a centralised resource called [iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html).
-Multiple reference index types are held together with consistent structure for multiple genomes.
-
-We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default.
-The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon.
-The pipeline will automatically download the required reference files when you run the pipeline.
-For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/
-
-Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource.
-Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location.
-For example:
-
-```nextflow
-params.igenomes_base = '/path/to/data/igenomes/'
-```
diff --git a/docs/installation.md b/docs/installation.md
deleted file mode 100755
index d3e3cd32..00000000
--- a/docs/installation.md
+++ /dev/null
@@ -1,110 +0,0 @@
-# nf-core/atacseq: Installation
-
-To start using the nf-core/atacseq pipeline, follow the steps below:
-
-* [Install NextFlow](#install-nextflow)
-* [Install the pipeline](#install-the-pipeline)
-  * [Automatic](#automatic)
-  * [Offline](#offline)
-  * [Development](#development)
-* [Pipeline configuration](#pipeline-configuration)
-  * [Docker](#docker)
-  * [Singularity](#singularity)
-  * [Conda](#conda)
-  * [Configuration profiles](#configuration-profiles)
-* [Reference genomes](#reference-genomes)
-
-## Install NextFlow
-Nextflow runs on most POSIX systems (Linux, Mac OSX, etc.). It can be installed by running the following commands:
-
-```bash
-# Make sure that Java v8+ is installed:
-java -version
-
-# Install Nextflow
-curl -fsSL get.nextflow.io | bash
-
-# Add Nextflow binary to your PATH:
-mv nextflow ~/bin/
-# OR system-wide installation:
-# sudo mv nextflow /usr/local/bin
-```
-
-See [nextflow.io](https://www.nextflow.io/) for further instructions on how to install and configure Nextflow.
-
-## Install the pipeline
-
-### Automatic
-The pipeline itself needs no installation - NextFlow will automatically fetch it from GitHub if `nf-core/atacseq` is specified as the pipeline name.
-
-### Offline
-The above method requires an internet connection so that Nextflow can download the pipeline files. If you're running on a system that has no internet connection, you'll need to download and transfer the pipeline files manually:
-
-```bash
-wget https://github.com/nf-core/atacseq/archive/master.zip
-mkdir -p ~/my-pipelines/nf-core/
-unzip master.zip -d ~/my-pipelines/nf-core/
-cd ~/my_data/
-nextflow run ~/my-pipelines/nf-core/atacseq-master
-```
-
-To stop nextflow from looking for updates online, you can tell it to run in offline mode by specifying the following environment variable in your ~/.bashrc file:
-
-```bash
-export NXF_OFFLINE='TRUE'
-```
-
-### Development
-
-If you would like to make changes to the pipeline, it's best to make a fork on GitHub and then clone the files. Once cloned, you can run the pipeline directly as above.
-
-
-## Pipeline configuration
-By default, the pipeline loads a basic server configuration from [`conf/base.config`](../conf/base.config).
-This uses a number of sensible defaults for process requirements and is suitable for running
-on a simple (if powerful!) local server.
-
-Be warned of two important points about this default configuration:
-
-1. The default profile uses the `local` executor
-    * All jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node.
-    * See the [nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for information about running with other hardware backends. Most job scheduler systems are natively supported.
-2. Nextflow will expect all software to be installed and available on the `PATH`
-    * It's expected to use an additional config profile for docker, singularity or conda support. See below.
-
-### Docker
-First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/)
-
-Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/atacseq/).
-
-### Singularity
-If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative.
-The process is very similar: running the pipeline with the option `-profile singularity` tells Nextflow to enable singularity for this run. An image containing all of the software requirements will be automatically fetched and used from Singularity Hub.
-
-If running offline with Singularity, you'll need to download and transfer the Singularity image first:
-
-```bash
-singularity pull --name nf-core-atacseq.simg shub://nf-core/atacseq
-```
-
-Once transferred, use `-with-singularity` and specify the path to the image file:
-
-```bash
-nextflow run /path/to/nf-core-atacseq -with-singularity nf-core-atacseq.simg
-```
-
-Remember to pull updated versions of the singularity image if you update the pipeline.
-
-### Conda
-If you're not able to use Docker _or_ Singularity, you can instead use conda to manage the software requirements.
-This is slower and less reproducible than the above, but is still better than having to install all requirements yourself!
-The pipeline ships with a conda environment file and nextflow has built-in support for this.
-To use it, first ensure that you have conda installed (we recommend [miniconda](https://conda.io/miniconda.html)), then follow the same pattern as above and use the flag `-profile conda`.
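If you use the conda route regularly, caching the built environment avoids re-creating it on every run. The sketch below uses Nextflow's standard `conda` configuration scope, but the cache path is an assumption - point it anywhere persistent on your system:

```nextflow
// Build the environment from the pipeline's environment.yml and cache it for reuse
process.conda = "$baseDir/environment.yml"
conda.cacheDir = '/path/to/shared/conda-cache'   // hypothetical shared location
```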
-
-### Configuration profiles
-
-See [`docs/configuration/adding_your_own.md`](configuration/adding_your_own.md)
-
-## Reference genomes
-
-See [`docs/configuration/reference_genomes.md`](configuration/reference_genomes.md)
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
deleted file mode 100755
index b37e7197..00000000
--- a/docs/troubleshooting.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# nf-core/atacseq: Troubleshooting
-
-## Extra resources and getting help
-If you still have an issue with running the pipeline then feel free to contact us in the `atacseq` channel on [Slack](https://nf-core-invite.herokuapp.com/).
-
-If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [Google group](https://groups.google.com/forum/#!forum/nextflow).

From 817d71643db8199ea2b21034c3c72670c84452f7 Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Mon, 8 Apr 2019 05:38:38 +0100
Subject: [PATCH 16/18] Removed Singularity container reference

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index aa4461f6..9c853408 100755
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@

 **nfcore/atacseq** is a bioinformatics analysis pipeline used for ATAC-seq data.

-The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker / singularity containers making installation trivial and results highly reproducible.
+The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible.

 ### Pipeline summary

From 7ae555078564c75c16099c6be620ac33efc4cd67 Mon Sep 17 00:00:00 2001
From: drpatelh
Date: Mon, 8 Apr 2019 14:11:12 +0100
Subject: [PATCH 17/18] Update docs

---
 docs/output.md | 31 +++++++++++++++++--------------
 docs/usage.md  | 12 ++++--------
 main.nf        | 12 ++++++------
 3 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index d5eaba0c..a25625f5 100755
--- a/docs/output.md
+++ b/docs/output.md
@@ -3,7 +3,7 @@
 This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.

 ## Pipeline overview
-The pipeline is built using [Nextflow](https://www.nextflow.io/). See [`main README.md`](../README.md) for a condensed listing of the pipeline, and the bioinformatics tools used at each step.
+The pipeline is built using [Nextflow](https://www.nextflow.io/). See [`main README.md`](../README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step.

 See [Illumina website](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/atac-seq.html) for more information regarding the ATAC-seq protocol, and for an extensive list of publications.

@@ -19,7 +19,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam

 [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)

 *Description*:
-  FastQC gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
+  FastQC gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%A/C/G/T). You get information about adapter contamination and other overrepresented sequences. For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).

 *Output directories*:
    * `fastqc/`
@@ -69,7 +69,7 @@

 ## Merged library-level analysis

-The library-level alignments associated with any given replicate are merged and subsequently used for the downstream analyses.
+The library-level alignments associated with the same sample are merged and subsequently used for the downstream analyses.

 1. **Alignment merging, duplicate marking and filtering**

    [picard MergeSamFiles & MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html)

    *Description*:
    Picard MergeSamFiles and MarkDuplicates are used in combination to merge the alignments, and for the marking of duplicates, respectively. If you only have one library for any given replicate then the merging step isn't carried out because the library-level and merged library-level BAM files will be exactly the same.

-    Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keepDups` parameter but its generally recommend to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keepMultiMap`.
+    Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keepDups` parameter but it's generally recommended to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keepMultiMap`.

    Certain cell types and tissues yield an enormous fraction (typically 20–80%) of unusable sequences of mitochondrial origin. This is a known problem that is specific to ATAC-seq library preps - see [Montefiori et al. 2017](https://www.nature.com/articles/s41598-017-02547-w). There is an option to keep these reads using the `--keepMito` parameter but it's generally recommended to remove these in order to get a more reliable assessment of the duplication rate from the rest of the genome, and to avoid any biases in the downstream analyses.

@@ -121,9 +121,9 @@

    *Description*:
    MACS2 is one of the most popular peak-calling algorithms for ChIPSeq data. For ATAC-seq data we are also looking for genome-wide regions of enrichment but in this case without comparison to a standard control sample (e.g. input DNA).

-    By default, the peaks are called with the MACS2 `--broad` parameter, and this is recommended for ATAC-seq data. If, however, you would like to call narrow peaks then please provide the `--narrowPeak` parameter when running the pipeline.
+    By default, the peaks are called with the MACS2 `--broad` parameter as this is recommended for ATAC-seq data. If, however, you would like to call narrow peaks then please provide the `--narrowPeak` parameter when running the pipeline.

-    [HOMER annotatePeaks.pl](http://homer.ucsd.edu/homer/ngs/annotation.html) is used to annotate the peaks relative to known genomic features. HOMER is able to use the annotation provided to the pipeline in the form of the `--gtf` file. Please note that some of the output columns will be blank because the annotation isnt provided using HOMER's in-built database format. However, the more important fields required for downstream analysis will be populated i.e. *Annotation*, *Distance to TSS* and *Nearest Promoter ID*.
+ [HOMER annotatePeaks.pl](http://homer.ucsd.edu/homer/ngs/annotation.html) is used to annotate the peaks relative to known genomic features. HOMER is able to use the `--gtf` annotation file which is provided to the pipeline. Please note that some of the output columns will be blank because the annotation isnt provided using HOMER's in-built database format. However, the more important fields required for downstream analysis will be populated i.e. *Annotation*, *Distance to TSS* and *Nearest Promoter ID*. Various QC plots per sample including number of peaks, fold-change distribution, FRiP score and peak-to-gene feature annotation are also generated by the pipeline. Where possible these have been integrated into the MultiQC report. @@ -137,7 +137,7 @@ The library-level alignments associated with any given replicate are merged and * `bwa/mergedLibrary/macs2/qc` * QC plots for MACS2 peaks: `macs_peak.mLb.clN.plots.pdf` * QC plots for peak-to-gene feature annotation: `macs_annotatePeaks.mLb.clN.plots.pdf` - * MultiQC custom-content files for [FRiP score](https://genome.cshlp.org/content/22/9/1813.full.pdf+html), peak count and peak-to-gene ratios: `*.FRiP_mqc.tsv` and `*.count_mqc.tsv` and `macs_annotatePeaks.mLb.clN.summary_mqc.tsv` respectively. + * MultiQC custom-content files for [FRiP score](https://genome.cshlp.org/content/22/9/1813.full.pdf+html), peak count and peak-to-gene ratios: `*.FRiP_mqc.tsv`, `*.count_mqc.tsv` and `macs_annotatePeaks.mLb.clN.summary_mqc.tsv` respectively. *Plots*: [MultiQC - MACS2 total peak count plot](images/mqc_macs2_peak_count_plot.png) @@ -180,7 +180,7 @@ The library-level alignments associated with any given replicate are merged and This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential accessibility. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected. By default, the peak sets are not filtered, therefore, the consensus peaks will be generated across all peaks. However, it is possible to filter the consensus peaks and the corresponding read counts based on user-defined criteria (outlined in the section above), and then to use the same scripts to re-generate the results for a more refined analysis. In future iterations of the pipeline more formal analyses such as [IDR](https://projecteuclid.org/euclid.aoas/1318514284) may be implemented to obtain reproducible and high confidence peak sets with which to perform this sort of analysis. - By default, all possible pairwise comparisons across the groups within the experiment are performed. The DESeq2 results are generated by the pipeline in various ways. You can load up the results across all of the comparisons in a single spreadsheet, or individual folders will also be created that contain the results specific to a particular comparison. For the latter, additional files will also be generated where the intervals have been pre-filtered based on a couple of standard FDR thresholds. + By default, all possible pairwise comparisons across the groups within the experiment are performed. The DESeq2 results are outputted by the pipeline in various ways. You can load up the results across all of the comparisons in a single spreadsheet, or individual folders will also be created that contain the results specific to a particular comparison. 
For the latter, additional files will also be generated where the intervals have been pre-filtered based on a couple of standard FDR thresholds.

    Please see [DESeq2 output](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#differential-expression-analysis) for a description of the columns generated by DESeq2.

@@ -251,8 +251,9 @@ You can skip this portion of the analysis by specifying the `--skipMergeReplicat

    *Output directories*:
    * `multiqc/`
-      * `Project_multiqc_report.html` - a standalone HTML file that can be viewed in your web browser.
-      * `Project_multiqc_data/` - directory containing parsed statistics from the different tools used in the pipeline.
+      * `multiqc_report.html` - a standalone HTML file that can be viewed in your web browser.
+      * `multiqc_data/` - directory containing parsed statistics from the different tools used in the pipeline.
+      * `multiqc_plots/` - directory containing static images from the report in various formats.

 2. **Create IGV session file**

    [IGV](https://software.broadinstitute.org/software/igv/)

    *Description*:
-    An IGV session file will be created at the end of the pipeline containing the file names for the normalised bigWig tracks, peaks and differential sites generated by the pipeline. This avoids having to load all the data individually into IGV for visualisation.
+    An IGV session file will be created at the end of the pipeline containing the normalised bigWig tracks, peaks and differential sites. This avoids having to load all of the data individually into IGV for visualisation.

    The genome fasta file required for the IGV session will be the same as the one that was provided to the pipeline. This will be copied into `reference_genome/` to overcome any loading issues. If you prefer to use another path or an in-built genome provided by IGV just change the `genome` entry in the second line of the session file.

@@ -292,7 +293,7 @@
 * `reference_genome/`
    A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV.
 * `reference_genome/BWAIndex/`
-    If they dont exist already and if the `--saveGenomeIndex` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for reruns of this pipeline or for other purposes.
+    If they don't exist already and if the `--saveGenomeIndex` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes.

 2. **Pipeline information**

    *Output directories*:
    * `pipeline_info/`
-      Default reports generated by the pipeline are `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`.
+      Reports generated by the pipeline - `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`.
+  * `pipeline_info/nf-core/`
+      Default reports generated by Nextflow - `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.svg`.
   * `Documentation/`
-      Additional reports and documentation generated by the pipeline i.e. `pipeline_report.html`, `pipeline_report.txt`, `results_description.html`.
+      Documentation for interpretation of results in HTML format - `results_description.html`.
diff --git a/docs/usage.md b/docs/usage.md
index d9457b62..3ae6cdc6 100755
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -2,7 +2,6 @@

 ## Table of contents

-* [Table of contents](#table-of-contents)
 * [Introduction](#introduction)
 * [Running the pipeline](#running-the-pipeline)
 * [Updating the pipeline](#updating-the-pipeline)
@@ -105,7 +104,7 @@ Use this parameter to choose a configuration profile. Profiles can give configur

 If `-profile` is not specified at all, the pipeline will be run locally and expects all software to be installed and available on the `PATH`.

 * `awsbatch`
-    * A generic configuration profile to be used with AWS Batch.
+    * A generic configuration profile to be used with AWS Batch
 * `conda`
    * A generic configuration profile to be used with [conda](https://conda.io/docs/)
    * Pulls most software from [Bioconda](https://bioconda.github.io/)
@@ -128,7 +127,7 @@ You will need to create a design file with information about the samples in your

 #### Multiple replicates

-The `group` identifier is the same when you have multiple replicates from the same experimental group, just increment the `replicate` identifier appropriately. The first replicate value for any given sample must be 1. Below is an example for a single experimental group in triplicate:
+The `group` identifier is the same when you have multiple replicates from the same experimental group; simply increment the `replicate` identifier appropriately. The first replicate value for any given experimental group must be 1. Below is an example for a single experimental group in triplicate:

 ```bash
 group,replicate,fastq_1,fastq_2
@@ -173,8 +172,6 @@ treatment,3,AEG588A6_S6_L004_R1_001.fastq.gz,AEG588A6_S6_L004_R2_001.fastq.gz

 Example design files have been provided with the pipeline for [paired-end](../assets/design_pe.csv) and [single-end](../assets/design_se.csv) data.

->NB: The pipeline will assume that the design file is in the correct format so please double-check this before execution.
-
 ## Generic arguments

 ### `--singleEnd`
@@ -204,7 +201,7 @@ You can find the keys to specify the genomes in the [`iGenomes config file`](../
 * _Drosophila_
   * `--genome BDGP6`
 * _S. cerevisiae_
-  * `--genome 'R64-1-1'`
+  * `--genome R64-1-1`

 > There are numerous others - check the config file for more.

@@ -367,8 +364,7 @@ You can also supply a run name to resume a specific run: `-resume [run-name]`. U
 Specify the path to a specific config file (this is a core NextFlow command).

 **NB:** Single hyphen (core Nextflow option)
-
-Note - you can use this to override pipeline defaults.
+**NB:** You can use this config to override pipeline defaults - see the example config sketched below.

 ### `--custom_config_version`
 Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`.
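As an illustration of such an override config, here is an editorial sketch of a small file you might pass with `-c`. The `params.outdir` parameter and the `multiqc` process both appear in `main.nf` below; the file name and the values themselves are assumptions:

```nextflow
// my.config (hypothetical name) - run with: nextflow run nf-core/atacseq -c my.config
params {
    outdir = './atacseq-results'   // assumption: a custom output location
}

process {
    withName: multiqc {
        memory = '8 GB'            // assumption: give the MultiQC step more memory
    }
}
```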
diff --git a/main.nf b/main.nf index 1403a7f7..5a732493 100755 --- a/main.nf +++ b/main.nf @@ -1619,11 +1619,11 @@ process igv { * Parse software version numbers */ process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: 'copy', - saveAs: {filename -> - if (filename.indexOf(".csv") > 0) filename - else null - } + publishDir "${params.outdir}/pipeline_info", mode: 'copy', + saveAs: {filename -> + if (filename.indexOf(".csv") > 0) filename + else null + } output: file 'software_versions_mqc.yaml' into software_versions_mqc @@ -1702,7 +1702,7 @@ process multiqc { file ('macs/mergedReplicate/consensus/*') from mrep_macs_consensus_counts_mqc.collect().ifEmpty([]) file ('macs/mergedReplicate/consensus/*') from mrep_macs_consensus_deseq_mqc.collect().ifEmpty([]) - file ('software_versions/*') from software_versions_mqc + file ('software_versions/*') from software_versions_mqc.collect() file ('workflow_summary/*') from create_workflow_summary(summary) output: From 9eee4a518edc0407691d3fb18dc853f123285d08 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 8 Apr 2019 14:32:11 +0100 Subject: [PATCH 18/18] Bump version to 1.0.0 --- .travis.yml | 2 +- Dockerfile | 2 +- environment.yml | 2 +- nextflow.config | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index b00e5355..110d7d41 100755 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ before_install: - docker pull nfcore/atacseq:dev # Fake the tag locally so that the pipeline runs properly # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/atacseq:dev nfcore/atacseq:dev + - docker tag nfcore/atacseq:dev nfcore/atacseq:1.0.0 install: # Install Nextflow diff --git a/Dockerfile b/Dockerfile index 3ffeca97..280237c5 100755 --- a/Dockerfile +++ b/Dockerfile @@ -4,4 +4,4 @@ LABEL authors="harshil.patel@crick.ac.uk" \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-atacseq-1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-atacseq-1.0.0/bin:$PATH diff --git a/environment.yml b/environment.yml index 1e6a9fba..0e032c0e 100755 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-atacseq-1.0dev +name: nf-core-atacseq-1.0.0 channels: - conda-forge - bioconda diff --git a/nextflow.config b/nextflow.config index 03087bd4..b8063f07 100755 --- a/nextflow.config +++ b/nextflow.config @@ -69,7 +69,7 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/atacseq:dev' +process.container = 'nfcore/atacseq:1.0.0' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -127,7 +127,7 @@ manifest { description = 'ATACSeq peak-calling and differential analysis pipeline.' mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.0dev' + version = '1.0.0' } // Function to ensure that resource requirements don't go beyond