diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..b290e090 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig index a9229959..157ecd15 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules @@ -18,10 +18,19 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset [/assets/blacklists/GRCh37-blacklist.bed] trim_trailing_whitespace = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.gitattributes b/.gitattributes index 050bb120..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 2bfaa681..3b9272be 
100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,6 +9,7 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/chipseq then the best place to ask is on the nf-core Slack [#chipseq](https://nfcore.slack.com/channels/chipseq) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -25,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -85,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. 
-The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes @@ -101,3 +108,18 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/chipseq/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 1d09e717..e2bb24d5 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/chipseq _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8da27af0..4cd6ca4f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,9 +15,11 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/chip - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/chipseq/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/chipseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/chipseq/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/chipseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 5a1c3622..b40d8bfb 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,7 +8,7 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/chipseq' runs-on: ubuntu-latest @@ -16,16 +16,26 @@ jobs: matrix: aligner: ["bwa", "bowtie2", "chromap", "star"] steps: - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/chipseq/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/chipseq/results-${{ github.sha }}" + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/chipseq/results-${{ github.sha }}", "aligner": "${{ matrix.aligner }}" } - profiles: test_full,aws_tower + profiles: test_full + + - uses: actions/upload-artifact@v4 + if: success() || failure() + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 73640cc6..bf518006 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,21 +5,29 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/chipseq' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + 
uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/chipseq/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/chipseq/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + + - uses: actions/upload-artifact@v4 + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 1e03d409..68c8deab 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,13 +13,13 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/chipseq' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/chipseq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/chipseq ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 520b9d2b..de8700bc 100755 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,10 @@ on: env: NXF_ANSI_LOG: false +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + jobs: test: name: Run pipeline with test data @@ -20,17 +24,20 @@ jobs: 
strategy: matrix: NXF_VER: - - "21.10.3" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..0b6b1f27 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..2d20d644 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 
'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index b4f90734..7b84c0be 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: 
peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" - else - echo "::set-output name=result::fail" - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . 
git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/chipseq/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a5ce69b..1fcafe88 100755 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,79 +4,41 @@ name: nf-core linting # that the code meets the nf-core guidelines. 
on: push: + branches: + - dev pull_request: release: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v2 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-node@v2 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: "3.12" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.7" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -97,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,17 +11,17 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 00000000..03ecfcf7 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 
@@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,14 +1,20 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source 
code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..add014ed 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,7 @@ repository_type: pipeline +nf_core_version: "2.14.1" +lint: + nextflow_config: + - config_defaults: + - params.bamtools_filter_se_config + - params.bamtools_filter_pe_config diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..4dc0f1dc --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/.prettierignore b/.prettierignore index d0e7ae58..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,6 @@ email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -7,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index ea9b4606..7cc0d258 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,51 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.1.0dev - [date] + +### Enhancements & fixes + +- Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2) +- [[#317](https://github.com/nf-core/chipseq/issues/317)] - Added metro map +- [[#288](https://github.com/nf-core/chipseq/issues/291)] - Bump `chromap` version 2 and enable all the steps below chromap again when paired-end data is processed. +- [[#311](https://github.com/nf-core/chipseq/issues/311)] - Add back `--skip_spp` parameter which was unintentionally removed from the code. 
+- Install available nf-core subworkflows and refactor code accordingly +- [[#318](https://github.com/nf-core/chipseq/issues/318)] - Update `bowtie2/align` module to fix issue when downloading its singularity image. +- [[#320](https://github.com/nf-core/chipseq/issues/320)] - Fix samplesheet control column in documentation examples. +- [[#328](https://github.com/nf-core/chipseq/issues/328)] - Modify documentation to clarify that it is necessary to provide the `--read_length` when `--genome` is set and `--macs_gsize` has not been provided. +- Remove `enable_conda` param from local modules. +- Fix the path where `chromap` index is stored when `--save_reference` is set. +- Fix untar of `chromap` index when using `--chromap_index` param. +- [nf-core/tools#2286](https://github.com/nf-core/tools/issues/2286) - Set default container registry outside profile scope. +- [[#343](https://github.com/nf-core/chipseq/issues/343)] - Provide replicate information explicitly in samplesheet. +- Updated pipeline template to [nf-core/tools 2.10](https://github.com/nf-core/tools/releases/tag/2.10). +- [[#367](https://github.com/nf-core/chipseq/issues/367)] - Get rid of `CheckIfExists` for params paths. +- [[#370](https://github.com/nf-core/chipseq/issues/370)] - Fix stack overflow exceptions in phantompeakqualtools ([see here](https://github.com/kundajelab/phantompeakqualtools/issues/3)). +- [[#387](https://github.com/nf-core/chipseq/issues/387)] - Get rid of the `lib` folder and rearrange the pipeline accordingly. +- [[#385](https://github.com/nf-core/chipseq/issues/385)] - Fix `--save_unaligned` description in schema. +- [[PR #392](https://github.com/nf-core/chipseq/pull/392)] - Adding line numbers to warnings/errors messages in `bin/check_samplesheet.py`. +- [[#396](https://github.com/nf-core/chipseq/issues/396)] - Check that samplesheet sample IDs only contain alphanumeric characters, dots, dashes or underscores. 
+- [[#378](https://github.com/nf-core/chipseq/issues/378)] - Switch from macs2 to macs3. +- [[#347](https://github.com/nf-core/chipseq/issues/347)] - Add read group tag to bam files processed by bowtie2. +- [[PR #406](https://github.com/nf-core/chipseq/pull/406)] - Update metro map to show macs3 instead of macs2. + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `chromap` | 0.2.1 | 0.2.4 | +| `macs2` | 2.2.7.1 | | +| `macs3` | | 3.0.1 | +| `multiqc` | 1.13 | 1.14 | +| `picard` | 2.27.4 | 3.0.0 | +| `samtools` | 1.15.1 | 1.17 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> **NB:** Dependency has been **added** if just the new version information is present. +> **NB:** Dependency has been **removed** if new version information isn't present. 
+ ## [[2.0.0](https://github.com/nf-core/chipseq/releases/tag/2.0.0)] - 2022-10-03 ### Enhancements & fixes diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index 4533e2f2..00000000 --- a/CITATION.cff +++ /dev/null @@ -1,56 +0,0 @@ -cff-version: 1.2.0 -message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" -authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven -title: "The nf-core framework for community-curated bioinformatics pipelines." -version: 2.4.1 -doi: 10.1038/s41587-020-0439-x -date-released: 2022-05-16 -url: https://github.com/nf-core/tools -prefered-citation: - type: article - authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven - doi: 10.1038/s41587-020-0439-x - journal: nature biotechnology - start: 276 - end: 278 - title: "The nf-core framework for community-curated bioinformatics pipelines." - issue: 3 - volume: 38 - year: 2020 - url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/CITATIONS.md b/CITATIONS.md index 63188ca8..d75a5154 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -36,6 +36,8 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +> Andrews, S. (2010). 
FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. @@ -44,7 +46,7 @@ > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. -- [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) +- [MACS3](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. @@ -131,5 +133,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100755 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. 
"We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
-We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. 
They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. 
- Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. 
(This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. 
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. 
Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- Whether you believe the incident is ongoing. +- Whether there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with.
+ +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. 
+- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/README.md b/README.md index 519a93eb..305f0632 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,23 @@ -# ![nf-core/chipseq](docs/images/nf-core-chipseq_logo_light.png#gh-light-mode-only) ![nf-core/chipseq](docs/images/nf-core-chipseq_logo_dark.png#gh-dark-mode-only) - -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/chipseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3240506-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3240506) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +

+ + + nf-core/chipseq + +

+[![GitHub Actions CI Status](https://github.com/nf-core/chipseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/chipseq/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/chipseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/chipseq/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/chipseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3240506-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3240506) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/chipseq) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/chipseq) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23chipseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/chipseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23chipseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/chipseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -**nfcore/chipseq** is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data. +**nfcore/chipseq** is a bioinformatics analysis pipeline used for Chromatin ImmunoPrecipitation sequencing (ChIP-seq) data. On release, automated continuous integration tests run the pipeline on a [full-sized dataset](https://github.com/nf-core/test-datasets/tree/chipseq#full-test-dataset-origin) on the AWS cloud infrastructure. The dataset consists of FoxA1 (transcription factor) and EZH2 (histone,mark) IP experiments from _Franco et al. 2015_ ([GEO: GSE59530](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE59530), [PMID: 25752574](https://pubmed.ncbi.nlm.nih.gov/25752574/)) and _Popovic et al. 2014_ ([GEO: GSE57632](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE57632), [PMID: 25188243](https://pubmed.ncbi.nlm.nih.gov/25188243/)), respectively. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from running the full-sized tests can be viewed on the [nf-core website](https://nf-co.re/chipseq/results). 
@@ -26,11 +31,13 @@ You can find numerous talks on the [nf-core events page](https://nf-co.re/events ## Pipeline summary +![nf-core/chipseq metro map](docs/images/nf-core-chipseq_metro_map_grey.png) + 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) 3. Choice of multiple aligners 1.([`BWA`](https://sourceforge.net/projects/bio-bwa/files/)) - 2.([`Chromap`](https://github.com/haowenz/chromap)). **For paired-end reads only working until mapping steps, see [here](https://github.com/nf-core/chipseq/issues/291)** + 2.([`Chromap`](https://github.com/haowenz/chromap)) 3.([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) 4.([`STAR`](https://github.com/alexdobin/STAR)) 4. Mark duplicates ([`picard`](https://broadinstitute.github.io/picard/)) @@ -52,7 +59,7 @@ You can find numerous talks on the [nf-core events page](https://nf-co.re/events 5. Generate gene-body meta-profile from bigWig files ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html)) 6. Calculate genome-wide IP enrichment relative to control ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)) 7. Calculate strand cross-correlation peak and ChIP-seq quality measures including NSC and RSC ([`phantompeakqualtools`](https://github.com/kundajelab/phantompeakqualtools)) - 8. Call broad/narrow peaks ([`MACS2`](https://github.com/macs3-project/MACS)) + 8. Call broad/narrow peaks ([`MACS3`](https://github.com/macs3-project/MACS)) 9. Annotate peaks relative to gene features ([`HOMER`](http://homer.ucsd.edu/homer/download.html)) 10. Create consensus peakset across all samples and create tabular file to aid in the filtering of the data ([`BEDTools`](https://github.com/arq5x/bedtools2/)) 11. 
Count reads in consensus peaks ([`featureCounts`](http://bioinf.wehi.edu.au/featureCounts/)) @@ -60,41 +67,52 @@ You can find numerous talks on the [nf-core events page](https://nf-co.re/events 6. Create IGV session file containing bigWig tracks, peaks and differential sites for data visualisation ([`IGV`](https://software.broadinstitute.org/software/igv/)). 7. Present QC for raw read, alignment, peak-calling and differential binding results ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) -## Quick Start - -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +## Usage -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. -3. Download the pipeline and test it on a minimal dataset with a single command: +To run on your data, prepare a comma-separated samplesheet with your input data. Please follow the [documentation on samplesheets](https://nf-co.re/chipseq/usage#samplesheet-input) for more details. 
An example samplesheet for running the pipeline looks as follows: - ```bash - nextflow run nf-core/chipseq -profile test,YOURPROFILE --outdir <OUTDIR> - ``` +```csv +sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate +WT_BCATENIN_IP,BLA203A1_S27_L006_R1_001.fastq.gz,,1,BCATENIN,WT_INPUT,1 +WT_BCATENIN_IP,BLA203A25_S16_L001_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A25_S16_L002_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A25_S16_L003_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A49_S40_L001_R1_001.fastq.gz,,3,BCATENIN,WT_INPUT,3 +WT_INPUT,BLA203A6_S32_L006_R1_001.fastq.gz,,1,,, +WT_INPUT,BLA203A30_S21_L001_R1_001.fastq.gz,,2,,, +WT_INPUT,BLA203A30_S21_L002_R1_001.fastq.gz,,2,,, +WT_INPUT,BLA203A31_S21_L003_R1_001.fastq.gz,,3,,, +``` - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +Now, you can run the pipeline using: - > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. 
Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +```bash +nextflow run nf-core/chipseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> +``` -4. Start running your own analysis! +See [usage docs](https://nf-co.re/chipseq/usage) for all of the available options when running the pipeline. - ```bash - nextflow run nf-core/chipseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> - ``` +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -See [usage docs](https://nf-co.re/chipseq/usage) for all of the available options when running the pipeline. +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/chipseq/usage) and the [parameter documentation](https://nf-co.re/chipseq/parameters). -## Documentation +## Pipeline output -The nf-core/chipseq pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/chipseq/usage) and [output](https://nf-co.re/chipseq/output). +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/chipseq/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/chipseq/output). 
## Credits These scripts were originally written by Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Phil Ewels ([@ewels](https://github.com/ewels)) for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden. The pipeline was re-implemented by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/) and converted to Nextflow DSL2 by Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/). +The pipeline workflow diagram was designed by Sarah Guinchard ([@G-Sarah](https://github.com/G-Sarah)). + Many thanks to others who have helped out and contributed along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@crickbabs](https://github.com/crickbabs), [@drejom](https://github.com/drejom), [@houghtos](https://github.com/houghtos), [@KevinMenden](https://github.com/KevinMenden), [@mashehu](https://github.com/mashehu), [@pditommaso](https://github.com/pditommaso), [@Rotholandus](https://github.com/Rotholandus), [@sofiahaglund](https://github.com/sofiahaglund), [@tiagochst](https://github.com/tiagochst) and [@winni2k](https://github.com/winni2k). 
## Contributions and Support diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..8c6fb5a7 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/chipseq v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index 5a4548bb..4398d399 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/chipseq v${version}

+

nf-core/chipseq ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index e1b78526..5d158118 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/chipseq v${version} + nf-core/chipseq ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..4ff6034f --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,29 @@ +id: "nf-core-chipseq-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/chipseq Methods Description" +section_href: "https://github.com/nf-core/chipseq" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/chipseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc/frip_score_header.txt b/assets/multiqc/frip_score_header.txt index 82902115..b8c35cd7 100644 --- a/assets/multiqc/frip_score_header.txt +++ b/assets/multiqc/frip_score_header.txt @@ -1,7 +1,7 @@ #id: 'frip_score' -#section_name: 'MERGED LIB: MACS2 FRiP score' +#section_name: 'MERGED LIB: MACS3 FRiP score' #description: "is generated by calculating the fraction of all mapped reads that fall -# into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted. +# into the MACS3 called peak regions. A read must overlap a peak by at least 20% to be counted. # See FRiP score." #plot_type: 'bargraph' #anchor: 'frip_score' diff --git a/assets/multiqc/peak_count_header.txt b/assets/multiqc/peak_count_header.txt index aa4dd346..66f6d2d9 100644 --- a/assets/multiqc/peak_count_header.txt +++ b/assets/multiqc/peak_count_header.txt @@ -1,7 +1,7 @@ #id: 'peak_count' -#section_name: 'MERGED LIB: MACS2 peak count' +#section_name: 'MERGED LIB: MACS3 peak count' #description: "is calculated from total number of peaks called by -# MACS2" +# MACS3" #plot_type: 'bargraph' #anchor: 'peak_count' #pconfig: diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4493905e..4f32837d 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/chipseq + This report has been generated by the nf-core/chipseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. data_format: "yaml" @@ -44,12 +44,12 @@ module_order: name: "MERGED LIB: SAMTools (unfiltered)" info: "This section of the report shows SAMTools results after merging libraries and before filtering." 
path_filters: - - "./alignment/mergedLibrary/unfiltered/*.mLb.mkD.sorted.bam*" + - "./alignment/merged_library/unfiltered/*.mLb.mkD.sorted.bam*" - picard: name: "MERGED LIB: Picard (unfiltered)" info: "This section of the report shows picard results after merging libraries and before filtering." path_filters: - - "./alignment/mergedLibrary/unfiltered/picard_metrics/*" + - "./alignment/merged_library/unfiltered/picard_metrics/*" - preseq: name: "MERGED LIB: Preseq (unfiltered)" info: "This section of the report shows Preseq results after merging libraries and before filtering." @@ -57,12 +57,12 @@ module_order: name: "MERGED LIB: SAMTools (filtered)" info: "This section of the report shows SAMTools results after merging libraries and after filtering." path_filters: - - "./alignment/mergedLibrary/filtered/*.mLb.clN.sorted.bam*" + - "./alignment/merged_library/filtered/*.mLb.clN.sorted.bam*" - picard: name: "MERGED LIB: Picard (filtered)" info: "This section of the report shows picard results after merging libraries and after filtering." path_filters: - - "./alignment/mergedLibrary/filtered/picard_metrics/*" + - "./alignment/merged_library/filtered/picard_metrics/*" - deeptools: name: "MERGED LIB: deepTools" anchor: "mlib_deeptools" @@ -72,7 +72,7 @@ module_order: anchor: "mlib_featurecounts" info: "This section of the report shows featureCounts results for the number of reads assigned to merged library consensus peaks." 
path_filters: - - "./macs2/featurecounts/*.summary" + - "./macs3/featurecounts/*.summary" report_section_order: peak_count: @@ -129,10 +129,12 @@ report_section_order: order: -3400 deseq2_clustering_10: order: -3500 - software_versions: + "nf-core-chipseq-methods-description": order: -3600 - nf-core-chipseq-summary: + software_versions: order: -3700 + "nf-core-chipseq-summary": + order: -3800 custom_plot_config: picard_insert_size: @@ -173,3 +175,5 @@ sp: fn: "*plotProfile*" phantompeakqualtools/out: fn: "*.spp.out" + +disable_version_detection: true diff --git a/assets/nf-core-chipseq_logo_light.png b/assets/nf-core-chipseq_logo_light.png index 21300d1d..ef93395e 100644 Binary files a/assets/nf-core-chipseq_logo_light.png and b/assets/nf-core-chipseq_logo_light.png differ diff --git a/assets/samplesheet_pe.csv b/assets/samplesheet_pe.csv index 3a304f12..17ea2e86 100644 --- a/assets/samplesheet_pe.csv +++ b/assets/samplesheet_pe.csv @@ -1,21 +1,21 @@ -sample,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,BLA203A1_S27_L006_R2_001.fastq.gz,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,BLA203A25_S16_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,BLA203A25_S16_L002_R2_001.fastq.gz,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,BLA203A49_S40_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT -NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,BLA203A7_S60_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,BLA203A43_S34_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,BLA203A43_S34_L002_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,BLA203A64_S55_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,BLA203A3_S29_L006_R2_001.fastq.gz,TCF4,WT_INPUT 
-WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,BLA203A27_S18_L001_R2_001.fastq.gz,TCF4,WT_INPUT -WT_TCF4_IP_REP2,BLA203A51_S42_L001_R1_001.fastq.gz,BLA203A51_S42_L001_R2_001.fastq.gz,TCF4,WT_INPUT -NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,BLA203A9_S62_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,BLA203A45_S36_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,BLA203A66_S57_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT -WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,BLA203A6_S32_L006_R2_001.fastq.gz,, -WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,BLA203A30_S21_L001_R2_001.fastq.gz,, -WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,BLA203A31_S21_L003_R2_001.fastq.gz,, -NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,BLA203A12_S3_L001_R2_001.fastq.gz,, -NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,BLA203A48_S39_L001_R2_001.fastq.gz,, -NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,BLA203A49_S1_L006_R2_001.fastq.gz,, +sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate +WT_BCATENIN_IP,BLA203A1_S27_L006_R1_001.fastq.gz,BLA203A1_S27_L006_R2_001.fastq.gz,1,BCATENIN,WT_INPUT,1 +WT_BCATENIN_IP,BLA203A25_S16_L001_R1_001.fastq.gz,BLA203A25_S16_L001_R2_001.fastq.gz,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A25_S16_L002_R1_001.fastq.gz,BLA203A25_S16_L002_R2_001.fastq.gz,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A49_S40_L001_R1_001.fastq.gz,BLA203A49_S40_L001_R2_001.fastq.gz,3,BCATENIN,WT_INPUT,3 +NAIVE_BCATENIN_IP,BLA203A7_S60_L001_R1_001.fastq.gz,BLA203A7_S60_L001_R2_001.fastq.gz,1,BCATENIN,NAIVE_INPUT,1 +NAIVE_BCATENIN_IP,BLA203A43_S34_L001_R1_001.fastq.gz,BLA203A43_S34_L001_R2_001.fastq.gz,2,BCATENIN,NAIVE_INPUT,2 +NAIVE_BCATENIN_IP,BLA203A43_S34_L002_R1_001.fastq.gz,BLA203A43_S34_L002_R2_001.fastq.gz,2,BCATENIN,NAIVE_INPUT,2 +NAIVE_BCATENIN_IP,BLA203A64_S55_L001_R1_001.fastq.gz,BLA203A64_S55_L001_R2_001.fastq.gz,3,BCATENIN,NAIVE_INPUT,3 
+WT_TCF4_IP,BLA203A3_S29_L006_R1_001.fastq.gz,BLA203A3_S29_L006_R2_001.fastq.gz,1,TCF4,WT_INPUT,1 +WT_TCF4_IP,BLA203A27_S18_L001_R1_001.fastq.gz,BLA203A27_S18_L001_R2_001.fastq.gz,2,TCF4,WT_INPUT,2 +WT_TCF4_IP,BLA203A51_S42_L001_R1_001.fastq.gz,BLA203A51_S42_L001_R2_001.fastq.gz,2,TCF4,WT_INPUT,2 +NAIVE_TCF4_IP,BLA203A9_S62_L001_R1_001.fastq.gz,BLA203A9_S62_L001_R2_001.fastq.gz,1,TCF4,NAIVE_INPUT,1 +NAIVE_TCF4_IP,BLA203A45_S36_L001_R1_001.fastq.gz,BLA203A45_S36_L001_R2_001.fastq.gz,2,TCF4,NAIVE_INPUT,2 +NAIVE_TCF4_IP,BLA203A66_S57_L001_R1_001.fastq.gz,BLA203A66_S57_L001_R2_001.fastq.gz,3,TCF4,NAIVE_INPUT,3 +WT_INPUT,BLA203A6_S32_L006_R1_001.fastq.gz,BLA203A6_S32_L006_R2_001.fastq.gz,1,,, +WT_INPUT,BLA203A30_S21_L001_R1_001.fastq.gz,BLA203A30_S21_L001_R2_001.fastq.gz,2,,, +WT_INPUT,BLA203A31_S21_L003_R1_001.fastq.gz,BLA203A31_S21_L003_R2_001.fastq.gz,3,,, +NAIVE_INPUT,BLA203A12_S3_L001_R1_001.fastq.gz,BLA203A12_S3_L001_R2_001.fastq.gz,1,,, +NAIVE_INPUT,BLA203A48_S39_L001_R1_001.fastq.gz,BLA203A48_S39_L001_R2_001.fastq.gz,2,,, +NAIVE_INPUT,BLA203A49_S1_L006_R1_001.fastq.gz,BLA203A49_S1_L006_R2_001.fastq.gz,3,,, diff --git a/assets/samplesheet_se.csv b/assets/samplesheet_se.csv index a9581d6e..8a0297d7 100644 --- a/assets/samplesheet_se.csv +++ b/assets/samplesheet_se.csv @@ -1,21 +1,21 @@ -sample,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT 
-WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, -WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, -WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, -NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,,, -NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,,, -NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,,, +sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate +WT_BCATENIN_IP,BLA203A1_S27_L006_R1_001.fastq.gz,,1,BCATENIN,WT_INPUT,1 +WT_BCATENIN_IP,BLA203A25_S16_L001_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A25_S16_L002_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A49_S40_L001_R1_001.fastq.gz,,3,BCATENIN,WT_INPUT,3 +NAIVE_BCATENIN_IP,BLA203A7_S60_L001_R1_001.fastq.gz,,1,BCATENIN,NAIVE_INPUT,1 +NAIVE_BCATENIN_IP,BLA203A43_S34_L001_R1_001.fastq.gz,,2,BCATENIN,NAIVE_INPUT,2 +NAIVE_BCATENIN_IP,BLA203A43_S34_L002_R1_001.fastq.gz,,2,BCATENIN,NAIVE_INPUT,2 +NAIVE_BCATENIN_IP,BLA203A64_S55_L001_R1_001.fastq.gz,,3,BCATENIN,NAIVE_INPUT,3 +WT_TCF4_IP,BLA203A3_S29_L006_R1_001.fastq.gz,,1,TCF4,WT_INPUT,1 +WT_TCF4_IP,BLA203A27_S18_L001_R1_001.fastq.gz,,2,TCF4,WT_INPUT,2 +WT_TCF4_IP,BLA203A51_S42_L001_R1_001.fastq.gz,,3,TCF4,WT_INPUT,3 +NAIVE_TCF4_IP,BLA203A9_S62_L001_R1_001.fastq.gz,,1,TCF4,NAIVE_INPUT,1 +NAIVE_TCF4_IP,BLA203A45_S36_L001_R1_001.fastq.gz,,2,TCF4,NAIVE_INPUT,2 +NAIVE_TCF4_IP,BLA203A66_S57_L001_R1_001.fastq.gz,,3,TCF4,NAIVE_INPUT,3 +WT_INPUT,BLA203A6_S32_L006_R1_001.fastq.gz,,1,,, +WT_INPUT,BLA203A30_S21_L001_R1_001.fastq.gz,,2,,, +WT_INPUT,BLA203A31_S21_L003_R1_001.fastq.gz,,3,,, 
+NAIVE_INPUT,BLA203A12_S3_L001_R1_001.fastq.gz,,1,,, +NAIVE_INPUT,BLA203A48_S39_L001_R1_001.fastq.gz,,2,,, +NAIVE_INPUT,BLA203A49_S1_L006_R1_001.fastq.gz,,3,,, diff --git a/assets/schema_input.json b/assets/schema_input.json index cda13e0b..fb41210f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,10 +10,13 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, @@ -30,15 +33,30 @@ } ] }, + "replicate": { + "type": "integer", + "errorMessage": "Replicate id not an integer!", + "meta": ["replicate"] + }, "antibody": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Antibody entry cannot contain spaces" + "errorMessage": "Antibody entry cannot contain spaces", + "dependentRequired": ["control"], + "meta": ["antibody"] }, "control": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Control entry cannot contain spaces" + "errorMessage": "Control entry cannot contain spaces", + "dependentRequired": ["antibody", "control_replicate"], + "meta": ["control"] + }, + "control_replicate": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Control replicate entry cannot contain spaces", + "meta": ["control_replicate"] } }, "required": ["sample", "fastq_1"] diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..8058c9bf --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/chipseq ${version} - ${runName}", 
+ "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/bampe_rm_orphan.py b/bin/bampe_rm_orphan.py index 4ab9935b..ddba1d6f 100755 --- a/bin/bampe_rm_orphan.py +++ b/bin/bampe_rm_orphan.py @@ -46,7 +46,6 @@ def makedir(path): - if not len(path) == 0: try: os.makedirs(path) @@ -63,7 +62,6 @@ def makedir(path): def bampe_rm_orphan(BAMIn, BAMOut, onlyFRPairs=False): - ## SETUP DIRECTORY/FILE STRUCTURE OutDir = os.path.dirname(BAMOut) makedir(OutDir) @@ -89,7 +87,6 @@ def bampe_rm_orphan(BAMIn, BAMOut, onlyFRPairs=False): ## FILTER FOR READS ON SAME CHROMOSOME IN FR ORIENTATION if onlyFRPairs: if pair1.tid == pair2.tid: - ## READ1 FORWARD AND READ2 REVERSE STRAND if not pair1.is_reverse and pair2.is_reverse: if pair1.reference_start <= pair2.reference_start: diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index eaf0d24c..c7d8add7 100755 --- a/bin/check_samplesheet.py +++ 
b/bin/check_samplesheet.py @@ -4,6 +4,7 @@ import sys import errno import argparse +import re def parse_args(args=None): @@ -38,104 +39,126 @@ def print_error(error, context="Line", context_str=""): def check_samplesheet(file_in, file_out): """ This function checks that the samplesheet follows the following structure: - sample,fastq_1,fastq_2,antibody,control - SPT5_T0_REP1,SRR1822153_1.fastq.gz,SRR1822153_2.fastq.gz,SPT5,SPT5_INPUT_REP1 - SPT5_T0_REP2,SRR1822154_1.fastq.gz,SRR1822154_2.fastq.gz,SPT5,SPT5_INPUT_REP2 - SPT5_INPUT_REP1,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, - SPT5_INPUT_REP2,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, + sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate + SPT5_T0,SRR1822153_1.fastq.gz,SRR1822153_2.fastq.gz,SPT5,1,SPT5_INPUT,1 + SPT5_T0,SRR1822154_1.fastq.gz,SRR1822154_2.fastq.gz,SPT5,2,SPT5_INPUT,2 + SPT5_INPUT,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,1,,, + SPT5_INPUT,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,2,,, For an example see: - https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv + https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.1/samplesheet_test.csv """ sample_mapping_dict = {} with open(file_in, "r", encoding="utf-8-sig") as fin: - ## Check header - MIN_COLS = 2 - HEADER = ["sample", "fastq_1", "fastq_2", "antibody", "control"] + MIN_COLS = 3 + HEADER = ["sample", "fastq_1", "fastq_2", "replicate", "antibody", "control", "control_replicate"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: print(f"ERROR: Please check samplesheet header -> 
{','.join(header)} != {','.join(HEADER)}") sys.exit(1) ## Check sample entries - for line in fin: - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - - # Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - "Invalid number of columns (minimum = {})!".format(len(HEADER)), - "Line", - line, - ) - num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: - print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), - "Line", - line, - ) - - ## Check sample name entries - sample, fastq_1, fastq_2, antibody, control = lspl[: len(HEADER)] - if sample.find(" ") != -1: - print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") - sample = sample.replace(" ", "_") - if not sample: - print_error("Sample entry has not been specified!", "Line", line) - - ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", "Line", line) - if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - print_error( - "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - "Line", - line, - ) + for line_number, line in enumerate(fin, start=1): + if line.strip(): + lspl = [x.strip().strip('"') for x in line.strip().split(",")] - ## Check antibody and control columns have valid values - if antibody: - if antibody.find(" ") != -1: - print(f"WARNING: Spaces have been replaced by underscores for antibody: {antibody}") - antibody = antibody.replace(" ", "_") - if not control: + # Check valid number of columns per row + if len(lspl) < len(HEADER): + print_error( + "Invalid number of columns (found = {}, minimum = {})!".format(len(lspl),len(HEADER)), + "Line {}".format(line_number), + line, + ) + num_cols = len([x for x in lspl[: len(HEADER)] if x]) + if num_cols < MIN_COLS: print_error( - "Both antibody and control columns must be specified!", - "Line", + "Invalid number of populated 
columns (found = {}, minimum = {})!".format(num_cols,MIN_COLS), + "Line {}".format(line_number), line, ) - if control: - if control.find(" ") != -1: - print(f"WARNING: Spaces have been replaced by underscores for control: {control}") - control = control.replace(" ", "_") - if not antibody: + + ## Check sample name entries + sample, fastq_1, fastq_2, replicate, antibody, control, control_replicate = lspl[: len(HEADER)] + if sample.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") + sample = sample.replace(" ", "_") + if not sample: + print_error("Sample entry has not been specified!", "Line {}".format(line_number), line) + if not re.match(r"^[a-zA-Z0-9_.-]+$", sample): print_error( - "Both antibody and control columns must be specified!", - "Line", + "Sample name contains invalid characters! Only alphanumeric characters, underscores, dots and dashes are allowed.", + "Line {}".format(line_number), line, ) - ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fastq_1, fastq_2, antibody, control] - if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2, antibody, control] - elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2, antibody, control] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, antibody, control ]]} - if sample not in sample_mapping_dict: - sample_mapping_dict[sample] = [sample_info] - else: - if sample_info in sample_mapping_dict[sample]: - print_error("Samplesheet contains duplicate rows!", "Line", line) + ## Check FastQ file extension + for fastq in [fastq_1, fastq_2]: + if fastq: + if fastq.find(" ") != -1: + print_error("FastQ file contains spaces!", "Line {}".format(line_number), line) + if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): + print_error( 
+ "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", + "Line {}".format(line_number), + line, + ) + + ## Check replicate column is integer + if not replicate.isdecimal(): + print_error("Replicate id not an integer!", "Line {}".format(line_number), line) + sys.exit(1) + + ## Check antibody and control columns have valid values + if antibody: + if antibody.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for antibody: {antibody}") + antibody = antibody.replace(" ", "_") + if not control: + print_error( + "Both antibody and control columns must be specified!", + "Line {}".format(line_number), + line, + ) + + if control: + if control.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for control: {control}") + control = control.replace(" ", "_") + if not control_replicate.isdecimal(): + print_error("Control replicate id not an integer!", "Line {}".format(line_number), line) + sys.exit(1) + control = "{}_REP{}".format(control, control_replicate) + if not antibody: + print_error( + "Both antibody and control columns must be specified!", + "Line {}".format(line_number), + line, + ) + + ## Auto-detect paired-end/single-end + sample_info = [] ## [single_end, fastq_1, fastq_2, replicate, antibody, control] + ## Paired-end short reads + if sample and fastq_1 and fastq_2: + sample_info = ["0", fastq_1, fastq_2, replicate, antibody, control] + ## Single-end short reads + elif sample and fastq_1 and not fastq_2: + sample_info = ["1", fastq_1, fastq_2, replicate, antibody, control] + else: + print_error("Invalid combination of columns provided!", "Line {}".format(line_number), line) + + ## Create sample mapping dictionary = {sample: {replicate: [[ single_end, fastq_1, fastq_2, replicate, antibody, control ]]}} + replicate = int(replicate) + sample_info = sample_info + lspl[len(HEADER) :] + if sample not in sample_mapping_dict: + sample_mapping_dict[sample] = {} + if replicate not in sample_mapping_dict[sample]: +
sample_mapping_dict[sample][replicate] = [sample_info] else: - sample_mapping_dict[sample].append(sample_info) + if sample_info in sample_mapping_dict[sample][replicate]: + print_error("Samplesheet contains duplicate rows!", "Line {}".format(line_number), line) + else: + sample_mapping_dict[sample][replicate].append(sample_info) ## Write validated samplesheet with appropriate columns if len(sample_mapping_dict) > 0: @@ -149,6 +172,7 @@ def check_samplesheet(file_in, file_out): "single_end", "fastq_1", "fastq_2", + "replicate", "antibody", "control", ] @@ -156,25 +180,60 @@ def check_samplesheet(file_in, file_out): + "\n" ) for sample in sorted(sample_mapping_dict.keys()): + ## Check that replicate ids are in format 1.. + uniq_rep_ids = sorted(list(set(sample_mapping_dict[sample].keys()))) + if len(uniq_rep_ids) != max(uniq_rep_ids) or 1 != min(uniq_rep_ids): + print_error( + "Replicate ids must start with 1..!", + "Sample", + "{}, replicate ids: {}".format(sample, ",".join([str(x) for x in uniq_rep_ids])), + ) + sys.exit(1) - ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): + ## Check that multiple replicates are of the same datatype i.e. single-end / paired-end + if not all( + x[0][0] == sample_mapping_dict[sample][1][0][0] for x in sample_mapping_dict[sample].values() + ): print_error( - f"Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end!", + f"Multiple replicates of a sample must be of the same datatype i.e. single-end or paired-end!", "Sample", sample, ) - for idx, val in enumerate(sample_mapping_dict[sample]): - control = val[-1] - if control and control not in sample_mapping_dict.keys(): + for replicate in sorted(sample_mapping_dict[sample].keys()): + ## Check that multiple runs of the same sample are of the same datatype i.e. 
single-end / paired-end + if not all( + x[0] == sample_mapping_dict[sample][replicate][0][0] + for x in sample_mapping_dict[sample][replicate] + ): print_error( - f"Control identifier has to match does a provided sample identifier!", - "Control", - control, + f"Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end!", + "Sample", + sample, ) - fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n") + for idx, val in enumerate(sample_mapping_dict[sample][replicate]): + control = "_REP".join(val[-1].split("_REP")[:-1]) + control_replicate = val[-1].split("_REP")[-1] + if control and ( + control not in sample_mapping_dict.keys() + or int(control_replicate) not in sample_mapping_dict[control].keys() + ): + print_error( + f"Control identifier and replicate has to match a provided sample identifier and replicate!", + "Control", + val[4], + ) + + ## Write to file + for idx in range(len(sample_mapping_dict[sample][replicate])): + fastq_files = sample_mapping_dict[sample][replicate][idx] + sample_id = "{}_REP{}_T{}".format(sample, replicate, idx + 1) + if len(fastq_files) == 1: + fout.write(",".join([sample_id] + fastq_files) + ",\n") + else: + fout.write(",".join([sample_id] + fastq_files) + "\n") + else: print_error(f"No entries to process!", "Samplesheet: {file_in}") diff --git a/bin/igv_files_to_session.py b/bin/igv_files_to_session.py index 629e6cc9..6fdcb1a9 100755 --- a/bin/igv_files_to_session.py +++ b/bin/igv_files_to_session.py @@ -55,7 +55,6 @@ def makedir(path): - if not len(path) == 0: try: os.makedirs(path) @@ -72,7 +71,6 @@ def makedir(path): def igv_files_to_session(XMLOut, ListFile, ReplaceFile, Genome, PathPrefix=""): - makedir(os.path.dirname(XMLOut)) replaceFileDict = {} diff --git a/bin/macs2_merged_expand.py b/bin/macs3_merged_expand.py similarity index 96% rename from bin/macs2_merged_expand.py rename to bin/macs3_merged_expand.py index aa401123..64dc7770 100755 --- a/bin/macs2_merged_expand.py +++ 
b/bin/macs3_merged_expand.py @@ -17,15 +17,15 @@ ############################################ Description = "Add sample boolean files and aggregate columns from merged MACS narrow or broad peak file." -Epilog = """Example usage: python macs2_merged_expand.py --is_narrow_peak --min_replicates 1""" +Epilog = """Example usage: python macs3_merged_expand.py --is_narrow_peak --min_replicates 1""" argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -argParser.add_argument("MERGED_INTERVAL_FILE", help="Merged MACS2 interval file created using linux sort and mergeBed.") +argParser.add_argument("MERGED_INTERVAL_FILE", help="Merged MACS3 interval file created using linux sort and mergeBed.") argParser.add_argument( "SAMPLE_NAME_LIST", - help="Comma-separated list of sample names as named in individual MACS2 broadPeak/narrowPeak output file e.g. SAMPLE_R1 for SAMPLE_R1_peak_1.", + help="Comma-separated list of sample names as named in individual MACS3 broadPeak/narrowPeak output file e.g. 
SAMPLE_R1 for SAMPLE_R1_peak_1.", ) argParser.add_argument("OUTFILE", help="Full path to output directory.") @@ -55,7 +55,6 @@ def makedir(path): - if not len(path) == 0: try: os.makedirs(path) @@ -77,8 +76,7 @@ def makedir(path): ## sort -k1,1 -k2,2n | mergeBed -c 2,3,4,5,6,7,8,9,10 -o collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse > merged_peaks.txt -def macs2_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow=False, minReplicates=1): - +def macs3_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow=False, minReplicates=1): makedir(os.path.dirname(OutFile)) combFreqDict = {} @@ -210,7 +208,7 @@ def macs2_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow ############################################ ############################################ -macs2_merged_expand( +macs3_merged_expand( MergedIntervalTxtFile=args.MERGED_INTERVAL_FILE, SampleNameList=args.SAMPLE_NAME_LIST.split(","), OutFile=args.OUTFILE, diff --git a/bin/plot_macs2_qc.r b/bin/plot_macs3_qc.r similarity index 99% rename from bin/plot_macs2_qc.r rename to bin/plot_macs3_qc.r index 5cf074de..e40a6837 100755 --- a/bin/plot_macs2_qc.r +++ b/bin/plot_macs3_qc.r @@ -20,7 +20,7 @@ library(scales) option_list <- list(make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"), make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. 
Must be unique and in same order as peaks files input.", metavar="string"), make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), - make_option(c("-p", "--outprefix"), type="character", default='macs2_peakqc', help="Output prefix", metavar="string")) + make_option(c("-p", "--outprefix"), type="character", default='macs3_peakqc', help="Output prefix", metavar="string")) opt_parser <- OptionParser(option_list=option_list) opt <- parse_args(opt_parser) diff --git a/conf/base.config b/conf/base.config index b38d1fe3..42ba5a89 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -57,7 +57,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 7f282cee..84f38729 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -48,6 +48,14 @@ params { "200" : 2892537351 ] } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" @@ -655,11 +663,11 @@ params { readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" mito_name = "chrM" macs_gsize = [ - "50" : "11624332", - "75" : "11693438", - "100" : "11777680", - "150" : "11783749", - "200" : "11825681" + "50" : 11624332, + "75" : 11693438, + "100" : 11777680, + "150" : 11783749, + "200" : 11825681 ] } 'susScr3' { diff --git a/conf/modules.config b/conf/modules.config index b46fec83..22ae4ec0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,7 +21,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:INPUT_CHECK:SAMPLESHEET_CHECK' { + withName: 'SAMPLESHEET_CHECK' { + ext.args = 'samplesheet.valid.csv' publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, @@ -29,12 +30,8 @@ process { ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] + withName: 'KHMER_UNIQUEKMERS' { + publishDir = [ enabled: false ] } } @@ -71,15 +68,6 @@ process { ] } - withName: 'UNTAR_CHROMAP_INDEX|CHROMAP_INDEX' { - publishDir = [ - path: { "${params.outdir}/genome/index/chromap" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - withName: 'GFFREAD' { ext.args = '--keep-exon-attrs -F -T' publishDir = [ @@ -111,7 +99,8 @@ process { publishDir = [ path: { "${params.outdir}/genome" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference ] } } @@ -122,7 +111,7 @@ process { if (!(params.skip_fastqc || params.skip_qc)) { process { - withName: '.*:FASTQC_TRIMGALORE:FASTQC' { + withName: 'FASTQC' { ext.args = '--quiet' publishDir = [ [ @@ -142,11 +131,17 @@ if (!(params.skip_fastqc || params.skip_qc)) { if (!params.skip_trimming) { process { - withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' { - ext.args = [ - '--fastqc', - params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' - ].join(' ').trim() + withName: 'TRIMGALORE' { + ext.args = { + [ + '--fastqc', + params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '', + params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '', + params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '', + params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '', + params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' + ].join(' ').trim() + } publishDir = [ [ path: { "${params.outdir}/trimgalore/fastqc" }, @@ -175,7 +170,7 @@ if (!params.skip_trimming) { } process { - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.Lb.sorted" } publishDir = [ path: { "${params.outdir}/${params.aligner}/library" }, @@ -185,7 +180,7 @@ process { ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/${params.aligner}/library" }, mode: params.publish_dir_mode, @@ -194,7 +189,7 @@ process { ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_.*' { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_.*' { ext.prefix = { "${meta.id}.Lb.sorted.bam" } publishDir = [ path: { 
"${params.outdir}/${params.aligner}/library/samtools_stats/" }, @@ -208,11 +203,13 @@ process { if (params.aligner == 'bwa') { process { withName: 'BWA_MEM' { - ext.args = { [ - '-M', - params.bwa_min_score ? " -T ${params.bwa_min_score}" : '', - meta.read_group ? "-R ${meta.read_group}": '' - ].join(' ').trim() } + ext.args = { + [ + '-M', + params.bwa_min_score ? " -T ${params.bwa_min_score}" : '', + meta.read_group ? "-R ${meta.read_group}": '' + ].join(' ').trim() + } ext.args2 = '-bhS -F 0x0100 -O BAM' ext.prefix = { "${meta.id}.Lb" } publishDir = [ @@ -228,7 +225,12 @@ if (params.aligner == 'bwa') { if (params.aligner == 'bowtie2') { process { withName: 'BOWTIE2_ALIGN' { - ext.args = '' + ext.args = { + [ + meta.read_group ? "--rg-id ${meta.id} --rg SM:${meta.id - ~/_T\d+$/} --rg PL:ILLUMINA --rg LB:${meta.id} --rg PU:1" : '', + params.seq_center ? "--rg CN:${params.seq_center}" : '' + ].join(' ').trim() + } ext.prefix = { "${meta.id}.Lb" } publishDir = [ [ @@ -250,14 +252,14 @@ if (params.aligner == 'bowtie2') { if (params.aligner == 'chromap') { process { - withName: CHROMAP_INDEX { - ext.args = '' + withName: 'CHROMAP_INDEX' { publishDir = [ - path: { "${params.outdir}/genome/${params.aligner}/index" }, + path: { "${params.outdir}/genome/index/${params.aligner}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + withName: CHROMAP_CHROMAP { ext.args = '-l 2000 --low-mem --SAM' ext.prefix = { "${meta.id}.Lb" } @@ -311,20 +313,25 @@ process { withName: 'PICARD_MERGESAMFILES' { ext.args = '--SORT_ORDER coordinate --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' ext.prefix = { "${meta.id}.mLb.sorted" } - publishDir = [ enabled: false ] + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] } - withName: '.*:MARK_DUPLICATES_PICARD:PICARD_MARKDUPLICATES' { + withName: '.*:BAM_MARKDUPLICATES_PICARD:PICARD_MARKDUPLICATES' { ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' ext.prefix = { "${meta.id}.mLb.mkD.sorted" } publishDir = [ [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics" }, + path: { "${params.outdir}/${params.aligner}/merged_library/picard_metrics" }, mode: params.publish_dir_mode, pattern: '*.metrics.txt' ], [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + path: { "${params.outdir}/${params.aligner}/merged_library" }, mode: params.publish_dir_mode, pattern: '*.bam', enabled: params.save_align_intermeds @@ -332,72 +339,86 @@ process { ] } - withName: '.*:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' { + withName: '.*:BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' { publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + path: { "${params.outdir}/${params.aligner}/merged_library" }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}', enabled: params.save_align_intermeds ] } - withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:BAM_MARKDUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.mLb.mkD.sorted.bam" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/samtools_stats" }, + path: { 
"${params.outdir}/${params.aligner}/merged_library/samtools_stats" }, mode: params.publish_dir_mode, pattern: '*.{stats,flagstat,idxstats}' ] } - // Should only be published when paired end data is used and save_align_intermeds is true - withName: 'BAM_FILTER' { - ext.prefix = { meta.single_end ? "${meta.id}.mLb.noPublish" : "${meta.id}.mLb.flT.sorted" } + withName: 'BAMTOOLS_FILTER' { + ext.args = { + [ + meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001', + params.keep_dups ? '' : '-F 0x0400', + params.keep_multi_map ? '' : '-q 1' + ].join(' ').trim() + } + ext.prefix = { meta.single_end ? "${meta.id}.mLb.clN.sorted" : "${meta.id}.mLb.flT.sorted" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + path: { "${params.outdir}/${params.aligner}/merged_library" }, mode: params.publish_dir_mode, - pattern: '*.mLb.flT.sorted.bam', - enabled: params.save_align_intermeds + pattern: '*.bam', + saveAs: { (meta.single_end || params.save_align_intermeds) ? "${it}" : null } ] } - withName: 'BAM_REMOVE_ORPHANS' { - ext.args = '--only_fr_pairs' - ext.prefix = { "${meta.id}.mLb.clN" } - publishDir = [ enabled: false ] + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:BAM_FILTER_BAMTOOLS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mLb.clN.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { - ext.prefix = { "${meta.id}.mLb.clN.sorted" } + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:BAM_FILTER_BAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.clN.sorted.bam" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + path: { "${params.outdir}/${params.aligner}/merged_library/samtools_stats" }, mode: params.publish_dir_mode, - pattern: '*.bam' + pattern: "*.{stats,flagstat,idxstats}" ] } - withName: 
'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.mLb.clN.sorted" } + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:BAM_FILTER_BAMTOOLS:SAMTOOLS_SORT' { + ext.args = '-n' + ext.prefix = { "${meta.id}.mLb.flT.name_sorted" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + path: { "${params.outdir}/${params.aligner}/merged_library" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.bam', + enabled: params.save_align_intermeds ] } - withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.mLb.clN.sorted.bam" } + withName: 'BAM_REMOVE_ORPHANS' { + ext.args = '--only_fr_pairs' + ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/samtools_stats" }, + path: { "${params.outdir}/${params.aligner}/merged_library" }, mode: params.publish_dir_mode, - pattern: "*.{stats,flagstat,idxstats}" + pattern: '*.bam', + enabled: params.save_align_intermeds ] } withName: 'PHANTOMPEAKQUALTOOLS' { + ext.args = { "--max-ppsize=500000" } ext.args2 = { "-p=$task.cpus" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" }, + path: { "${params.outdir}/${params.aligner}/merged_library/phantompeakqualtools" }, mode: params.publish_dir_mode, pattern: "*.{out,pdf}" ] @@ -405,25 +426,33 @@ process { withName: 'MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS' { publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" }, + path: { "${params.outdir}/${params.aligner}/merged_library/phantompeakqualtools" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: 'BEDTOOLS_GENOMECOV' { - ext.prefix = { "${meta.id}" } + withName: '.*:BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC:BEDTOOLS_GENOMECOV' { + ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' } + ext.prefix = { "${meta.id}.mLB.clN" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/bigwig/scale" }, - mode: params.publish_dir_mode, - pattern: "*.txt" + [ + path: { "${params.outdir}/${params.aligner}/merged_library/bigwig" }, + mode: params.publish_dir_mode, + pattern: "*.bigWig" + ], + [ + path: { "${params.outdir}/${params.aligner}/merged_library/bigwig/scale" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] ] } - withName: 'UCSC_BEDGRAPHTOBIGWIG' { - ext.prefix = { "${meta.id}" } + withName: '.*:BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/bigwig" }, + path: { "${params.outdir}/${params.aligner}/merged_library/bigwig" }, mode: params.publish_dir_mode, pattern: "*.bigWig" ] @@ -437,12 +466,12 @@ if (!params.skip_picard_metrics) { ext.prefix = { "${meta.id}.mLb.clN.sorted" } publishDir = [ [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics" }, + path: { "${params.outdir}/${params.aligner}/merged_library/picard_metrics" }, mode: params.publish_dir_mode, pattern: "*_metrics" ], [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics/pdf" }, + path: { "${params.outdir}/${params.aligner}/merged_library/picard_metrics/pdf" }, mode: params.publish_dir_mode, pattern: "*.pdf" ] @@ -455,9 +484,9 @@ if (!params.skip_preseq) { process { withName: 'PRESEQ_LCEXTRAP' { ext.args = '-verbose -bam -seed 1' - ext.prefix = { "${meta.id}.mLb.clN" } + ext.prefix = { "${meta.id}.mLb.mkD" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/preseq" }, + path: { 
"${params.outdir}/${params.aligner}/merged_library/preseq" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -468,10 +497,10 @@ if (!params.skip_preseq) { if (!params.skip_plot_profile) { process { withName: 'DEEPTOOLS_COMPUTEMATRIX' { - ext.args = 'scale-regions --regionBodyLength 1000 --beforeRegionStartLength 3000 --afterRegionStartLength 3000 --skipZeros --smartLabels' + ext.args = 'scale-regions --regionBodyLength 1000 --beforeRegionStartLength 3000 --afterRegionStartLength 3000 --missingDataAsZero --skipZeros --smartLabels' ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + path: { "${params.outdir}/${params.aligner}/merged_library/deepTools/plotProfile" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -480,7 +509,7 @@ if (!params.skip_plot_profile) { withName: 'DEEPTOOLS_PLOTPROFILE' { ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + path: { "${params.outdir}/${params.aligner}/merged_library/deepTools/plotProfile" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -489,7 +518,7 @@ if (!params.skip_plot_profile) { withName: 'DEEPTOOLS_PLOTHEATMAP' { ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + path: { "${params.outdir}/${params.aligner}/merged_library/deepTools/plotProfile" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -497,12 +526,6 @@ if (!params.skip_plot_profile) { } } -process { - withName: 'KHMER_UNIQUEKMERS' { - publishDir = [ enabled: false ] - } -} - if (!params.skip_plot_fingerprint) { process { withName: 'DEEPTOOLS_PLOTFINGERPRINT' { @@ -513,7 +536,7 @@ if (!params.skip_plot_fingerprint) { ].join(' ').trim() } ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotFingerprint" }, + path: { "${params.outdir}/${params.aligner}/merged_library/deepTools/plotFingerprint" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -522,20 +545,17 @@ if (!params.skip_plot_fingerprint) { } process { - withName: 'MACS2_CALLPEAK' { + withName: 'MACS3_CALLPEAK' { ext.args = [ '--keep-dup all', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_fdr ? "--qvalue ${params.macs_fdr}" : '', params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', - params.aligner == "chromap" ? "--format BAM" : '' + params.aligner == "chromap" ? "--format BAM" : '' //TODO check if not needed anymore with new chromap versions ].join(' ').trim() publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak' - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -544,22 +564,14 @@ process { withName: 'FRIP_SCORE' { ext.args = '-bed -c -f 0.20' publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/qc' - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 
'narrow_peak' : 'broad_peak'}/qc" }, enabled: false ] } withName: 'MULTIQC_CUSTOM_PEAKS' { publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/qc' - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -568,14 +580,11 @@ process { if (!params.skip_peak_annotation) { process { - withName: 'HOMER_ANNOTATEPEAKS_MACS2' { + withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}_peaks" } publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak' - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -584,14 +593,10 @@ if (!params.skip_peak_annotation) { if (!params.skip_peak_qc) { process { - withName: 'PLOT_MACS2_QC' { - ext.args = '-o ./ -p macs2_peak' + withName: 'PLOT_MACS3_QC' { + ext.args = '-o ./ -p macs3_peak' publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/qc' - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -599,13 +604,9 @@ if (!params.skip_peak_annotation) { withName: 'PLOT_HOMER_ANNOTATEPEAKS' { ext.args = '-o ./' - ext.prefix = 'macs2_annotatePeaks' + ext.prefix = 'macs3_annotatePeaks' publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/qc' - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -616,16 +617,11 @@ if (!params.skip_peak_annotation) { if (!params.skip_consensus_peaks) { process { - withName: 'MACS2_CONSENSUS' { + withName: 'MACS3_CONSENSUS' { ext.when = { meta.multiple_groups || meta.replicates_exist } ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus', - "/${meta.id}" - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -635,12 +631,7 @@ if (!params.skip_consensus_peaks) { ext.args = '-F SAF -O --fracOverlap 0.2' ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus', - "/${meta.id}" - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -649,16 +640,11 @@ if (!params.skip_consensus_peaks) { if (!params.skip_peak_annotation) { process { - withName: 'HOMER_ANNOTATEPEAKS_CONSENSUS' { + withName: '.*:BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus', - "/${meta.id}" - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -667,12 +653,7 @@ if (!params.skip_consensus_peaks) { withName: 'ANNOTATE_BOOLEAN_PEAKS' { ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus', - "/${meta.id}" - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}/consensus/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -682,7 +663,7 @@ if (!params.skip_consensus_peaks) { if (!params.skip_deseq2_qc) { process { - withName: DESEQ2_QC { + withName: 'DESEQ2_QC' { ext.when = { meta.multiple_groups && meta.replicates_exist } ext.args = [ '--id_col 1', @@ -692,13 +673,7 @@ if (!params.skip_consensus_peaks) { ].join(' ').trim() ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ - path: { [ - "${params.outdir}/${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus', - "/${meta.id}", - '/deseq2' - ].join('') }, + path: { "${params.outdir}/${params.aligner}/merged_library/macs3/${params.narrow_peak ? 
'narrow_peak' : 'broad_peak'}/consensus/${meta.id}/deseq2" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -711,12 +686,16 @@ if (!params.skip_igv) { process { withName: 'IGV' { publishDir = [ - path: { [ - "${params.outdir}/igv", - params.narrow_peak? '/narrowPeak' : '/broadPeak' - ].join('') }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + [ + path: { "${params.outdir}/igv/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}" }, + mode: params.publish_dir_mode, + pattern: '*.{txt,xml}' + ], + [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + pattern: '*.{fa,fasta}' + ] ] } } @@ -727,10 +706,7 @@ if (!params.skip_multiqc) { withName: 'MULTIQC' { ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' publishDir = [ - path: { [ - "${params.outdir}/multiqc", - params.narrow_peak? '/narrowPeak' : '/broadPeak' - ].join('') }, + path: { "${params.outdir}/multiqc/${params.narrow_peak ? 'narrow_peak' : 'broad_peak'}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/conf/test.config b/conf/test.config index 9b24bc9a..3601340b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,12 +20,12 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv' + input = params.pipelines_testdata_base_path + 'chipseq/samplesheet/v2.1/samplesheet_test.csv' read_length = 50 // Genome references - fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' - gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf' + fasta = params.pipelines_testdata_base_path + 'atacseq/reference/genome.fa' + gtf = params.pipelines_testdata_base_path + 'atacseq/reference/genes.gtf' // For speed to avoid CI time-out fingerprint_bins = 100 diff --git a/conf/test_full.config b/conf/test_full.config index d25c37d7..ae3a6146 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,7 +15,7 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_full.csv' + input = params.pipelines_testdata_base_path + 'chipseq/samplesheet/v2.1/samplesheet_full.csv' // Used to calculate --macs_gsize read_length = 50 diff --git a/docs/images/mqc_macs2_peak_count_plot.png b/docs/images/mqc_macs3_peak_count_plot.png similarity index 100% rename from docs/images/mqc_macs2_peak_count_plot.png rename to docs/images/mqc_macs3_peak_count_plot.png diff --git a/docs/images/nf-core-chipseq_logo_dark.png b/docs/images/nf-core-chipseq_logo_dark.png index 9f2b3010..b72c0163 100644 Binary files a/docs/images/nf-core-chipseq_logo_dark.png and b/docs/images/nf-core-chipseq_logo_dark.png differ diff --git a/docs/images/nf-core-chipseq_logo_light.png b/docs/images/nf-core-chipseq_logo_light.png index 51a83b3d..21283bb1 
100644 Binary files a/docs/images/nf-core-chipseq_logo_light.png and b/docs/images/nf-core-chipseq_logo_light.png differ diff --git a/docs/images/nf-core-chipseq_metro_map_grey.png b/docs/images/nf-core-chipseq_metro_map_grey.png new file mode 100644 index 00000000..c9ff4b0b Binary files /dev/null and b/docs/images/nf-core-chipseq_metro_map_grey.png differ diff --git a/docs/images/nf-core-chipseq_metro_map_grey.svg b/docs/images/nf-core-chipseq_metro_map_grey.svg new file mode 100644 index 00000000..4bf901ee --- /dev/null +++ b/docs/images/nf-core-chipseq_metro_map_grey.svg @@ -0,0 +1,2825 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + r + + + + + + + + + + fasta + + + + + + + GFF + + + + + + + fastq + + + + + + + HTML + + Pre-processing + Genome alignment + + + + + + + + + Cutadapt + FastQC + FastQC + + IGV + phantompeakqualtools + MultiQC + + STAR + BWA + Peak calling & ChIP QC + + + + + + BAM + + + + + + + BAI + + + + + + + bigWig + + + + + + + + + + + + + + + + + + Bowtie2 + Chromap + + + + + + + + + + + + + + + + + + + + Picard¹ + Filtering² + + + + + + + + + + + + + + + + + + Picard + Preseq + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Alignment QC + + Deeptools + BedgraphtoBigWig + Bedtools + + + + + + + + + + + + + IP enrichment analysis + + + + + + + + + + + + + + + + + + + + MACS3 + Homer + Bedtools + FeatureCounts + R+DESeq2 + + + + + + + + + + + + + + + XML + + + + + + + + + tab + + + + 1. + + 2. 
+ Picard Mark duplicatesMerge alignments from multiple libraries of the same sample + SAMtoolsBEDtoolsBAMtools (paired-ends only)Pysam (paired-ends only) + + + License: + + + + + + + + + + + + + + + + + + + diff --git a/docs/output.md b/docs/output.md index b3198554..2f0ee320 100644 --- a/docs/output.md +++ b/docs/output.md @@ -108,19 +108,19 @@ The library-level alignments associated with the same sample are merged and subs
Output files -- `/mergedLibrary/` +- `/merged_library/` - `*.bam`: Merged library-level, coordinate sorted `*.bam` files after the marking of duplicates, and filtering based on various criteria. The file suffix for the final filtered files will be `*.mLb.clN.*`. If you specify the `--save_align_intermeds` parameter then two additional sets of files will be present. These represent the unfiltered alignments with duplicates marked (`*.mLb.mkD.*`), and in the case of paired-end datasets the filtered alignments before the removal of orphan read pairs (`*.mLb.flT.*`). -- `/mergedLibrary/samtools_stats/` +- `/merged_library/samtools_stats/` - SAMtools `*.flagstat`, `*.idxstats` and `*.stats` files generated from the alignment files. -- `/mergedLibrary/picard_metrics/` +- `/merged_library/picard_metrics/` - `*_metrics`: Alignment QC files from picard CollectMultipleMetrics. - `*.metrics.txt`: Metrics file from MarkDuplicates. -- `/mergedLibrary/picard_metrics/pdf/` +- `/merged_library/picard_metrics/pdf/` - `*.pdf`: Alignment QC plot files from picard CollectMultipleMetrics. -- `/mergedLibrary/preseq/` +- `/merged_library/preseq/` - `*.lc_extrap.txt`: Preseq expected future yield file. -> **NB:** File names in the resulting directory (i.e. `/mergedLibrary/`) will have the '`.mLb.`' suffix. +> **NB:** File names in the resulting directory (i.e. `/merged_library/`) will have the '`.mLb.`' suffix.
@@ -141,7 +141,7 @@ The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at p
Output files -- `/mergedLibrary/bigwig/` +- `/merged_library/bigwig/` - `*.bigWig`: Normalised bigWig files scaled to 1 million mapped reads.
@@ -153,12 +153,12 @@ The [bigWig](https://genome.ucsc.edu/goldenpath/help/bigWig.html) format is in a
Output files -- `/mergedLibrary/phantompeakqualtools/` +- `/merged_library/phantompeakqualtools/` - `*.spp.out`, `*.spp.pdf`: phantompeakqualtools output files. - `*_mqc.tsv`: MultiQC custom content files. -- `/mergedLibrary/deepTools/plotFingerprint/` +- `/merged_library/deepTools/plotFingerprint/` - `*.plotFingerprint.pdf`, `*.plotFingerprint.qcmetrics.txt`, `*.plotFingerprint.raw.txt`: plotFingerprint output files. -- `/mergedLibrary/deepTools/plotProfile/` +- `/merged_library/deepTools/plotProfile/` - `*.computeMatrix.mat.gz`, `*.computeMatrix.vals.mat.tab`, `*.plotProfile.pdf`, `*.plotProfile.tab`, `*.plotHeatmap.pdf`, `*.plotHeatmap.mat.tab`: plotProfile output files.
@@ -188,21 +188,21 @@ The results from deepTools plotProfile gives you a quick visualisation for the g
Output files -- `/mergedLibrary/macs2//` - - `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS2 output files - the files generated will depend on whether MACS2 has been run in _narrowPeak_ or _broadPeak_ mode. +- `<ALIGNER>/merged_library/macs3/<PEAK_TYPE>/` + - `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS3 output files - the files generated will depend on whether MACS3 has been run in _narrowPeak_ or _broadPeak_ mode. - `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file. -- `/mergedLibrary/macs2//qc/` - - `macs2_peak.plots.pdf`: QC plots for MACS2 peaks. - - `macs2_annotatePeaks.plots.pdf`: QC plots for peak-to-gene feature annotation. +- `<ALIGNER>/merged_library/macs3/<PEAK_TYPE>/qc/` + - `macs3_peak.plots.pdf`: QC plots for MACS3 peaks. + - `macs3_annotatePeaks.plots.pdf`: QC plots for peak-to-gene feature annotation. - `*.FRiP_mqc.tsv`, `*.peak_count_mqc.tsv`, `annotatepeaks.summary_mqc.tsv`: MultiQC custom-content files for FRiP score, peak count and peak-to-gene ratios. -> **NB:** `` in the directory structure above corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. +> **NB:** `<PEAK_TYPE>` in the directory structure above corresponds to the type of peak that you have specified to call with MACS3 i.e. `broad_peak` or `narrow_peak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering.
For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline.
-[MACS2](https://github.com/macs3-project/MACS) is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/macs3-project/MACS/blob/master/docs/callpeak.md#output-files) for a description of the output files generated by MACS2. +[MACS3](https://github.com/macs3-project/MACS) is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS3 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS3 outputs](https://github.com/macs3-project/MACS/blob/master/docs/callpeak.md#output-files) for a description of the output files generated by MACS3. -![MultiQC - MACS2 total peak count plot](images/mqc_macs2_peak_count_plot.png) +![MultiQC - MACS3 total peak count plot](images/mqc_macs3_peak_count_plot.png) [HOMER annotatePeaks.pl](http://homer.ucsd.edu/homer/ngs/annotation.html) is used to annotate the peaks relative to known genomic features. HOMER is able to use the `--gtf` annotation file which is provided to the pipeline. Please note that some of the output columns will be blank because the annotation is not provided using HOMER's in-built database format. However, the more important fields required for downstream analysis will be populated i.e. _Annotation_, _Distance to TSS_ and _Nearest Promoter ID_. @@ -210,14 +210,14 @@ The results from deepTools plotProfile gives you a quick visualisation for the g Various QC plots per sample including number of peaks, fold-change distribution, [FRiP score](https://genome.cshlp.org/content/22/9/1813.full.pdf+html) and peak-to-gene feature annotation are also generated by the pipeline.
Where possible these have been integrated into the MultiQC report. -![MultiQC - MACS2 peaks FRiP score plot](images/mqc_frip_score_plot.png) +![MultiQC - MACS3 peaks FRiP score plot](images/mqc_frip_score_plot.png) ### Create and quantify consensus set of peaks
Output files -- `/mergedLibrary/macs2//consensus//` +- `/merged_library/macs3//consensus//` - `*.bed`: Consensus peak-set across all samples in BED format. - `*.saf`: Consensus peak-set across all samples in SAF format. Required by featureCounts for read quantification. - `*.featureCounts.txt`: Read counts across all samples relative to consensus peak-set. @@ -245,7 +245,7 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co
Output files -- `/mergedLibrary/macs2//consensus//deseq2/` +- `/merged_library/macs3//consensus//deseq2/` - `*.sample.dists.txt`: Spreadsheet containing sample-to-sample distance across each consensus peak. - `*.plots.pdf`: File containing PCA and hierarchical clustering plots. - `*.dds.RData`: File containing R `DESeqDataSet` object generated by DESeq2, with either @@ -254,7 +254,7 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co `readRDS` to give user control of the eventual object name. - `*pca.vals.txt`: Matrix of values for the first 2 principal components. - `R_sessionInfo.log`: File containing information about R, the OS and attached or loaded packages. - - `/mergedLibrary/macs2//consensus//sizeFactors/` + - `/merged_library/macs3//consensus//sizeFactors/` - `*.txt`, `*.RData`: Files containing DESeq2 sizeFactors per sample.
@@ -343,6 +343,7 @@ Reference genome-specific files can be useful to keep for the downstream process - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`.
diff --git a/docs/usage.md b/docs/usage.md index 4a656032..29a6c5ee 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,66 +12,82 @@ You will need to create a samplesheet with information about the samples you wou --input '[path to samplesheet file]' ``` -### Multiple runs of the same library +### Multiple replicates -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. Below is an example where the samples called `WT_BCATENIN_IP_REP2` and `WT_INPUT_REP2` have been re-sequenced multiple times: +The `sample` identifier should be identical when you have multiple replicates from the same experimental group, just increment the `replicate` identifier appropriately. The first replicate value for any given experimental group must be 1. -```console -sample,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L003_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, -WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, -WT_INPUT_REP2,BLA203A30_S21_L002_R1_001.fastq.gz,,, -WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, -``` +The `antibody` column is required to separate the downstream consensus peak merging for different antibodies. It is not advisable to generate a consensus peak set across different antibodies especially if their binding patterns are inherently different e.g. narrow transcription factors and broad histone marks. -### Full design +The `control` column should be the `sample` identifier for the controls for any given IP. 
This column together with the `control_replicate` column will set the corresponding control for each of the samples in the table. -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 5 columns to match those defined in the table below. +```console +sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate +WT_BCATENIN_IP,BLA203A1_S27_L006_R1_001.fastq.gz,,1,BCATENIN,WT_INPUT,1 +WT_BCATENIN_IP,BLA203A25_S16_L002_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A49_S40_L001_R1_001.fastq.gz,,3,BCATENIN,WT_INPUT,3 +WT_INPUT,BLA203A6_S32_L006_R1_001.fastq.gz,,1,,, +WT_INPUT,BLA203A30_S21_L002_R1_001.fastq.gz,,2,,, +WT_INPUT,BLA203A31_S21_L003_R1_001.fastq.gz,,3,,, +``` -The `antibody` column is required to separate the downstream consensus peak merging for different antibodies. Its not advisable to generate a consensus peak set across different antibodies especially if their binding patterns are inherently different e.g. narrow transcription factors and broad histone marks. +### Multiple runs of the same library -The `control` column should be the `sample` identifier for the controls for any given IP. +Both the `sample` and `replicate` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis.
Below is an example where the samples called `WT_BCATENIN_IP` and `WT_INPUT` have been re-sequenced multiple times: + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate +WT_BCATENIN_IP,BLA203A1_S27_L006_R1_001.fastq.gz,,1,BCATENIN,WT_INPUT,1 +WT_BCATENIN_IP,BLA203A25_S16_L001_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A25_S16_L002_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A25_S16_L003_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A49_S40_L001_R1_001.fastq.gz,,3,BCATENIN,WT_INPUT,3 +WT_INPUT,BLA203A6_S32_L006_R1_001.fastq.gz,,1,,, +WT_INPUT,BLA203A30_S21_L001_R1_001.fastq.gz,,2,,, +WT_INPUT,BLA203A30_S21_L002_R1_001.fastq.gz,,2,,, +WT_INPUT,BLA203A31_S21_L003_R1_001.fastq.gz,,3,,, +``` -A final design file may look something like the one below. This is for two antibodies and associated controls, where the `WT_BCATENIN_IP_REP2` and `NAIVE_BCATENIN_IP_REP2` samples have been sequenced twice: +### Full design -```console -sample,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT 
-NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, -WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, -WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, -NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,,, -NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,,, -NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,,, +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 7 columns to match those defined in the table below. + +A final design file may look something like the one below. This is for two antibodies and associated controls, where the second replicate of the `WT_BCATENIN_IP` and `NAIVE_BCATENIN_IP` samples have been sequenced twice: + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2,replicate,antibody,control,control_replicate +WT_BCATENIN_IP,BLA203A1_S27_L006_R1_001.fastq.gz,,1,BCATENIN,WT_INPUT,1 +WT_BCATENIN_IP,BLA203A25_S16_L001_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A25_S16_L002_R1_001.fastq.gz,,2,BCATENIN,WT_INPUT,2 +WT_BCATENIN_IP,BLA203A49_S40_L001_R1_001.fastq.gz,,3,BCATENIN,WT_INPUT,3 +NAIVE_BCATENIN_IP,BLA203A7_S60_L001_R1_001.fastq.gz,,1,BCATENIN,NAIVE_INPUT,1 +NAIVE_BCATENIN_IP,BLA203A43_S34_L001_R1_001.fastq.gz,,2,BCATENIN,NAIVE_INPUT,2 +NAIVE_BCATENIN_IP,BLA203A43_S34_L002_R1_001.fastq.gz,,2,BCATENIN,NAIVE_INPUT,2 +NAIVE_BCATENIN_IP,BLA203A64_S55_L001_R1_001.fastq.gz,,3,BCATENIN,NAIVE_INPUT,3 +WT_TCF4_IP,BLA203A3_S29_L006_R1_001.fastq.gz,,1,TCF4,WT_INPUT,1 +WT_TCF4_IP,BLA203A27_S18_L001_R1_001.fastq.gz,,2,TCF4,WT_INPUT,2 +WT_TCF4_IP,BLA203A51_S42_L001_R1_001.fastq.gz,,3,TCF4,WT_INPUT,3 +NAIVE_TCF4_IP,BLA203A9_S62_L001_R1_001.fastq.gz,,1,TCF4,NAIVE_INPUT,1 
+NAIVE_TCF4_IP,BLA203A45_S36_L001_R1_001.fastq.gz,,2,TCF4,NAIVE_INPUT,2 +NAIVE_TCF4_IP,BLA203A66_S57_L001_R1_001.fastq.gz,,3,TCF4,NAIVE_INPUT,3 +WT_INPUT,BLA203A6_S32_L006_R1_001.fastq.gz,,1,,, +WT_INPUT,BLA203A30_S21_L001_R1_001.fastq.gz,,2,,, +WT_INPUT,BLA203A31_S21_L003_R1_001.fastq.gz,,3,,, +NAIVE_INPUT,BLA203A12_S3_L001_R1_001.fastq.gz,,1,,, +NAIVE_INPUT,BLA203A48_S39_L001_R1_001.fastq.gz,,2,,, +NAIVE_INPUT,BLA203A49_S1_L006_R1_001.fastq.gz,,3,,, ``` -| Column | Description | -| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `antibody` | Antibody name. This is required to segregate downstream analysis for different antibodies. Required when `control` is specified. | -| `control` | Sample name for control sample. | +| Column | Description | +| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". 
| +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `replicate` | Integer representing replicate number. This will be identical for re-sequenced libraries. Must start from `1..`. | +| `antibody` | Antibody name. This is required to segregate downstream analysis for different antibodies. Required when `control` is specified. | +| `control` | Sample name for control sample. | +| `control_replicate` | Integer representing replicate number for control sample. | -Example design files have been provided with the pipeline for [paired-end](../assets/samplesheet_pe.csv) and [single-end](../assets/samplesheet_se.csv) data. +Example design files have been provided with the pipeline for [paired-end](../assets/samplesheet_pe.csv) and [single-end](../assets/samplesheet_se.csv) data. > **NB:** The `group` and `replicate` columns were replaced with a single `sample` column as of v2.0 of the pipeline. The `sample` column is essentially a concatenation of the `group` and `replicate` columns. If all values of `sample` have the same number of underscores, fields defined by these underscore-separated names may be used in the PCA plots produced by the pipeline, to regain the ability to represent different groupings. @@ -117,7 +133,7 @@ wget -L https://www.encodeproject.org/files/ENCFF356LFX/@@download/ENCFF356LFX.b The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/chipseq --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run nf-core/chipseq --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -131,6 +147,31 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs.
``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`. + +:::warning +Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/chipseq -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -143,29 +184,42 @@ nextflow pull nf-core/chipseq It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest version number - numeric only (eg. `1.2.2`).
Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.2.2`. +First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest pipeline version - numeric only (eg. `2.0.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 2.0.0`. Of course, you can switch to another version by changing the number after the `-r` flag. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +:::tip +If you wish to share such a profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.
When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.
+- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters - `docker` - A generic configuration profile to be used with [Docker](https://docker.com/) - `singularity` @@ -176,11 +230,12 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -198,95 +253,19 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time.
For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. 
The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. 
`NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. +### Custom Containers -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. 
By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline-specified version may be out of date. - - For Docker: +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +### Custom Tool Arguments - - For Singularity: +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` - - - For Conda: - - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` - -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs @@ -296,6 +275,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).
+## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index b3d092f8..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json
= new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' 
- JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = group_params.get(param).default != null ? 
" [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = 
workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = 
paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } 
- // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... - // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - 
ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 2fc0a9b9..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,258 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = 
workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send 
plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? 
'' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 28567bd7..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,40 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline 
-// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowChipseq.groovy b/lib/WorkflowChipseq.groovy deleted file mode 100755 index dfae806f..00000000 --- a/lib/WorkflowChipseq.groovy +++ /dev/null @@ -1,105 +0,0 @@ -// -// This file holds several functions specific to the workflow/chipseq.nf in the nf-core/chipseq pipeline -// - -class WorkflowChipseq { - - // - // Check and validate parameters - // - public static void initialise(params, log, valid_params) { - genomeExistsError(params, log) - - - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta' or via a detectable config file." 
- System.exit(1) - } - - if (!params.gtf && !params.gff) { - log.error "No GTF or GFF3 annotation specified! The pipeline requires at least one of these files." - System.exit(1) - } - - if (params.gtf && params.gff) { - gtfGffWarn(log) - } - - if (!params.macs_gsize) { - macsGsizeWarn(log) - } - - if (!params.read_length && !params.macs_gsize) { - log.error "Both '--read_length' and '--macs_gsize' not specified! Please specify either to infer MACS2 genome size for peak calling." - System.exit(1) - } - - if (params.aligner) { - if (!valid_params['aligners'].contains(params.aligner)) { - log.error "Invalid option: '${params.aligner}'. Valid options for '--aligner': ${valid_params['aligners'].join(', ')}." - System.exit(1) - } - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - }// - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) - } - } - - // - // Print a warning if both GTF and GFF have been provided - // - private static void gtfGffWarn(log) { - log.warn "=============================================================================\n" + - " Both '--gtf' and '--gff' parameters have been provided.\n" + - " Using GTF file as priority.\n" + - "===================================================================================" - } - - // - // Print a warning if macs_gsize parameter has not been provided - // - private static void macsGsizeWarn(log) { - log.warn "=============================================================================\n" + - " --macs_gsize parameter has not been provided.\n" + - " It will be auto-calculated by 'khmer unique-kmers.py' using the '--read_length' parameter.\n" + - " Explicitly provide '--macs_gsize' to change this behaviour.\n" + - 
"===================================================================================" - } - -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 9eaea561..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,107 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/chipseq pipeline -// - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.3240506\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --outdir --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info 
help(workflow, params, log) - System.exit(0) - } - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Print parameter summary log to screen - - log.info paramsSummaryLog(workflow, params, log) - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (params.enable_conda) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } - - // - // Get macs genome size (macs_gsize) - // - public static Long getMacsGsize(params) { - def val = null - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey('macs_gsize')) { - if (params.genomes[ params.genome ][ 'macs_gsize' ].containsKey(params.read_length.toString())) { - val = params.genomes[ params.genome ][ 'macs_gsize' ][ params.read_length.toString() ] - } - } - } - return val - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 9baf7cb8..af328320 100755 --- a/main.nf +++ b/main.nf @@ -17,52 +17,157 @@ nextflow.enable.dsl = 2 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.bwa_index = WorkflowMain.getGenomeAttribute(params, 'bwa') -params.bowtie2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') -params.chromap_index = WorkflowMain.getGenomeAttribute(params, 'chromap') -params.star_index = WorkflowMain.getGenomeAttribute(params, 'star') -params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') -params.gff = WorkflowMain.getGenomeAttribute(params, 'gff') -params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'gene_bed') -params.blacklist = WorkflowMain.getGenomeAttribute(params, 'blacklist') -params.macs_gsize = WorkflowMain.getMacsGsize(params) +params.fasta = getGenomeAttribute('fasta') +params.bwa_index = getGenomeAttribute('bwa') +params.bowtie2_index = getGenomeAttribute('bowtie2') +params.chromap_index = getGenomeAttribute('chromap') +params.star_index = getGenomeAttribute('star') +params.gtf = getGenomeAttribute('gtf') +params.gff = getGenomeAttribute('gff') +params.gene_bed = getGenomeAttribute('gene_bed') +params.blacklist = getGenomeAttribute('blacklist') +params.macs_gsize = getMacsGsize(params) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -WorkflowMain.initialise(workflow, params, log) +include { CHIPSEQ } from './workflows/chipseq' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_chipseq_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_chipseq_pipeline' +// include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_chipseq_pipeline' +// include { getMacsGsize } from 
'./subworkflows/local/utils_nfcore_chipseq_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CHIPSEQ } from './workflows/chipseq' - // -// WORKFLOW: Run main nf-core/chipseq analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_CHIPSEQ { - CHIPSEQ () + + main: + ch_versions = Channel.empty() + + // SUBWORKFLOW: Prepare genome files + PREPARE_GENOME ( + params.genome, + params.genomes, + params.aligner, + params.fasta, + params.gtf, + params.gff, + params.blacklist, + params.gene_bed, + params.bwa_index, + params.bowtie2_index, + params.chromap_index, + params.star_index, + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + // + // WORKFLOW: Run nf-core/chipseq workflow + // + ch_input = Channel.value(file(params.input, checkIfExists: true)) + + CHIPSEQ( + ch_input, + ch_versions, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai, + PREPARE_GENOME.out.gtf, + PREPARE_GENOME.out.gene_bed, + PREPARE_GENOME.out.chrom_sizes, + PREPARE_GENOME.out.filtered_bed, + PREPARE_GENOME.out.bwa_index, + PREPARE_GENOME.out.bowtie2_index, + PREPARE_GENOME.out.chromap_index, + PREPARE_GENOME.out.star_index + ) + + emit: + multiqc_report = CHIPSEQ.out.multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [version1, version2, ...] 
} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir + ) + + // + // WORKFLOW: Run main workflow + // NFCORE_CHIPSEQ () + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_CHIPSEQ.out.multiqc_report + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Get attribute from genome config file e.g. 
fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Get macs genome size (macs_gsize) +// +def getMacsGsize(params) { + def val = null + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey('macs_gsize')) { + if (params.genomes[ params.genome ][ 'macs_gsize' ].containsKey(params.read_length.toString())) { + val = params.genomes[ params.genome ][ 'macs_gsize' ][ params.read_length.toString() ] + } + } + } + return val } /* diff --git a/modules.json b/modules.json index 81c94b44..e8c2f7aa 100644 --- a/modules.json +++ b/modules.json @@ -2,136 +2,233 @@ "name": "nf-core/chipseq", "homePage": "https://github.com/nf-core/chipseq", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "bowtie2/align": { - "branch": "master", - "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" - }, - "bowtie2/build": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bwa/index": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bwa/mem": { - "branch": "master", - "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9" - }, - "chromap/chromap": { - "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "chromap/index": { - "branch": "master", - "git_sha": "dbb46c9b635080b132bab4b8d5b9a14f0d1c22e7" - }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "e5b44499efcf6f7fb24874886bac60591c5d94dd" - }, - "custom/getchromsizes": { - "branch": "master", - "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" - }, - "deeptools/computematrix": { - "branch": "master", - 
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "deeptools/plotfingerprint": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "deeptools/plotheatmap": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "deeptools/plotprofile": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "branch": "master", - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "gffread": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "gunzip": { - "branch": "master", - "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" - }, - "homer/annotatepeaks": { - "branch": "master", - "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" - }, - "khmer/uniquekmers": { - "branch": "master", - "git_sha": "82fdff4fb4ce6cafcc028a7503da835427f35352" - }, - "macs2/callpeak": { - "branch": "master", - "git_sha": "f0800157544a82ae222931764483331a81812012" - }, - "phantompeakqualtools": { - "branch": "master", - "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" - }, - "picard/collectmultiplemetrics": { - "branch": "master", - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "picard/markduplicates": { - "branch": "master", - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "picard/mergesamfiles": { - "branch": "master", - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "preseq/lcextrap": { - "branch": "master", - "git_sha": "7111e571cc5b6069de4673cd6165af680f17b4d7" - }, - "samtools/flagstat": { - "branch": "master", - "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" - }, - "samtools/idxstats": { - "branch": "master", - "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" - }, - "samtools/index": { - "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/sort": { - "branch": "master", - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, 
- "samtools/stats": { - "branch": "master", - "git_sha": "f48a24770e24358e58de66e9b805a70d77cd154b" - }, - "subread/featurecounts": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "trimgalore": { - "branch": "master", - "git_sha": "85ec13ff1fc2196c5a507ea497de468101baabed" - }, - "ucsc/bedgraphtobigwig": { - "branch": "master", - "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" - }, - "untar": { - "branch": "master", - "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" + "nf-core": { + "bowtie2/align": { + "branch": "master", + "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", + "installed_by": ["fastq_align_bowtie2", "modules"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "6a24fbe314bb2e6fe6306c29a63076ea87e8eb3c", + "installed_by": ["modules"] + }, + "bwa/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "bwa/mem": { + "branch": "master", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["fastq_align_bwa", "modules"] + }, + "chromap/chromap": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["fastq_align_chromap", "modules"] + }, + "chromap/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "deeptools/computematrix": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "deeptools/plotfingerprint": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "deeptools/plotheatmap": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + 
"deeptools/plotprofile": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", + "installed_by": ["fastq_fastqc_umitools_trimgalore", "modules"] + }, + "gffread": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] + }, + "homer/annotatepeaks": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "khmer/uniquekmers": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "macs3/callpeak": { + "branch": "master", + "git_sha": "3a3f471ed42f640115b2661edee5b258e6a254c1", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "installed_by": ["modules"] + }, + "phantompeakqualtools": { + "branch": "master", + "git_sha": "2dfe9afa90fefc70e320140e5f41287f01f324b0", + "installed_by": ["modules"] + }, + "picard/collectmultiplemetrics": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "ec833ac4c29db6005d18baccf3306f557c46b006", + "installed_by": ["bam_markduplicates_picard", "modules"] + }, + "picard/mergesamfiles": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "preseq/lcextrap": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "installed_by": ["bam_stats_samtools", 
"modules"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools", "modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "installed_by": ["bam_sort_stats_samtools", "modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "ec833ac4c29db6005d18baccf3306f557c46b006", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "subread/featurecounts": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "trimgalore": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["fastq_fastqc_umitools_trimgalore", "modules"] + }, + "ucsc/bedgraphtobigwig": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "umitools/extract": { + "branch": "master", + "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "installed_by": ["fastq_fastqc_umitools_trimgalore"] + }, + "untar": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] + }, + "untarfiles": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "bam_markduplicates_picard": { + "branch": "master", + "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", + "installed_by": ["subworkflows"] + }, + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", + "installed_by": ["fastq_align_bowtie2", "fastq_align_bwa", "fastq_align_chromap"] + }, + "bam_stats_samtools": { + "branch": 
"master", + "git_sha": "0c38be7e652a0b2f3a37681ee4c0dbdf85677647", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] + }, + "fastq_align_bowtie2": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] + }, + "fastq_align_bwa": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] + }, + "fastq_align_chromap": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] + }, + "fastq_fastqc_umitools_trimgalore": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } } } } diff --git a/modules/local/annotate_boolean_peaks.nf b/modules/local/annotate_boolean_peaks.nf index ab2dfbed..77baf5c4 100644 --- a/modules/local/annotate_boolean_peaks.nf +++ b/modules/local/annotate_boolean_peaks.nf @@ -2,10 +2,10 @@ process ANNOTATE_BOOLEAN_PEAKS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/library/ubuntu:20.04' }" input: tuple val(meta), path(boolean_txt), path(homer_peaks) diff --git a/modules/local/bam_remove_orphans.nf b/modules/local/bam_remove_orphans.nf index 21ffc73a..2bfb7dfd 100644 --- a/modules/local/bam_remove_orphans.nf +++ b/modules/local/bam_remove_orphans.nf @@ -5,10 +5,10 @@ process BAM_REMOVE_ORPHANS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::pysam=0.19.0 bioconda::samtools=1.15.1" : null) + conda "bioconda::pysam=0.19.0 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' : - 'quay.io/biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" + 'biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" input: tuple val(meta), path(bam) @@ -17,6 +17,9 @@ process BAM_REMOVE_ORPHANS { tuple val(meta), path("${prefix}.bam"), emit: bam path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/bam_filter.nf b/modules/local/bamtools_filter.nf similarity index 54% rename from modules/local/bam_filter.nf rename to modules/local/bamtools_filter.nf index ff07c084..a1e8feed 100644 --- a/modules/local/bam_filter.nf +++ b/modules/local/bamtools_filter.nf @@ -1,14 +1,11 @@ -/* - * Filter BAM file - */ -process BAM_FILTER { +process BAMTOOLS_FILTER { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 
"bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" : null) + conda "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' : - 'quay.io/biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" + 'biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" input: tuple val(meta), path(bam), path(bai) @@ -20,19 +17,18 @@ process BAM_FILTER { tuple val(meta), path("*.bam"), emit: bam path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: - def prefix = task.ext.prefix ?: "${meta.id}" - def filter_params = meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001' - def dup_params = params.keep_dups ? '' : '-F 0x0400' - def multimap_params = params.keep_multi_map ? '' : '-q 1' - def blacklist_params = params.blacklist ? "-L $bed" : '' - def config = meta.single_end ? bamtools_filter_se_config : bamtools_filter_pe_config + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def blacklist = bed ? "-L $bed" : '' + def config = meta.single_end ? bamtools_filter_se_config : bamtools_filter_pe_config """ samtools view \\ - $filter_params \\ - $dup_params \\ - $multimap_params \\ - $blacklist_params \\ + $args \\ + $blacklist \\ -b $bam \\ | bamtools filter \\ -out ${prefix}.bam \\ diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index e8cbb3f7..fb274631 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -2,10 +2,10 @@ process BEDTOOLS_GENOMECOV { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 
"bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" input: tuple val(meta), path(bam), path(flagstat) @@ -15,11 +15,13 @@ process BEDTOOLS_GENOMECOV { tuple val(meta), path("*.txt") , emit: scale_factor path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def pe = meta.single_end ? '' : '-pc' - def extend = (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' """ SCALE_FACTOR=\$(grep '[0-9] mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -30,7 +32,7 @@ process BEDTOOLS_GENOMECOV { -bg \\ -scale \$SCALE_FACTOR \\ $pe \\ - $extend \\ + $args \\ | sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/deseq2_qc.nf b/modules/local/deseq2_qc.nf index 84ff0d5f..14b57f69 100644 --- a/modules/local/deseq2_qc.nf +++ b/modules/local/deseq2_qc.nf @@ -4,10 +4,10 @@ process DESEQ2_QC { // (Bio)conda packages have intentionally not been pinned to a specific version // This was to avoid the pipeline failing due to package conflicts whilst creating the environment when using -profile conda - conda (params.enable_conda ? 
"conda-forge::r-base bioconda::bioconductor-deseq2 bioconda::bioconductor-biocparallel bioconda::bioconductor-tximport bioconda::bioconductor-complexheatmap conda-forge::r-optparse conda-forge::r-ggplot2 conda-forge::r-rcolorbrewer conda-forge::r-pheatmap" : null) + conda "conda-forge::r-base bioconda::bioconductor-deseq2 bioconda::bioconductor-biocparallel bioconda::bioconductor-tximport bioconda::bioconductor-complexheatmap conda-forge::r-optparse conda-forge::r-ggplot2 conda-forge::r-rcolorbrewer conda-forge::r-pheatmap" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' : - 'quay.io/biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" + 'biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" input: tuple val(meta), path(counts) @@ -26,10 +26,12 @@ process DESEQ2_QC { path "size_factors" , optional:true, emit: size_factors path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: - def args = task.ext.args ?: '' - def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ deseq2_qc.r \\ --count_file $counts \\ diff --git a/modules/local/frip_score.nf b/modules/local/frip_score.nf index 337b18c5..77f856bc 100644 --- a/modules/local/frip_score.nf +++ b/modules/local/frip_score.nf @@ -2,10 +2,10 @@ process FRIP_SCORE { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 
"bioconda::bedtools=2.30.0 bioconda::samtools=1.15.1" : null) + conda "bioconda::bedtools=2.30.0 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0': - 'quay.io/biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" + 'biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" input: tuple val(meta), path(bam), path(peak) @@ -14,6 +14,9 @@ process FRIP_SCORE { tuple val(meta), path("*.txt"), emit: txt path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf index 1a28af0f..c365c8a7 100644 --- a/modules/local/genome_blacklist_regions.nf +++ b/modules/local/genome_blacklist_regions.nf @@ -4,10 +4,10 @@ process GENOME_BLACKLIST_REGIONS { tag "$sizes" - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" input: path sizes @@ -17,6 +17,9 @@ process GENOME_BLACKLIST_REGIONS { path '*.bed' , emit: bed path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: def file_out = "${sizes.simpleName}.include_regions.bed" if (blacklist) { diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf index 1d306cee..bc0eef54 100644 --- a/modules/local/gtf2bed.nf +++ b/modules/local/gtf2bed.nf @@ -2,10 +2,10 @@ process GTF2BED { tag "$gtf" label 'process_low' - conda (params.enable_conda ? "conda-forge::perl=5.26.2" : null) + conda "conda-forge::perl=5.26.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/perl:5.26.2': - 'quay.io/biocontainers/perl:5.26.2' }" + 'biocontainers/perl:5.26.2' }" input: path gtf @@ -14,6 +14,9 @@ process GTF2BED { path '*.bed' , emit: bed path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ """ gtf2bed \\ diff --git a/modules/local/igv.nf b/modules/local/igv.nf index 213904b3..182a339e 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -3,27 +3,31 @@ */ process IGV { - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/python:3.8.3': - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: val aligner_dir val peak_dir path fasta - path ("${aligner_dir}/mergedLibrary/bigwig/*") - path ("${aligner_dir}/mergedLibrary/macs2/${peak_dir}/*") - path ("${aligner_dir}/mergedLibrary/macs2/${peak_dir}/consensus/*") + path ("${aligner_dir}/merged_library/bigwig/*") + path ("${aligner_dir}/merged_library/macs3/${peak_dir}/*") + path ("${aligner_dir}/merged_library/macs3/${peak_dir}/consensus/*") path ("mappings/*") output: path "*files.txt" , emit: txt path "*.xml" , emit: xml + path fasta , emit: fasta path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/ - def consensus_dir = "${aligner_dir}/mergedLibrary/macs2/${peak_dir}/consensus/*" + def consensus_dir = "${aligner_dir}/merged_library/macs3/${peak_dir}/consensus/*" """ find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; > bigwig.igv.txt find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; > peaks.igv.txt diff --git a/modules/local/macs2_consensus.nf b/modules/local/macs3_consensus.nf similarity index 73% rename from modules/local/macs2_consensus.nf rename to modules/local/macs3_consensus.nf index ab9ef93a..520727a8 100644 --- a/modules/local/macs2_consensus.nf +++ b/modules/local/macs3_consensus.nf @@ -1,17 +1,18 @@ /* * Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts */ -process MACS2_CONSENSUS { +process MACS3_CONSENSUS { tag "$meta.id" label 'process_long' - conda (params.enable_conda ? 
"conda-forge::biopython conda-forge::r-optparse=1.7.1 conda-forge::r-upsetr=1.4.0 bioconda::bedtools=2.30.0" : null) + conda "conda-forge::biopython conda-forge::r-optparse=1.7.1 conda-forge::r-upsetr=1.4.0 bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0': - 'quay.io/biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" + 'biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" input: tuple val(meta), path(peaks) + val is_narrow_peak output: tuple val(meta), path("*.bed") , emit: bed @@ -26,20 +27,22 @@ process MACS2_CONSENSUS { task.ext.when == null || task.ext.when script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - def prefix = task.ext.prefix ?: "${meta.id}" - def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' - def mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') - def collapsecols = params.narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') - def expandparam = params.narrow_peak ? '--is_narrow_peak' : '' + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' + def mergecols = is_narrow_peak ? (2..10).join(',') : (2..9).join(',') + def collapsecols = is_narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') + def expandparam = is_narrow_peak ? '--is_narrow_peak' : '' """ sort -T '.' 
-k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt - macs2_merged_expand.py \\ + macs3_merged_expand.py \\ ${prefix}.txt \\ ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${peak_type}","")} \\ ${prefix}.boolean.txt \\ --min_replicates $params.min_reps_consensus \\ + $args \\ $expandparam awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2, \$3, \$4, "0", "+" }' ${prefix}.boolean.txt > ${prefix}.bed diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 702b239f..db64f9d7 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -1,16 +1,16 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) + conda "bioconda::multiqc=1.13a" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1': - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + 'biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: + path workflow_summary path multiqc_config path mqc_custom_config - path software_versions - path workflow_summary + path logo path ('fastqc/*') path ('trimgalore/fastqc/*') @@ -20,15 +20,15 @@ process MULTIQC { path ('alignment/library/*') path ('alignment/library/*') - path ('alignment/mergedLibrary/unfiltered/*') - path ('alignment/mergedLibrary/unfiltered/*') - path ('alignment/mergedLibrary/unfiltered/*') - path ('alignment/mergedLibrary/unfiltered/picard_metrics/*') + path ('alignment/merged_library/unfiltered/*') + path ('alignment/merged_library/unfiltered/*') + path ('alignment/merged_library/unfiltered/*') + path ('alignment/merged_library/unfiltered/picard_metrics/*') - path ('alignment/mergedLibrary/filtered/*') - path ('alignment/mergedLibrary/filtered/*') - path ('alignment/mergedLibrary/filtered/*') - path ('alignment/mergedLibrary/filtered/picard_metrics/*') + path 
('alignment/merged_library/filtered/*') + path ('alignment/merged_library/filtered/*') + path ('alignment/merged_library/filtered/*') + path ('alignment/merged_library/filtered/picard_metrics/*') path ('preseq/*') @@ -40,10 +40,10 @@ process MULTIQC { path ('phantompeakqualtools/*') path ('phantompeakqualtools/*') - path ('macs2/peaks/*') - path ('macs2/peaks/*') - path ('macs2/annotation/*') - path ('macs2/featurecounts/*') + path ('macs3/peaks/*') + path ('macs3/peaks/*') + path ('macs3/annotation/*') + path ('macs3/featurecounts/*') path ('deseq2/*') path ('deseq2/*') @@ -54,6 +54,9 @@ process MULTIQC { path "*_plots" , optional:true, emit: plots path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def custom_config = params.multiqc_config ? "--config $mqc_custom_config" : '' diff --git a/modules/local/multiqc_custom_peaks.nf b/modules/local/multiqc_custom_peaks.nf index ebef7b13..eb3e5cbb 100644 --- a/modules/local/multiqc_custom_peaks.nf +++ b/modules/local/multiqc_custom_peaks.nf @@ -1,9 +1,10 @@ process MULTIQC_CUSTOM_PEAKS { tag "$meta.id" - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(peak), path(frip) @@ -13,6 +14,10 @@ process MULTIQC_CUSTOM_PEAKS { output: tuple val(meta), path("*.peak_count_mqc.tsv"), emit: count tuple val(meta), path("*.FRiP_mqc.tsv") , emit: frip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/multiqc_custom_phantompeakqualtools.nf b/modules/local/multiqc_custom_phantompeakqualtools.nf index 4878e2c2..7fc74a2e 100644 --- a/modules/local/multiqc_custom_phantompeakqualtools.nf +++ b/modules/local/multiqc_custom_phantompeakqualtools.nf @@ -1,9 +1,9 @@ process MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS { tag "$meta.id" - conda (params.enable_conda ? "conda-forge::r-base=3.5.1" : null) + conda "conda-forge::r-base=4.3.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-base:3.5.1': - 'quay.io/biocontainers/r-base:3.5.1' }" + 'oras://community.wave.seqera.io/library/r-base:4.3.3--452dec8277637366': + 'community.wave.seqera.io/library/r-base:4.3.3--14bb33ac537aea22' }" input: tuple val(meta), path(spp), path(rdata) @@ -16,6 +16,9 @@ process MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS { tuple val(meta), path("*.spp_rsc_mqc.tsv") , emit: rsc tuple val(meta), path("*.spp_correlation_mqc.tsv"), emit: correlation + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/plot_homer_annotatepeaks.nf b/modules/local/plot_homer_annotatepeaks.nf index 3375349a..ef4b1ce6 100644 --- a/modules/local/plot_homer_annotatepeaks.nf +++ b/modules/local/plot_homer_annotatepeaks.nf @@ -1,10 +1,10 @@ process PLOT_HOMER_ANNOTATEPEAKS { label 'process_medium' - conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': - 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" input: path annos @@ -17,6 +17,9 @@ process PLOT_HOMER_ANNOTATEPEAKS { path '*.tsv' , emit: tsv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "annotatepeaks" diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_macs3_qc.nf similarity index 58% rename from modules/local/plot_macs2_qc.nf rename to modules/local/plot_macs3_qc.nf index bbbf1ce8..1722cadd 100644 --- a/modules/local/plot_macs2_qc.nf +++ b/modules/local/plot_macs3_qc.nf @@ -1,24 +1,28 @@ -process PLOT_MACS2_QC { +process PLOT_MACS3_QC { label 'process_medium' - conda (params.enable_conda ? 
"conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': - 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" input: path peaks + val is_narrow_peak output: path '*.txt' , emit: txt path '*.pdf' , emit: pdf path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ def args = task.ext.args ?: '' - def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' + def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' """ - plot_macs2_qc.r \\ + plot_macs3_qc.r \\ -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ $args diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 5094e5a0..216e9fb7 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,10 +1,11 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_single' - conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet @@ -13,11 +14,15 @@ process SAMPLESHEET_CHECK { path '*.csv' , emit: csv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def args = task.ext.args ?: '' """ check_samplesheet.py \\ $samplesheet \\ - samplesheet.valid.csv + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index f960045f..6a45b731 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -3,14 +3,15 @@ process STAR_ALIGN { label 'process_high' // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? "bioconda::star=2.6.1d" : null) + conda "bioconda::star=2.6.1d" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/star:2.6.1d--0' : - 'quay.io/biocontainers/star:2.6.1d--0' }" + 'biocontainers/star:2.6.1d--0' }" input: - tuple val(meta), path(reads) + tuple val(meta) , path(reads) path index + val seq_center output: tuple val(meta), path('*d.out.bam') , emit: bam @@ -25,10 +26,13 @@ process STAR_ALIGN { tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq tuple val(meta), path('*.tab') , optional:true, emit: tab + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def seq_center = params.seq_center ? 
"--outSAMattrRGline ID:$prefix 'CN:$params.seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'" + def seq_center_tag = seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'" def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' """ @@ -38,7 +42,7 @@ process STAR_ALIGN { --runThreadN $task.cpus \\ --outFileNamePrefix $prefix. \\ $out_sam_type \\ - $seq_center \\ + $seq_center_tag \\ $args $mv_unsorted_bam if [ -f ${prefix}.Unmapped.out.mate1 ]; then diff --git a/modules/local/star_genomegenerate.nf b/modules/local/star_genomegenerate.nf index 3cd4ff20..a36af0a8 100644 --- a/modules/local/star_genomegenerate.nf +++ b/modules/local/star_genomegenerate.nf @@ -3,10 +3,10 @@ process STAR_GENOMEGENERATE { label 'process_high' // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null) + conda "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" input: path fasta @@ -16,6 +16,9 @@ process STAR_GENOMEGENERATE { path "star" , emit: index path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = (task.ext.args ?: '').tokenize() def memory = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 00000000..d2796359 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,9 @@ +name: bowtie2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 00000000..8c405ee3 --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,99 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label "process_high" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.{bam,sam}"), emit: aligned + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? 
"--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? 
"touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + + """ + touch ${prefix}.${extension} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml similarity index 84% rename from modules/nf-core/modules/bowtie2/align/meta.yml rename to modules/nf-core/bowtie2/align/meta.yml index 42ba0f96..e66811d0 100644 --- a/modules/nf-core/modules/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -27,6 +27,11 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] - index: type: file description: Bowtie2 genome index files @@ -41,10 +46,10 @@ input: description: use samtools sort (true) or samtools view (false) pattern: "true or false" output: - - bam: + - aligned: type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" + description: Output BAM/SAM file containing read alignments + pattern: "*.{bam,sam}" - versions: type: file description: File containing software versions @@ -60,3 +65,6 @@ output: authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 00000000..fdc1c59d --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 00000000..a478d17b --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,561 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + 
{ assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert 
process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = true //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = true //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script 
"../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = false //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = true //save_unaligned + input[3] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.aligned[0][1]).name, + file(process.out.log[0][1]).name, + file(process.out.fastq[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..883dc7ec --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,263 @@ +{ + "sarscov2 - fastq, index, false, false - sam2": { + "content": [ + [ + 
"ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') END_VERSIONS """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml similarity index 74% rename from modules/nf-core/modules/bowtie2/build/meta.yml rename to modules/nf-core/bowtie2/build/meta.yml index 2da9a217..0240224d 100644 --- a/modules/nf-core/modules/bowtie2/build/meta.yml +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -16,10 +16,20 @@ tools: doi: 10.1038/nmeth.1923 licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: Input genome fasta file output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] - index: type: file description: Bowtie2 genome index files diff --git a/modules/nf-core/modules/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf similarity index 53% rename from modules/nf-core/modules/bwa/index/main.nf rename to modules/nf-core/bwa/index/main.nf index 3affbf16..8d2e56d9 100644 --- a/modules/nf-core/modules/bwa/index/main.nf +++ b/modules/nf-core/bwa/index/main.nf @@ -1,18 +1,18 @@ process BWA_INDEX { tag "$fasta" - label 'process_high' + label 'process_single' - conda (params.enable_conda ? 
"bioconda::bwa=0.7.17" : null) + conda "bioconda::bwa=0.7.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + 'biocontainers/bwa:0.7.17--hed695b0_7' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "bwa" , emit: index - path "versions.yml", emit: versions + tuple val(meta), path(bwa) , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -32,4 +32,20 @@ process BWA_INDEX { bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') END_VERSIONS """ + + stub: + """ + mkdir bwa + + touch bwa/genome.amb + touch bwa/genome.ann + touch bwa/genome.bwt + touch bwa/genome.pac + touch bwa/genome.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml similarity index 73% rename from modules/nf-core/modules/bwa/index/meta.yml rename to modules/nf-core/bwa/index/meta.yml index 2bbd81d9..2c6cfcd7 100644 --- a/modules/nf-core/modules/bwa/index/meta.yml +++ b/modules/nf-core/bwa/index/meta.yml @@ -15,10 +15,20 @@ tools: arxiv: arXiv:1303.3997 licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] - fasta: type: file description: Input genome fasta file output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] - index: type: file description: BWA genome index files diff --git a/modules/nf-core/bwa/mem/environment.yml b/modules/nf-core/bwa/mem/environment.yml new file mode 100644 index 00000000..c5b2a9ce --- /dev/null +++ b/modules/nf-core/bwa/mem/environment.yml @@ -0,0 +1,9 @@ +name: bwa_mem +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bwa=0.7.17 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.18 diff --git a/modules/nf-core/modules/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf similarity index 61% rename from modules/nf-core/modules/bwa/mem/main.nf rename to modules/nf-core/bwa/mem/main.nf index f55af944..a3a82994 100644 --- a/modules/nf-core/modules/bwa/mem/main.nf +++ b/modules/nf-core/bwa/mem/main.nf @@ -2,14 +2,14 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' : - 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:9c0128851101dafef65cef649826d2dbe6bedd7e-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:9c0128851101dafef65cef649826d2dbe6bedd7e-0' }" input: tuple val(meta), path(reads) - path index + tuple val(meta2), path(index) val sort_bam output: @@ -25,7 +25,7 @@ process BWA_MEM { def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 
'sort' : 'view' """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` bwa mem \\ $args \\ @@ -40,4 +40,16 @@ process BWA_MEM { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml similarity index 87% rename from modules/nf-core/modules/bwa/mem/meta.yml rename to modules/nf-core/bwa/mem/meta.yml index f84c5227..440fb1f9 100644 --- a/modules/nf-core/modules/bwa/mem/meta.yml +++ b/modules/nf-core/bwa/mem/meta.yml @@ -28,6 +28,11 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] - index: type: file description: BWA genome index files @@ -48,3 +53,6 @@ output: authors: - "@drpatelh" - "@jeremy1805" +maintainers: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test b/modules/nf-core/bwa/mem/tests/main.nf.test new file mode 100644 index 00000000..b199bb70 --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test @@ -0,0 +1,172 @@ +nextflow_process { + + name "Test Process BWA_MEM" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/mem" + script "../main.nf" + process "BWA_MEM" + + test("Single-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Single-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/modules/nf-core/bwa/mem/tests/main.nf.test.snap new file mode 100644 index 00000000..bfb55fc7 --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "Single-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,df203d7c7e8fef351408a909570c7952" + ] + ], + "1": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,df203d7c7e8fef351408a909570c7952" + ] + ], + "versions": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ] + } + ], + "timestamp": "2023-12-04T11:01:22.483594641" + }, + 
"Single-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,8a52bd78fdcecb994c1f63897d5b431c" + ] + ], + "1": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,8a52bd78fdcecb994c1f63897d5b431c" + ] + ], + "versions": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ] + } + ], + "timestamp": "2023-12-04T11:01:30.180783483" + }, + "Paired-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,9815aef9ec763a60c53c3879be2d73ae" + ] + ], + "1": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,9815aef9ec763a60c53c3879be2d73ae" + ] + ], + "versions": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ] + } + ], + "timestamp": "2023-12-04T11:01:38.761983007" + }, + "Paired-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,0f0cda73704c4f7ba08af482edcbbe88" + ] + ], + "1": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,0f0cda73704c4f7ba08af482edcbbe88" + ] + ], + "versions": [ + "versions.yml:md5,a18ac8ef8cfcc7b2cc262c49d4c064f9" + ] + } + ], + "timestamp": "2023-12-04T11:01:46.284587802" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/mem/tests/tags.yml b/modules/nf-core/bwa/mem/tests/tags.yml new file mode 100644 index 00000000..82992d1f --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/tags.yml @@ -0,0 +1,3 @@ +bwa/mem: + - modules/nf-core/bwa/index/** + - modules/nf-core/bwa/mem/** diff --git a/modules/nf-core/chromap/chromap/environment.yml b/modules/nf-core/chromap/chromap/environment.yml new file mode 100644 index 00000000..a630f9c5 --- /dev/null +++ b/modules/nf-core/chromap/chromap/environment.yml @@ -0,0 
+1,8 @@ +name: chromap_chromap +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::chromap=0.2.4 + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/modules/chromap/chromap/main.nf b/modules/nf-core/chromap/chromap/main.nf similarity index 89% rename from modules/nf-core/modules/chromap/chromap/main.nf rename to modules/nf-core/chromap/chromap/main.nf index 137f0340..2cc09a43 100644 --- a/modules/nf-core/modules/chromap/chromap/main.nf +++ b/modules/nf-core/chromap/chromap/main.nf @@ -2,15 +2,15 @@ process CHROMAP_CHROMAP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::chromap=0.2.1 bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:963e4fe6a85c548a4018585660aed79780a175d3-0' : - 'quay.io/biocontainers/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:963e4fe6a85c548a4018585660aed79780a175d3-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:5b2e433ab8b3d1ef098fc944b567fd98caa23f56-0' : + 'biocontainers/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:5b2e433ab8b3d1ef098fc944b567fd98caa23f56-0' }" input: tuple val(meta), path(reads) - path fasta - path index + tuple val(meta2), path(fasta) + tuple val(meta3), path(index) path barcodes path whitelist path chr_order diff --git a/modules/nf-core/modules/chromap/chromap/meta.yml b/modules/nf-core/chromap/chromap/meta.yml similarity index 85% rename from modules/nf-core/modules/chromap/chromap/meta.yml rename to modules/nf-core/chromap/chromap/meta.yml index a86fddc9..140299ed 100644 --- a/modules/nf-core/modules/chromap/chromap/meta.yml +++ b/modules/nf-core/chromap/chromap/meta.yml @@ -20,7 +20,6 @@ tools: homepage: https://github.com/haowenz/chromap documentation: 
https://github.com/haowenz/chromap tool_dev_url: https://github.com/haowenz/chromap - doi: "" licence: ["GPL v3"] input: - meta: @@ -33,10 +32,20 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing information for the fasta + e.g. [ id:'test' ] - fasta: type: file description: | The fasta reference file. + - meta3: + type: map + description: | + Groovy Map containing information for the index + e.g. [ id:'test' ] - index: type: file description: | @@ -83,6 +92,9 @@ output: type: file description: pairs file pattern: "*.pairs.gz" - authors: - "@mahesh-panchal" + - "@joseespinosa" +maintainers: + - "@mahesh-panchal" + - "@joseespinosa" diff --git a/modules/nf-core/modules/chromap/index/main.nf b/modules/nf-core/chromap/index/main.nf similarity index 73% rename from modules/nf-core/modules/chromap/index/main.nf rename to modules/nf-core/chromap/index/main.nf index ee370695..fb3773f8 100644 --- a/modules/nf-core/modules/chromap/index/main.nf +++ b/modules/nf-core/chromap/index/main.nf @@ -2,17 +2,17 @@ process CHROMAP_INDEX { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? "bioconda::chromap=0.2.1" : null) + conda "bioconda::chromap=0.2.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/chromap:0.2.1--hd03093a_0' : - 'quay.io/biocontainers/chromap:0.2.1--hd03093a_0' }" + 'https://depot.galaxyproject.org/singularity/chromap:0.2.4--hd03093a_0' : + 'biocontainers/chromap:0.2.4--hd03093a_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "*.index" , emit: index - path "versions.yml", emit: versions + tuple val(meta), path ("*.index"), emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/modules/chromap/index/meta.yml b/modules/nf-core/chromap/index/meta.yml similarity index 73% rename from modules/nf-core/modules/chromap/index/meta.yml rename to modules/nf-core/chromap/index/meta.yml index 6659221f..39c5459b 100644 --- a/modules/nf-core/modules/chromap/index/meta.yml +++ b/modules/nf-core/chromap/index/meta.yml @@ -11,10 +11,15 @@ tools: homepage: https://github.com/haowenz/chromap documentation: https://github.com/haowenz/chromap tool_dev_url: https://github.com/haowenz/chromap - doi: "" + licence: ["GPL v3"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: Fasta reference file. @@ -24,6 +29,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] - index: type: file description: Index file of the reference genome @@ -31,3 +41,4 @@ output: authors: - "@mahesh-panchal" + - "@joseespinosa" diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf new file mode 100644 index 00000000..060a2e88 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -0,0 +1,44 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + touch ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml similarity index 62% rename from modules/nf-core/modules/custom/getchromsizes/meta.yml rename to modules/nf-core/custom/getchromsizes/meta.yml index ee6c2571..219ca1d8 100644 --- a/modules/nf-core/modules/custom/getchromsizes/meta.yml +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -14,12 +14,22 @@ tools: 
licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: FASTA file - pattern: "*.{fasta}" + pattern: "*.{fa,fasta,fna,fas}" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - sizes: type: file description: File containing chromosome lengths @@ -28,11 +38,16 @@ output: type: file description: FASTA index file pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" - versions: type: file - description: File containing software version + description: File containing software versions pattern: "versions.yml" authors: - "@tamara-hodgetts" - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/modules/deeptools/computematrix/main.nf b/modules/nf-core/deeptools/computematrix/main.nf similarity index 89% rename from modules/nf-core/modules/deeptools/computematrix/main.nf rename to modules/nf-core/deeptools/computematrix/main.nf index 96dfef3c..e77e2839 100644 --- a/modules/nf-core/modules/deeptools/computematrix/main.nf +++ b/modules/nf-core/deeptools/computematrix/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_COMPUTEMATRIX { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(bigwig) diff --git a/modules/nf-core/modules/deeptools/computematrix/meta.yml b/modules/nf-core/deeptools/computematrix/meta.yml similarity index 98% rename from modules/nf-core/modules/deeptools/computematrix/meta.yml rename to modules/nf-core/deeptools/computematrix/meta.yml index eaa990dd..8a148f5d 100644 --- a/modules/nf-core/modules/deeptools/computematrix/meta.yml +++ b/modules/nf-core/deeptools/computematrix/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/modules/deeptools/plotfingerprint/main.nf b/modules/nf-core/deeptools/plotfingerprint/main.nf similarity index 91% rename from modules/nf-core/modules/deeptools/plotfingerprint/main.nf rename to modules/nf-core/deeptools/plotfingerprint/main.nf index 83613be7..3159e941 100644 --- a/modules/nf-core/modules/deeptools/plotfingerprint/main.nf +++ b/modules/nf-core/deeptools/plotfingerprint/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_PLOTFINGERPRINT { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(bams), path(bais) diff --git a/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml b/modules/nf-core/deeptools/plotfingerprint/meta.yml similarity index 98% rename from modules/nf-core/modules/deeptools/plotfingerprint/meta.yml rename to modules/nf-core/deeptools/plotfingerprint/meta.yml index 07c25748..29ddf893 100644 --- a/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml +++ b/modules/nf-core/deeptools/plotfingerprint/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/modules/deeptools/plotheatmap/main.nf b/modules/nf-core/deeptools/plotheatmap/main.nf similarity index 88% rename from modules/nf-core/modules/deeptools/plotheatmap/main.nf rename to modules/nf-core/deeptools/plotheatmap/main.nf index 1e402e39..58309eea 100644 --- a/modules/nf-core/modules/deeptools/plotheatmap/main.nf +++ b/modules/nf-core/deeptools/plotheatmap/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_PLOTHEATMAP { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(matrix) diff --git a/modules/nf-core/modules/deeptools/plotheatmap/meta.yml b/modules/nf-core/deeptools/plotheatmap/meta.yml similarity index 98% rename from modules/nf-core/modules/deeptools/plotheatmap/meta.yml rename to modules/nf-core/deeptools/plotheatmap/meta.yml index ea206fb6..9296f503 100644 --- a/modules/nf-core/modules/deeptools/plotheatmap/meta.yml +++ b/modules/nf-core/deeptools/plotheatmap/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/modules/deeptools/plotprofile/main.nf b/modules/nf-core/deeptools/plotprofile/main.nf similarity index 88% rename from modules/nf-core/modules/deeptools/plotprofile/main.nf rename to modules/nf-core/deeptools/plotprofile/main.nf index d83a9493..b1ab454d 100644 --- a/modules/nf-core/modules/deeptools/plotprofile/main.nf +++ b/modules/nf-core/deeptools/plotprofile/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_PLOTPROFILE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(matrix) diff --git a/modules/nf-core/modules/deeptools/plotprofile/meta.yml b/modules/nf-core/deeptools/plotprofile/meta.yml similarity index 98% rename from modules/nf-core/modules/deeptools/plotprofile/meta.yml rename to modules/nf-core/deeptools/plotprofile/meta.yml index 795fda44..44612ea9 100644 --- a/modules/nf-core/modules/deeptools/plotprofile/meta.yml +++ b/modules/nf-core/deeptools/plotprofile/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..d79f1c86 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,61 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) + + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + --memory $fastqc_memory \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 95% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/modules/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..70edae4d --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
<div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert 
snapshot(process.out.versions).match("fastqc_versions_interleaved") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert 
path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..86f7c311 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "fastqc_versions_interleaved": { + "content": [ + [ + 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/modules/gffread/main.nf b/modules/nf-core/gffread/main.nf similarity index 85% rename from modules/nf-core/modules/gffread/main.nf rename to 
modules/nf-core/gffread/main.nf index 7c575c97..f4472b0e 100644 --- a/modules/nf-core/modules/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -2,10 +2,10 @@ process GFFREAD { tag "$gff" label 'process_low' - conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + conda "bioconda::gffread=0.12.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : - 'quay.io/biocontainers/gffread:0.12.1--h8b12597_0' }" + 'biocontainers/gffread:0.12.1--h8b12597_0' }" input: path gff diff --git a/modules/nf-core/modules/gffread/meta.yml b/modules/nf-core/gffread/meta.yml similarity index 100% rename from modules/nf-core/modules/gffread/meta.yml rename to modules/nf-core/gffread/meta.yml diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/gunzip/main.nf similarity index 90% rename from modules/nf-core/modules/gunzip/main.nf rename to modules/nf-core/gunzip/main.nf index 70367049..e7189d2f 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,11 +1,11 @@ process GUNZIP { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..4cdcdf4c --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,35 @@ +name: gunzip +description: Compresses and decompresses files. 
+keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/modules/homer/annotatepeaks/main.nf b/modules/nf-core/homer/annotatepeaks/main.nf similarity index 90% rename from modules/nf-core/modules/homer/annotatepeaks/main.nf rename to modules/nf-core/homer/annotatepeaks/main.nf index 9056a5ab..cfc140f5 100644 --- a/modules/nf-core/modules/homer/annotatepeaks/main.nf +++ b/modules/nf-core/homer/annotatepeaks/main.nf @@ -3,10 +3,10 @@ process HOMER_ANNOTATEPEAKS { label 'process_medium' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::homer=4.11" : null) + conda "bioconda::homer=4.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/homer:4.11--pl526hc9558a2_3' : - 'quay.io/biocontainers/homer:4.11--pl526hc9558a2_3' }" + 'biocontainers/homer:4.11--pl526hc9558a2_3' }" input: tuple val(meta), path(peak) diff --git a/modules/nf-core/modules/homer/annotatepeaks/meta.yml b/modules/nf-core/homer/annotatepeaks/meta.yml similarity index 100% rename from modules/nf-core/modules/homer/annotatepeaks/meta.yml rename to modules/nf-core/homer/annotatepeaks/meta.yml diff --git a/modules/nf-core/modules/khmer/uniquekmers/main.nf b/modules/nf-core/khmer/uniquekmers/main.nf similarity index 87% rename from modules/nf-core/modules/khmer/uniquekmers/main.nf rename to modules/nf-core/khmer/uniquekmers/main.nf index 8ad89a62..9576034f 100644 --- a/modules/nf-core/modules/khmer/uniquekmers/main.nf +++ b/modules/nf-core/khmer/uniquekmers/main.nf @@ -2,10 +2,10 @@ process KHMER_UNIQUEKMERS { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null) + conda "bioconda::khmer=3.0.0a3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2' : - 'quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2' }" + 'biocontainers/khmer:3.0.0a3--py37haa7609a_2' }" input: path fasta diff --git a/modules/nf-core/modules/khmer/uniquekmers/meta.yml b/modules/nf-core/khmer/uniquekmers/meta.yml similarity index 91% rename from modules/nf-core/modules/khmer/uniquekmers/meta.yml rename to modules/nf-core/khmer/uniquekmers/meta.yml index 31405cc1..f9f63972 100644 --- a/modules/nf-core/modules/khmer/uniquekmers/meta.yml +++ b/modules/nf-core/khmer/uniquekmers/meta.yml @@ -1,5 +1,5 @@ name: "khmer_uniquekmers" -description: +description: In-memory nucleotide sequence k-mer counting, filtering, graph traversal and more keywords: - khmer - k-mer diff --git a/modules/nf-core/macs3/callpeak/environment.yml b/modules/nf-core/macs3/callpeak/environment.yml new file mode 100644 index 00000000..d9caa561 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "macs3_callpeak" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::macs3=3.0.1" diff --git a/modules/nf-core/modules/macs2/callpeak/main.nf b/modules/nf-core/macs3/callpeak/main.nf similarity index 66% rename from modules/nf-core/modules/macs2/callpeak/main.nf rename to modules/nf-core/macs3/callpeak/main.nf index 9aaf97a9..53a1a200 100644 --- a/modules/nf-core/modules/macs2/callpeak/main.nf +++ b/modules/nf-core/macs3/callpeak/main.nf @@ -1,15 +1,16 @@ -process MACS2_CALLPEAK { + +process MACS3_CALLPEAK { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::macs2=2.2.7.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/macs2:2.2.7.1--py38h4a8c8d9_3' : - 'quay.io/biocontainers/macs2:2.2.7.1--py38h4a8c8d9_3' }" + 'https://depot.galaxyproject.org/singularity/macs3:3.0.1--py311h0152c62_3': + 'biocontainers/macs3:3.0.1--py311h0152c62_3' }" input: tuple val(meta), path(ipbam), path(controlbam) - val macs2_gsize + val macs3_gsize output: tuple val(meta), path("*.{narrowPeak,broadPeak}"), emit: peak @@ -36,10 +37,10 @@ process MACS2_CALLPEAK { args_list.remove(id) } """ - macs2 \\ + macs3 \\ callpeak \\ ${args_list.join(' ')} \\ - --gsize $macs2_gsize \\ + --gsize $macs3_gsize \\ --format $format \\ --name $prefix \\ --treatment $ipbam \\ @@ -47,7 +48,23 @@ process MACS2_CALLPEAK { cat <<-END_VERSIONS > versions.yml "${task.process}": - macs2: \$(macs2 --version | sed -e "s/macs2 //g") + macs3: \$(macs3 --version | sed -e "s/macs3 //g") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.gappedPeak + touch ${prefix}.bed + touch ${prefix}.bdg + touch ${prefix}.narrowPeak + touch ${prefix}.xls + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + macs3: \$(macs3 --version | sed -e "s/macs3 //g") END_VERSIONS """ } diff --git a/modules/nf-core/modules/macs2/callpeak/meta.yml b/modules/nf-core/macs3/callpeak/meta.yml similarity index 55% rename from modules/nf-core/modules/macs2/callpeak/meta.yml rename to modules/nf-core/macs3/callpeak/meta.yml index 982bc5b2..1603b8e2 100644 --- a/modules/nf-core/modules/macs2/callpeak/meta.yml +++ b/modules/nf-core/macs3/callpeak/meta.yml @@ -1,4 +1,6 @@ -name: macs2_callpeak +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "macs3_callpeak" description: Peak calling of enriched genomic regions of ChIP-seq and ATAC-seq experiments keywords: - alignment @@ -6,32 +8,40 @@ keywords: - chip-seq - peak-calling tools: - - macs2: - description: Model 
Based Analysis for ChIP-Seq data - homepage: None - documentation: https://docs.csc.fi/apps/macs2/ - tool_dev_url: https://github.com/macs3-project/MACS - doi: "https://doi.org/10.1101/496521" - licence: ["BSD"] + - macs3: + description: "Model Based Analysis for ChIP-Seq data" + homepage: "https://macs3-project.github.io/MACS/" + documentation: "https://macs3-project.github.io/MACS/" + tool_dev_url: "https://github.com/macs3-project/MACS/" + doi: "10.1101/496521" + licence: ["BSD-3-clause"] input: - meta: type: map description: | Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + e.g. `[ id:'sample_1', single_end:false ]` - ipbam: type: file description: The ChIP-seq treatment file - controlbam: type: file description: The control file - - macs2_gsize: + - macs3_gsize: type: string - description: Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2.7e9), - 'mm' for mouse (1.87e9), 'ce' for C. elegans (9e7) and 'dm' for fruitfly (1.2e8) + description: | + Effective genome size. It can be 1.0e+9 or 1000000000, + or shortcuts:'hs' for human (2,913,022,398), 'mm' for mouse + (2,652,783,500), 'ce' for C. elegans (100,286,401) + and 'dm' for fruitfly (142,573,017), Default:hs. output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` - versions: type: file description: File containing software version @@ -58,6 +68,6 @@ output: pattern: "*.bdg" authors: - - "@ntoda03" - "@JoseEspinosa" - - "@jianhong" +maintainers: + - "@JoseEspinosa" diff --git a/modules/nf-core/macs3/callpeak/tests/bam.config b/modules/nf-core/macs3/callpeak/tests/bam.config new file mode 100644 index 00000000..217e3107 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/tests/bam.config @@ -0,0 +1,5 @@ +process { + withName: 'MACS3_CALLPEAK' { + ext.args = '--qval 0.1' + } +} \ No newline at end of file diff --git a/modules/nf-core/macs3/callpeak/tests/bed.config b/modules/nf-core/macs3/callpeak/tests/bed.config new file mode 100644 index 00000000..19444006 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/tests/bed.config @@ -0,0 +1,5 @@ +process { + withName: 'MACS3_CALLPEAK' { + ext.args = '--format BED --qval 10 --nomodel --extsize 200' + } +} \ No newline at end of file diff --git a/modules/nf-core/macs3/callpeak/tests/main.nf.test b/modules/nf-core/macs3/callpeak/tests/main.nf.test new file mode 100644 index 00000000..4338c96b --- /dev/null +++ b/modules/nf-core/macs3/callpeak/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process MACS3_CALLPEAK" + script "../main.nf" + process "MACS3_CALLPEAK" + + tag "modules" + tag "modules_nfcore" + tag "macs3" + tag "macs3/callpeak" + + test("homo_sapiens - callpeak - bed") { + + when { + config "./bed.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed', checkIfExists: true) ], + [] + ] + input[1] = 4000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - callpeak - bam") { + + when { + config "./bam.config" + process { + """ + 
input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - callpeak - control - bam") { + + when { + config "./bam.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam', checkIfExists: true) ] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/macs3/callpeak/tests/main.nf.test.snap b/modules/nf-core/macs3/callpeak/tests/main.nf.test.snap new file mode 100644 index 00000000..d6d98292 --- /dev/null +++ b/modules/nf-core/macs3/callpeak/tests/main.nf.test.snap @@ -0,0 +1,358 @@ +{ + "homo_sapiens - callpeak - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,2e4da1c1704595e12aaf99cc715ad70c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,221852e4639574d2f53cf1917efa4922" + ] + ], + "2": [ + 
"versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,26f0f97b6c14dbca129e947a58067c82" + ] + ], + "5": [ + + ], + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,26f0f97b6c14dbca129e947a58067c82" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,2e4da1c1704595e12aaf99cc715ad70c" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,221852e4639574d2f53cf1917efa4922" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:31.629715" + }, + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.narrowPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gappedPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bdg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bdg": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bdg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gapped": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gappedPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + 
}, + "test.narrowPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:58.589844" + }, + "homo_sapiens - callpeak - control - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,653e1108cc57ca07d0f60fc0f4fb8ba3" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,9fce04613bdc9c8372a9f884aa0d5aa6" + ] + ], + "2": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,4f3c7c53a1d730d90d1b3dd9d3197af4" + ] + ], + "5": [ + + ], + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,4f3c7c53a1d730d90d1b3dd9d3197af4" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,653e1108cc57ca07d0f60fc0f4fb8ba3" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,9fce04613bdc9c8372a9f884aa0d5aa6" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:44.063426" + }, + "homo_sapiens - callpeak - bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,10e7d4747f8a2513e5ebb04856a51673" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,14efbf7137623df5aaf282b506ac9601" + ] + ], + "2": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + 
"single_end": false + }, + "test_summits.bed:md5,28833eeb7816688f0d698f51670be946" + ] + ], + "5": [ + + ], + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,28833eeb7816688f0d698f51670be946" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,10e7d4747f8a2513e5ebb04856a51673" + ] + ], + "versions": [ + "versions.yml:md5,cb33970f9aaa0730733abe2fd9cb2b74" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,14efbf7137623df5aaf282b506ac9601" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T17:04:16.697163" + } +} \ No newline at end of file diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf deleted file mode 100644 index c74e376f..00000000 --- a/modules/nf-core/modules/bowtie2/align/main.nf +++ /dev/null @@ -1,71 +0,0 @@ -process BOWTIE2_ALIGN { - tag "$meta.id" - label "process_high" - - conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) - container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? 
- "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : - "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" - - input: - tuple val(meta), path(reads) - path index - val save_unaligned - val sort_bam - - output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: "" - def args2 = task.ext.args2 ?: "" - def prefix = task.ext.prefix ?: "${meta.id}" - - def unaligned = "" - def reads_args = "" - if (meta.single_end) { - unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" - reads_args = "-U ${reads}" - } else { - unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" - reads_args = "-1 ${reads[0]} -2 ${reads[1]}" - } - - def samtools_command = sort_bam ? 
'sort' : 'view' - - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` - [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` - [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 - - bowtie2 \\ - -x \$INDEX \\ - $reads_args \\ - --threads $task.cpus \\ - $unaligned \\ - $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - - - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then - mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz - fi - - if [ -f ${prefix}.unmapped.fastq.2.gz ]; then - mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 203e485a..00000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 60b546a0..00000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100644 index 787bdb7b..00000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,91 +0,0 
@@ -#!/usr/bin/env python - -import platform -from textwrap import dedent - -import yaml - - -def _make_versions_html(versions): - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") - return "\\n".join(html) - - -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/modules/custom/getchromsizes/main.nf b/modules/nf-core/modules/custom/getchromsizes/main.nf deleted file mode 100644 index 0eabf3a4..00000000 --- a/modules/nf-core/modules/custom/getchromsizes/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -process CUSTOM_GETCHROMSIZES { - tag "$fasta" - label 'process_low' - - conda (params.enable_conda 
? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - path fasta - - output: - path '*.sizes' , emit: sizes - path '*.fai' , emit: fai - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools faidx $fasta - cut -f 1,2 ${fasta}.fai > ${fasta}.sizes - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - custom: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index 05730368..00000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/modules/gunzip/meta.yml deleted file mode 100644 index 4d2ebc84..00000000 --- a/modules/nf-core/modules/gunzip/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: gunzip -description: Compresses and decompresses files. -keywords: - - gunzip - - compression -tools: - - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. 
[ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" -output: - - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..ca39fb67 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 00000000..47ac352f --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,55 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + $logo \\ + . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 00000000..45a9bc35 --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,58 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. 
+ pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_plots" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..f1c4242e --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ 
b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/phantompeakqualtools/environment.yml b/modules/nf-core/phantompeakqualtools/environment.yml new file mode 100644 index 00000000..095b7b4b --- /dev/null +++ b/modules/nf-core/phantompeakqualtools/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "phantompeakqualtools" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::phantompeakqualtools=1.2.2" diff --git a/modules/nf-core/modules/phantompeakqualtools/main.nf b/modules/nf-core/phantompeakqualtools/main.nf similarity index 65% rename from modules/nf-core/modules/phantompeakqualtools/main.nf rename to modules/nf-core/phantompeakqualtools/main.nf index 43fca683..f0f7dc63 100644 --- a/modules/nf-core/modules/phantompeakqualtools/main.nf +++ b/modules/nf-core/phantompeakqualtools/main.nf @@ -3,10 +3,10 @@ process PHANTOMPEAKQUALTOOLS { label 'process_medium' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::phantompeakqualtools=1.2.2" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/phantompeakqualtools:1.2.2--0' : - 'quay.io/biocontainers/phantompeakqualtools:1.2.2--0' }" + 'oras://community.wave.seqera.io/library/phantompeakqualtools:1.2.2--50be7727b2a72700' : + 'community.wave.seqera.io/library/phantompeakqualtools:1.2.2--f8026fe2526a5e18' }" input: tuple val(meta), path(bam) @@ -34,4 +34,18 @@ process PHANTOMPEAKQUALTOOLS { phantompeakqualtools: $VERSION END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.spp.pdf + touch ${prefix}.spp.Rdata + touch ${prefix}.spp.out + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + phantompeakqualtools: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/phantompeakqualtools/meta.yml b/modules/nf-core/phantompeakqualtools/meta.yml similarity index 77% rename from modules/nf-core/modules/phantompeakqualtools/meta.yml rename to modules/nf-core/phantompeakqualtools/meta.yml index 6488500d..276f6fdd 100644 --- a/modules/nf-core/modules/phantompeakqualtools/meta.yml +++ b/modules/nf-core/phantompeakqualtools/meta.yml @@ -1,6 +1,9 @@ name: "phantompeakqualtools" - -description: +description: | + "This package computes informative enrichment and quality measures + for ChIP-seq/DNase-seq/FAIRE-seq/MNase-seq data. It can also be used + to obtain robust estimates of the predominant fragment length or + characteristic tag shift values in these assays." keywords: - "ChIP-Seq" - "QC" @@ -12,12 +15,10 @@ tools: for ChIP-seq/DNase-seq/FAIRE-seq/MNase-seq data. It can also be used to obtain robust estimates of the predominant fragment length or characteristic tag shift values in these assays." 
- homepage: "None" documentation: "https://github.com/kundajelab/phantompeakqualtools" tool_dev_url: "https://github.com/kundajelab/phantompeakqualtools" - doi: "https://doi.org/10.1101/gr.136184.111" - licence: "['BSD-3-clause']" - + doi: "10.1101/gr.136184.111" + licence: ["BSD-3-clause"] input: - meta: type: map @@ -28,7 +29,6 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - output: - meta: type: map @@ -53,8 +53,11 @@ output: type: file description: Rdata file containing the R session pattern: "*.{Rdata}" - authors: - "@drpatelh" - - "@Emiller88" + - "@edmundmiller" + - "@JoseEspinosa" +maintainers: + - "@drpatelh" + - "@edmundmiller" - "@JoseEspinosa" diff --git a/modules/nf-core/phantompeakqualtools/tests/main.nf.test b/modules/nf-core/phantompeakqualtools/tests/main.nf.test new file mode 100644 index 00000000..ea096bcf --- /dev/null +++ b/modules/nf-core/phantompeakqualtools/tests/main.nf.test @@ -0,0 +1,90 @@ +// nf-core modules test phantompeakqualtools +nextflow_process { + + name "Test Process PHANTOMPEAKQUALTOOLS" + script "../main.nf" + process "PHANTOMPEAKQUALTOOLS" + + tag "modules" + tag "modules_nfcore" + tag "phantompeakqualtools" + + test("sarscov2 - bam - single_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.spp, + file(process.out.pdf.get(0).get(1)).name, + file(process.out.rdata.get(0).get(1)).name, + process.out.versions) + .match() + } + ) + } + + } + + test("sarscov2 - bam - paired_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot(process.out.spp, + file(process.out.pdf.get(0).get(1)).name, + file(process.out.rdata.get(0).get(1)).name, + process.out.versions) + .match() + } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/phantompeakqualtools/tests/main.nf.test.snap b/modules/nf-core/phantompeakqualtools/tests/main.nf.test.snap new file mode 100644 index 00000000..1021aeb8 --- /dev/null +++ b/modules/nf-core/phantompeakqualtools/tests/main.nf.test.snap @@ -0,0 +1,119 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spp.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spp.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spp.Rdata:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,e0a48a40af2cf7d5de72c4c3cb47a4fc" + ], + "pdf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spp.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rdata": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spp.Rdata:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spp": [ + [ + { + "id": "test", + "single_end": false + }, + "test.spp.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e0a48a40af2cf7d5de72c4c3cb47a4fc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-27T10:46:22.786363" + }, + "sarscov2 - bam - 
single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.spp.out:md5,b01d976506b6fe45b66c821b1e8a1d15" + ] + ], + "test.spp.pdf", + "test.spp.Rdata", + [ + "versions.yml:md5,e0a48a40af2cf7d5de72c4c3cb47a4fc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-27T16:21:12.000709154" + }, + "sarscov2 - bam - paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.spp.out:md5,eed46e75eab119224f397a7a8b5924e6" + ] + ], + "test.spp.pdf", + "test.spp.Rdata", + [ + "versions.yml:md5,e0a48a40af2cf7d5de72c4c3cb47a4fc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-27T16:21:22.432426907" + } +} \ No newline at end of file diff --git a/modules/nf-core/phantompeakqualtools/tests/tags.yml b/modules/nf-core/phantompeakqualtools/tests/tags.yml new file mode 100644 index 00000000..9031749e --- /dev/null +++ b/modules/nf-core/phantompeakqualtools/tests/tags.yml @@ -0,0 +1,2 @@ +phantompeakqualtools: + - "modules/nf-core/phantompeakqualtools/**" diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf similarity index 83% rename from modules/nf-core/modules/picard/collectmultiplemetrics/main.nf rename to modules/nf-core/picard/collectmultiplemetrics/main.nf index 63f4e872..91fe9170 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf @@ -1,16 +1,16 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: - tuple val(meta), path(bam) - path fasta - path fai + tuple val(meta) , path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*_metrics"), emit: metrics @@ -24,15 +24,15 @@ process PICARD_COLLECTMULTIPLEMETRICS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ CollectMultipleMetrics \\ $args \\ --INPUT $bam \\ diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/picard/collectmultiplemetrics/meta.yml similarity index 75% rename from modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml rename to modules/nf-core/picard/collectmultiplemetrics/meta.yml index c11b02cf..22656080 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml +++ b/modules/nf-core/picard/collectmultiplemetrics/meta.yml @@ -23,11 +23,25 @@ input: e.g. [ id:'test', single_end:false ] - bam: type: file - description: BAM file - pattern: "*.{bam}" + description: SAM/BAM/CRAM file + pattern: "*.{sam,bam,cram}" + - bai: + type: file + description: Optional SAM/BAM/CRAM file index + pattern: "*.{sai,bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome'] - fasta: type: file description: Genome fasta file + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - fai: type: file description: Index of FASTA file. Only needed when fasta is supplied. diff --git a/modules/nf-core/picard/markduplicates/environment.yml b/modules/nf-core/picard/markduplicates/environment.yml new file mode 100644 index 00000000..58b795f5 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/environment.yml @@ -0,0 +1,7 @@ +name: picard_markduplicates +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf similarity index 71% rename from modules/nf-core/modules/picard/markduplicates/main.nf rename to modules/nf-core/picard/markduplicates/main.nf index 4e559fea..80930cc4 100644 --- a/modules/nf-core/modules/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -2,13 +2,15 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(bam) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*.bam") , emit: bam @@ -22,19 +24,23 @@ process PICARD_MARKDUPLICATES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ MarkDuplicates \\ $args \\ --INPUT $bam \\ --OUTPUT ${prefix}.bam \\ + --REFERENCE_SEQUENCE $fasta \\ --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml @@ -45,6 +51,7 @@ process PICARD_MARKDUPLICATES { stub: def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
""" touch ${prefix}.bam touch ${prefix}.bam.bai diff --git a/modules/nf-core/modules/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml similarity index 70% rename from modules/nf-core/modules/picard/markduplicates/meta.yml rename to modules/nf-core/picard/markduplicates/meta.yml index 842817bc..1ab90c07 100644 --- a/modules/nf-core/modules/picard/markduplicates/meta.yml +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -24,7 +24,25 @@ input: - bam: type: file description: BAM file - pattern: "*.{bam}" + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Reference genome fasta index + pattern: "*.{fai}" output: - meta: type: map @@ -50,3 +68,8 @@ output: authors: - "@drpatelh" - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test b/modules/nf-core/picard/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..c5a29b4b --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -0,0 +1,104 @@ +nextflow_process { + + name "Test Process PICARD_MARKDUPLICATES" + script "../main.nf" + process "PICARD_MARKDUPLICATES" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/markduplicates" + + test("sarscov2 [unsorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("unsorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("unsorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("unsorted_bam_versions") } + ) + } + } + + test("sarscov2 [sorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("sorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("sorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("sorted_bam_versions") } + ) + } + } + + test("homo_sapiens [cram]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 
Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("cram_metrics") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..31c9130d --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "sorted_bam_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "timestamp": "2024-01-19T10:26:45.092349" + }, + "unsorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "timestamp": "2024-01-19T10:26:28.100755" + }, + "cram_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates 
--PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "timestamp": "2024-01-19T10:27:03.253071" + }, + "sorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "timestamp": "2024-01-19T10:26:45.086503" + }, + "cram_name": { + "content": [ + "test.marked.bam" + ], + 
"timestamp": "2024-01-19T10:27:03.241617" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "timestamp": "2024-01-19T10:27:03.26989" + }, + "unsorted_bam_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "timestamp": "2024-01-19T10:26:28.159071" + }, + "unsorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "timestamp": "2024-01-19T10:26:28.143979" + }, + "sorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "timestamp": "2024-01-19T10:26:45.080116" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/picard/markduplicates/tests/nextflow.config b/modules/nf-core/picard/markduplicates/tests/nextflow.config new file mode 100644 index 00000000..02818dd6 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: PICARD_MARKDUPLICATES { + ext.prefix = { "${meta.id}.marked" } + ext.args = '--ASSUME_SORT_ORDER queryname' + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/tags.yml b/modules/nf-core/picard/markduplicates/tests/tags.yml new file mode 100644 index 00000000..4f213d62 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/tags.yml @@ -0,0 +1,2 @@ +picard/markduplicates: + - modules/nf-core/picard/markduplicates/** diff --git a/modules/nf-core/modules/picard/mergesamfiles/main.nf b/modules/nf-core/picard/mergesamfiles/main.nf similarity index 82% rename from modules/nf-core/modules/picard/mergesamfiles/main.nf rename to modules/nf-core/picard/mergesamfiles/main.nf index cccf4d3d..fc23ea01 100644 --- a/modules/nf-core/modules/picard/mergesamfiles/main.nf +++ b/modules/nf-core/picard/mergesamfiles/main.nf @@ -2,10 +2,10 @@ process PICARD_MERGESAMFILES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(bams) @@ -21,16 +21,16 @@ process PICARD_MERGESAMFILES { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def bam_files = bams.sort() - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard MergeSamFiles] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } if (bam_files.size() > 1) { """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ MergeSamFiles \\ $args \\ ${'--INPUT '+bam_files.join(' --INPUT ')} \\ diff --git a/modules/nf-core/modules/picard/mergesamfiles/meta.yml b/modules/nf-core/picard/mergesamfiles/meta.yml similarity index 100% rename from modules/nf-core/modules/picard/mergesamfiles/meta.yml rename to modules/nf-core/picard/mergesamfiles/meta.yml diff --git a/modules/nf-core/modules/preseq/lcextrap/main.nf b/modules/nf-core/preseq/lcextrap/main.nf similarity index 87% rename from modules/nf-core/modules/preseq/lcextrap/main.nf rename to modules/nf-core/preseq/lcextrap/main.nf index 97261557..12546f0a 100644 --- a/modules/nf-core/modules/preseq/lcextrap/main.nf +++ b/modules/nf-core/preseq/lcextrap/main.nf @@ -1,12 +1,12 @@ process PRESEQ_LCEXTRAP { tag "$meta.id" - label 'process_medium' + label 'process_single' label 'error_ignore' - conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null) + conda "bioconda::preseq=3.1.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/preseq:3.1.2--h445547b_2': - 'quay.io/biocontainers/preseq:3.1.2--h445547b_2' }" + 'biocontainers/preseq:3.1.2--h445547b_2' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/preseq/lcextrap/meta.yml b/modules/nf-core/preseq/lcextrap/meta.yml old mode 100644 new mode 100755 similarity index 98% rename from modules/nf-core/modules/preseq/lcextrap/meta.yml rename to modules/nf-core/preseq/lcextrap/meta.yml index f1be05a2..1391961c --- a/modules/nf-core/modules/preseq/lcextrap/meta.yml +++ b/modules/nf-core/preseq/lcextrap/meta.yml @@ -10,7 +10,7 @@ tools: homepage: http://smithlabresearch.org/software/preseq/ documentation: http://smithlabresearch.org/wp-content/uploads/manual.pdf tool_dev_url: https://github.com/smithlabcode/preseq - doi: "" + licence: ["GPL"] input: diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 00000000..dd0b5c19 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,8 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf similarity index 64% rename from modules/nf-core/modules/samtools/flagstat/main.nf rename to modules/nf-core/samtools/flagstat/main.nf index 03ec2dcf..f1893d7c 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(bam), path(bai) @@ -23,7 +23,7 @@ process SAMTOOLS_FLAGSTAT { """ samtools \\ flagstat \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ $bam \\ > ${prefix}.flagstat @@ -32,4 +32,15 @@ process SAMTOOLS_FLAGSTAT { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml similarity index 93% rename from modules/nf-core/modules/samtools/flagstat/meta.yml rename to modules/nf-core/samtools/flagstat/meta.yml index 95269063..97991358 100644 --- a/modules/nf-core/modules/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -14,7 +14,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -47,3 +47,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 00000000..c8dd8dc9 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 00000000..880019f2 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "BAM": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "timestamp": "2023-11-14T15:49:22.577133" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 00000000..2d2b7255 
--- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 00000000..de3ed47e --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_idxstats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf similarity index 64% rename from modules/nf-core/modules/samtools/idxstats/main.nf rename to modules/nf-core/samtools/idxstats/main.nf index 4b245419..00d916bb 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(bam), path(bai) @@ -24,6 +24,7 @@ process SAMTOOLS_IDXSTATS { """ samtools \\ idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ > ${prefix}.idxstats @@ -32,4 +33,16 @@ process SAMTOOLS_IDXSTATS { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml similarity index 93% rename from modules/nf-core/modules/samtools/idxstats/meta.yml rename to modules/nf-core/samtools/idxstats/meta.yml index 3710ab88..344e92a3 100644 --- a/modules/nf-core/modules/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -15,7 +15,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -48,3 +48,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 00000000..f6c92150 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.idxstats).match() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 00000000..4c6c12bd --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "BAM": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "timestamp": "2023-11-14T15:52:19.875194" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 00000000..d3057c61 
--- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..81f09391 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf similarity index 88% rename from modules/nf-core/modules/samtools/index/main.nf rename to modules/nf-core/samtools/index/main.nf index e04e63e8..8ad18fdc 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/modules/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml similarity index 91% rename from modules/nf-core/modules/samtools/index/meta.yml rename to modules/nf-core/samtools/index/meta.yml index e5cadbc2..01a4ee03 100644 --- a/modules/nf-core/modules/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. 
These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -51,3 +51,7 @@ authors: - "@drpatelh" - "@ewels" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..c76a9169 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("sarscov2 [BAI]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } + + test("homo_sapiens [CRAI]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert 
snapshot(process.out.crai).match("crai") }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } + + test("homo_sapiens [CSI]") { + + config "./csi.nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..b3baee7f --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,28 @@ +{ + "crai": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "timestamp": "2023-11-15T15:17:37.30801" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "timestamp": "2023-11-15T15:17:30.869234" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..f4064b72 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - 
bioconda::htslib=1.18 diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf similarity index 79% rename from modules/nf-core/modules/samtools/sort/main.nf rename to modules/nf-core/samtools/sort/main.nf index b4fc1cbe..4a666d42 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,16 +2,17 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(bam) output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: @@ -22,7 +23,13 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
""" - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml similarity index 85% rename from modules/nf-core/modules/samtools/sort/meta.yml rename to modules/nf-core/samtools/sort/meta.yml index a820c55a..2200de72 100644 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -39,6 +39,13 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..abb80978 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,73 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("test_samtools_sort") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + 
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_samtools_sort_stub") { + + config "./nextflow.config" + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..ff722259 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "test_samtools_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + ] + } + ], + "timestamp": "2023-12-04T11:11:22.005628301" + }, + "test_samtools_sort_stub": { + "content": [ + "test.sorted.bam", + [ + "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + ] + ], + "timestamp": "2023-12-04T17:47:22.314445935" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..d0f35086 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,7 @@ +process { 
+ + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 00000000..b45ba90c --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf similarity index 83% rename from modules/nf-core/modules/samtools/stats/main.nf rename to modules/nf-core/samtools/stats/main.nf index 89b92d79..7539140a 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -1,15 +1,15 @@ process SAMTOOLS_STATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(input), path(input_index) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats @@ -25,7 +25,7 @@ process SAMTOOLS_STATS { """ samtools \\ stats \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ ${reference} \\ ${input} \\ > ${prefix}.stats diff --git a/modules/nf-core/modules/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml similarity index 68% rename from modules/nf-core/modules/samtools/stats/meta.yml rename to modules/nf-core/samtools/stats/meta.yml index cac50b1c..735ff812 100644 --- a/modules/nf-core/modules/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -13,7 +13,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -23,16 +23,21 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" output: - meta: @@ -51,3 +56,8 @@ output: authors: - "@drpatelh" - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 00000000..20c3efe1 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("SAMTOOLS STATS Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here. + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + + ] + input[1] = [[],[]] + """ + + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + test("SAMTOOLS CRAM Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: 
true) + ] + """ + } + + + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..025c83a5 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "SAMTOOLS STATS Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,045a48208b1c6f5b8af4347fe31f4def" + ] + ], + "1": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,045a48208b1c6f5b8af4347fe31f4def" + ] + ], + "versions": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ] + } + ], + "timestamp": "2023-12-04T11:07:28.26821485" + }, + "SAMTOOLS CRAM Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,dfbfa130d4a6925ddd1931dcd8354a43" + ] + ], + "1": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,dfbfa130d4a6925ddd1931dcd8354a43" + ] + ], + "versions": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ] + } + ], + "timestamp": "2023-12-04T11:07:50.356233402" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 00000000..7c28e30f --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/modules/subread/featurecounts/main.nf b/modules/nf-core/subread/featurecounts/main.nf similarity index 91% rename from modules/nf-core/modules/subread/featurecounts/main.nf rename to 
modules/nf-core/subread/featurecounts/main.nf index 18e2a92b..a524b92f 100644 --- a/modules/nf-core/modules/subread/featurecounts/main.nf +++ b/modules/nf-core/subread/featurecounts/main.nf @@ -2,10 +2,10 @@ process SUBREAD_FEATURECOUNTS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::subread=2.0.1" : null) + conda "bioconda::subread=2.0.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/subread:2.0.1--hed695b0_0' : - 'quay.io/biocontainers/subread:2.0.1--hed695b0_0' }" + 'biocontainers/subread:2.0.1--hed695b0_0' }" input: tuple val(meta), path(bams), path(annotation) diff --git a/modules/nf-core/modules/subread/featurecounts/meta.yml b/modules/nf-core/subread/featurecounts/meta.yml similarity index 100% rename from modules/nf-core/modules/subread/featurecounts/meta.yml rename to modules/nf-core/subread/featurecounts/meta.yml diff --git a/modules/nf-core/trimgalore/environment.yml b/modules/nf-core/trimgalore/environment.yml new file mode 100644 index 00000000..6cd0f51b --- /dev/null +++ b/modules/nf-core/trimgalore/environment.yml @@ -0,0 +1,7 @@ +name: trimgalore +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::trim-galore=0.6.7 diff --git a/modules/nf-core/modules/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf similarity index 62% rename from modules/nf-core/modules/trimgalore/main.nf rename to modules/nf-core/trimgalore/main.nf index 3a3fca90..24ead871 100644 --- a/modules/nf-core/modules/trimgalore/main.nf +++ b/modules/nf-core/trimgalore/main.nf @@ -2,22 +2,21 @@ process TRIMGALORE { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" + 'biocontainers/trim-galore:0.6.7--hdfd78af_0' }" input: tuple val(meta), path(reads) output: - tuple val(meta), path("*{trimmed,val}*.fq.gz"), emit: reads - tuple val(meta), path("*report.txt") , emit: log - path "versions.yml" , emit: versions - - tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true - tuple val(meta), path("*.html") , emit: html , optional: true - tuple val(meta), path("*.zip") , emit: zip , optional: true + tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads + tuple val(meta), path("*report.txt") , emit: log , optional: true + tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true + tuple val(meta), path("*.html") , emit: html , optional: true + tuple val(meta), path("*.zip") , emit: zip , optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -32,26 +31,20 @@ process TRIMGALORE { cores = (task.cpus as int) - 4 if (meta.single_end) cores = (task.cpus as int) - 3 if (cores < 1) cores = 1 - if (cores > 4) cores = 4 + if (cores > 8) cores = 8 } - // Clipping presets have to be evaluated in the context of SE/PE - def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' - def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' - // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" if (meta.single_end) { + def args_list = args.split("\\s(?=--)").toList() + args_list.removeAll { it.toLowerCase().contains('_r2 ') } """ [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz trim_galore \\ - $args \\ + ${args_list.join(' ')} \\ --cores $cores \\ --gzip \\ - $c_r1 \\ - $tpc_r1 \\ ${prefix}.fastq.gz cat <<-END_VERSIONS > versions.yml @@ -69,10 +62,6 @@ process TRIMGALORE { --cores $cores \\ --paired \\ --gzip \\ - $c_r1 \\ - $c_r2 \\ - $tpc_r1 \\ - $tpc_r2 \\ ${prefix}_1.fastq.gz \\ ${prefix}_2.fastq.gz diff --git a/modules/nf-core/modules/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml similarity index 94% rename from modules/nf-core/modules/trimgalore/meta.yml rename to modules/nf-core/trimgalore/meta.yml index 439f566d..e649088c 100644 --- a/modules/nf-core/modules/trimgalore/meta.yml +++ b/modules/nf-core/trimgalore/meta.yml @@ -36,7 +36,7 @@ output: description: | List of input adapter trimmed FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - pattern: "*.{fq.gz}" + pattern: "*{3prime,5prime,trimmed,val}*.fq.gz" - unpaired: type: file description: | @@ -62,3 +62,7 @@ authors: - "@drpatelh" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/trimgalore/tests/main.nf.test b/modules/nf-core/trimgalore/tests/main.nf.test new file mode 100644 index 00000000..bc6812cc --- /dev/null +++ b/modules/nf-core/trimgalore/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process TRIMGALORE" + script "../main.nf" + process "TRIMGALORE" + tag "modules" + tag "modules_nfcore" + tag "trimgalore" + + test("test_trimgalore_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + 
"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1)).getText().contains(report1_line) } + } + } + ) + } + } + + test("test_trimgalore_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { report1_lines.each { report1_line -> + { assert path(process.out.log.get(0).get(1).get(0)).getText().contains(report1_line) } + } + }, + { report2_lines.each { report2_line -> + { assert path(process.out.log.get(0).get(1).get(1)).getText().contains(report2_line) } + } + } + ) + } + } +} diff --git a/modules/nf-core/trimgalore/tests/main.nf.test.snap b/modules/nf-core/trimgalore/tests/main.nf.test.snap new file mode 100644 index 00000000..84feacca --- /dev/null +++ 
b/modules/nf-core/trimgalore/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "test_trimgalore_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trimmed.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastq.gz_trimming_report.txt:md5,a1ab3958205f1ddf48af623242b5b429" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "html": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastq.gz_trimming_report.txt:md5,a1ab3958205f1ddf48af623242b5b429" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_trimmed.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4" + ] + ], + "unpaired": [ + + ], + "versions": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "zip": [ + + ] + } + ], + "timestamp": "2023-10-17T15:24:57.782141441" + }, + "test_trimgalore_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1_val_1.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4", + "test_2_val_2.fq.gz:md5,f3d61189e6d10202da7b8686f1dbb71b" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz_trimming_report.txt:md5,315d40465412f9909bbaabf52269274d", + "test_2.fastq.gz_trimming_report.txt:md5,34436303da1c78811103427a2fb57f7b" + ] + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "html": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz_trimming_report.txt:md5,315d40465412f9909bbaabf52269274d", + "test_2.fastq.gz_trimming_report.txt:md5,34436303da1c78811103427a2fb57f7b" + ] + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1_val_1.fq.gz:md5,e0a7516b8ea8d6467d6306acb2cd13c4", + 
"test_2_val_2.fq.gz:md5,f3d61189e6d10202da7b8686f1dbb71b" + ] + ] + ], + "unpaired": [ + + ], + "versions": [ + "versions.yml:md5,47d966cbb31c80eb8f7fe860d55659b7" + ], + "zip": [ + + ] + } + ], + "timestamp": "2023-10-17T15:25:08.513589909" + } +} \ No newline at end of file diff --git a/modules/nf-core/trimgalore/tests/tags.yml b/modules/nf-core/trimgalore/tests/tags.yml new file mode 100644 index 00000000..e9937691 --- /dev/null +++ b/modules/nf-core/trimgalore/tests/tags.yml @@ -0,0 +1,2 @@ +trimgalore: + - modules/nf-core/trimgalore/** diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf similarity index 85% rename from modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf rename to modules/nf-core/ucsc/bedgraphtobigwig/main.nf index b18b190a..054924e7 100644 --- a/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf @@ -1,12 +1,12 @@ process UCSC_BEDGRAPHTOBIGWIG { tag "$meta.id" - label 'process_medium' + label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::ucsc-bedgraphtobigwig=377" : null) + conda "bioconda::ucsc-bedgraphtobigwig=377" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' : - 'quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }" + 'biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }" input: tuple val(meta), path(bedgraph) diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml old mode 100644 new mode 100755 similarity index 88% rename from modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml rename to modules/nf-core/ucsc/bedgraphtobigwig/meta.yml index 1be1a3b7..ba8915be --- a/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml +++ b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml @@ -6,10 +6,8 @@ keywords: tools: - ucsc: description: Convert a bedGraph file to bigWig format. - homepage: None - documentation: None - tool_dev_url: None - doi: "" + homepage: http://hgdownload.cse.ucsc.edu/admin/exe/ + documentation: https://genome.ucsc.edu/goldenPath/help/bigWig.html licence: ["varies; see http://genome.ucsc.edu/license"] input: diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml new file mode 100644 index 00000000..7d08ac0e --- /dev/null +++ b/modules/nf-core/umitools/extract/environment.yml @@ -0,0 +1,7 @@ +name: umitools_extract +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::umi_tools=1.1.4 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf new file mode 100644 index 00000000..4bd79e79 --- /dev/null +++ b/modules/nf-core/umitools/extract/main.nf @@ -0,0 +1,56 @@ +process UMITOOLS_EXTRACT { + tag "$meta.id" + label "process_single" + label "process_long" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + umi_tools \\ + extract \\ + -I $reads \\ + -S ${prefix}.umi_extract.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } else { + """ + umi_tools \\ + extract \\ + -I ${reads[0]} \\ + --read2-in=${reads[1]} \\ + -S ${prefix}.umi_extract_1.fastq.gz \\ + --read2-out=${prefix}.umi_extract_2.fastq.gz \\ + $args \\ + > ${prefix}.umi_extract.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml new file mode 100644 index 00000000..7695b271 --- /dev/null +++ b/modules/nf-core/umitools/extract/meta.yml @@ -0,0 +1,48 @@ +name: umitools_extract +description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +keywords: + - UMI + - barcode + - extract + - umitools +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: "MIT" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test new file mode 100644 index 00000000..22242d1d --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process UMITOOLS_EXTRACT" + script "../main.nf" + process "UMITOOLS_EXTRACT" + config "./nextflow.config" + tag "modules_nfcore" + tag "modules" + tag "umitools" + tag "umitools/extract" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap new file mode 100644 index 00000000..6d5944f1 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + 
"versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb" + ] + ], + "timestamp": "2023-12-08T09:41:43.540658352" + } +} \ No newline at end of file diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config new file mode 100644 index 00000000..c866f5a0 --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } + +} diff --git a/modules/nf-core/umitools/extract/tests/tags.yml b/modules/nf-core/umitools/extract/tests/tags.yml new file mode 100644 index 00000000..c3fb23de --- /dev/null +++ b/modules/nf-core/umitools/extract/tests/tags.yml @@ -0,0 +1,2 @@ +umitools/extract: + - modules/nf-core/umitools/extract/** diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..8cd1856c --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/untar/meta.yml similarity index 84% rename from modules/nf-core/modules/untar/meta.yml rename to modules/nf-core/untar/meta.yml index d426919b..db241a6e 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -3,6 +3,7 @@ description: Extract files. keywords: - untar - uncompress + - extract tools: - untar: description: | @@ -26,9 +27,9 @@ output: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - untar: - type: file - description: - pattern: "*.*" + type: directory + description: Directory containing contents of archive + pattern: "*/" - versions: type: file description: File containing software versions @@ -36,3 +37,5 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/untarfiles/main.nf similarity index 61% rename from modules/nf-core/modules/untar/main.nf rename to modules/nf-core/untarfiles/main.nf index 29ab10a5..59111b5f 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/untarfiles/main.nf @@ -1,18 +1,18 @@ -process UNTAR { +process UNTARFILES { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) output: - tuple val(meta), path("$untar"), emit: untar - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}/**") , emit: files + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,20 +20,18 @@ process UNTAR { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) """ - mkdir output + mkdir $prefix tar \\ - -C output --strip-components 1 \\ - -xzvf \\ + -C $prefix \\ + -xavf \\ $args \\ $archive \\ $args2 - mv output ${untar} - cat <<-END_VERSIONS > versions.yml "${task.process}": untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') @@ -41,9 +39,10 @@ process UNTAR { """ stub: - untar = archive.toString() - '.tar.gz' + prefix = task.ext.prefix ?: "${meta.id}" """ - touch $untar + mkdir $prefix + touch ${prefix}/file.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untarfiles/meta.yml b/modules/nf-core/untarfiles/meta.yml new file mode 100644 index 00000000..098490fa --- /dev/null +++ b/modules/nf-core/untarfiles/meta.yml @@ -0,0 +1,42 @@ +name: untarfiles +description: Extract files. +keywords: + - untar + - uncompress + - files +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - files: + type: string + description: A list containing references to individual archive files + pattern: "*/**" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" + - "@pinin4fjords" diff --git a/nextflow.config b/nextflow.config index e7c2b71d..373904ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,7 @@ params { // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false save_reference = false @@ -71,29 +71,29 @@ params { // MultiQC options multiqc_config = null multiqc_title = null + multiqc_logo = null max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options - outdir = null - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null // Max resource options // Defaults only, expecting to be overwritten @@ -101,6 +101,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + 
validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -114,78 +121,127 @@ try { } // Load nf-core/chipseq custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! -// try { -// includeConfig "${params.custom_config_base}/pipeline/chipseq.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/chipseq profiles: ${params.custom_config_base}/pipeline/chipseq.config") -// } - +try { + includeConfig "${params.custom_config_base}/pipeline/chipseq.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/chipseq profiles: ${params.custom_config_base}/pipeline/chipseq.config") +} profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - params.enable_conda = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } 
docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = 
false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -207,32 +263,36 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/chipseq' - author = 'Espinosa-Carrasco J, Patel H, Wang C, Ewels P' + author = """Espinosa-Carrasco J, Patel H, Wang C, Ewels P""" homePage = 'https://github.com/nf-core/chipseq' - description = 'ChIP-seq peak-calling and differential analysis pipeline.' 
+ description = """ChIP-seq peak-calling and differential analysis pipeline.""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '2.0.0' + nextflowVersion = '!>=23.04.0' + version = '2.1.0dev' + doi = '' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 2a5f445c..7189751b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,14 +10,15 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["outdir"], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row. See [usage docs](https://nf-co.re/chipseq/docs/usage#introduction).", "fa_icon": "fas fa-file-csv" @@ -35,8 +36,9 @@ }, "read_length": { "type": "integer", - "description": "Read length used to calculate MACS2 genome size for peak calling if `--macs_gsize` isn't provided.", + "description": "Read length used to calculate MACS3 genome size for peak calling if `--macs_gsize` isn't provided.", "fa_icon": "fas fa-chart-area", + "help_text": "Read length together with the genome fasta are used to calculate MACS3 genome size using the `khmer` program as explained [here](https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html#effective-genome-size). 
For all the genomes present in the `igenomes.config` the genome size has been already precomputed and the read length is then used to retrieve the corresponding value", "enum": [50, 75, 100, 150, 200] }, "outdir": { @@ -74,6 +76,7 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", @@ -83,6 +86,7 @@ "gtf": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.gtf(\\.gz)?$", "description": "Path to GTF annotation file.", @@ -92,6 +96,7 @@ "gff": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.gff(\\.gz)?$", "fa_icon": "fas fa-code-branch", @@ -100,30 +105,36 @@ }, "bwa_index": { "type": "string", + "format": "path", + "exists": true, "description": "Path to directory or tar.gz archive for pre-built BWA index.", "fa_icon": "fas fa-bezier-curve" }, "bowtie2_index": { "type": "string", "format": "path", + "exists": true, "fa_icon": "fas fa-bezier-curve", "description": "Path to directory or tar.gz archive for pre-built Bowtie2 index." }, "chromap_index": { "type": "string", "format": "path", + "exists": true, "fa_icon": "fas fa-bezier-curve", "description": "Path to directory or tar.gz archive for pre-built Chromap index." }, "star_index": { "type": "string", "format": "path", + "exists": true, "fa_icon": "fas fa-bezier-curve", "description": "Path to directory or tar.gz archive for pre-built STAR index." }, "gene_bed": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.bed(\\.gz)?$", "fa_icon": "fas fa-procedures", @@ -131,14 +142,16 @@ }, "macs_gsize": { "type": "number", - "description": "Effective genome size parameter required by MACS2.", - "help_text": "[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. 
If using an iGenomes reference these have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. For other genomes, if this parameter is not specified then the MACS2 peak-calling and differential analysis will be skipped.", + "description": "Effective genome size parameter required by MACS3.", + "help_text": "[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS3. If using an iGenomes reference these have been provided for any of the genomes available in the igenomes.config, and for the following read lengths (50,75,100,150,200) that should be set using the `--read_length` parameter. For other genomes, if this parameter is not specified it will be inferred using the provided `--read_length` or otherwise the pipeline execution will stop with an error.", "fa_icon": "fas fa-arrows-alt-h" }, "blacklist": { "type": "string", + "format": "path", + "exists": true, "description": "Path to blacklist regions in BED format, used for filtering alignments.", - "help_text": "If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`blacklists`](../assets/blacklists/) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter.", + "help_text": "If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. 
Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`/assets/blacklists`](https://github.com/nf-core/chipseq/tree/master/assets/blacklists) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter.", "fa_icon": "fas fa-book-dead" }, "save_reference": { @@ -151,7 +164,7 @@ "type": "string", "format": "directory-path", "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", + "default": "s3://ngi-igenomes/igenomes/", "fa_icon": "fas fa-cloud-download-alt", "hidden": true }, @@ -162,7 +175,8 @@ "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." } - } + }, + "required": ["fasta"] }, "adapter_trimming_options": { "title": "Adapter trimming options", @@ -247,11 +261,13 @@ "save_unaligned": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", + "description": "Save unaligned sequences to the output directory (only available for Bowtie 2 and STAR).", "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool."
}, "bamtools_filter_pe_config": { "type": "string", + "format": "path", + "exists": true, "default": "$projectDir/assets/bamtools_filter_pe.json", "hidden": true, "description": "BAMTools JSON file with custom filters for paired-end data.", @@ -259,6 +275,8 @@ "bamtools_filter_se_config": { "type": "string", + "format": "path", + "exists": true, "default": "$projectDir/assets/bamtools_filter_se.json", "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", @@ -274,14 +292,14 @@ "properties": { "narrow_peak": { "type": "boolean", - "description": "Run MACS2 in narrowPeak mode.", - "help_text": "MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode.", + "description": "Run MACS3 in narrowPeak mode.", + "help_text": "MACS3 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode.", "fa_icon": "fas fa-arrows-alt-h" }, "broad_cutoff": { "type": "number", "default": 0.1, - "description": "Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified.", + "description": "Specifies broad cutoff value for MACS3. Only used when --narrow_peak isn't specified.", "fa_icon": "fas fa-hand-scissors" }, "macs_fdr": { @@ -303,18 +321,18 @@ }, "save_macs_pileup": { "type": "boolean", - "description": "Instruct MACS2 to create bedGraph files normalised to signal per million reads.", + "description": "Instruct MACS3 to create bedGraph files normalised to signal per million reads.", "fa_icon": "fas fa-save" }, "skip_peak_qc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip MACS2 peak QC plot generation." + "description": "Skip MACS3 peak QC plot generation." }, "skip_peak_annotation": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip annotation of MACS2 and consensus peaks with HOMER."
+ "description": "Skip annotation of MACS3 and consensus peaks with HOMER." }, "skip_consensus_peaks": { "type": "boolean", @@ -465,7 +483,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -484,6 +502,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -528,19 +552,38 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, "multiqc_config": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, - "tracedir": { + "multiqc_logo": { "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", + "format": "file-path", + "exists": true, + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", "hidden": true }, + "multiqc_methods_description": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -548,18 +591,33 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, - "enable_conda": { + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { "type": "boolean", - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", "hidden": true, - "fa_icon": "fas fa-bacon" + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+ }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 0d62beb6..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/subworkflows/local/align_star.nf b/subworkflows/local/align_star.nf new file mode 100644 index 00000000..843f303e --- /dev/null +++ b/subworkflows/local/align_star.nf @@ -0,0 +1,48 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { STAR_ALIGN } from '../../modules/local/star_align' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' + +workflow ALIGN_STAR { + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: /path/to/star/index/ + ch_fasta // channel: /path/to/fasta + seq_center // string: sequencing center + + main: + + ch_versions = Channel.empty() + + // + // Map reads with STAR + // + STAR_ALIGN ( ch_reads, ch_index, seq_center ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( STAR_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), bam ] + log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), log_final ] + 
log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), log_out ] + log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), log_progress ] + bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), bam_sorted ] + bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), bam_transcript ] + fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), fastq ] + tab = STAR_ALIGN.out.tab // channel: [ val(meta), tab ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc.nf b/subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc.nf new file mode 100644 index 00000000..271c3aa3 --- /dev/null +++ b/subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc.nf @@ -0,0 +1,41 @@ +// +// Convert BAM to normalised bigWig via bedGraph using BEDTools and UCSC +// + +include { BEDTOOLS_GENOMECOV } from '../../modules/local/bedtools_genomecov' +include { UCSC_BEDGRAPHTOBIGWIG } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main' + +workflow BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC { + take: + ch_bam_flagstat // channel: [ val(meta), [bam], [flagstat] ] + ch_chrom_sizes // channel: [ bed ] + + main: + + ch_versions = Channel.empty() + + // + // Create bedGraph coverage track + // + BEDTOOLS_GENOMECOV ( + ch_bam_flagstat + ) + ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) + + // + // Create bigWig coverage tracks + // + UCSC_BEDGRAPHTOBIGWIG ( + BEDTOOLS_GENOMECOV.out.bedgraph, + ch_chrom_sizes + ) + ch_versions = 
ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions.first()) + + emit: + bedgraph = BEDTOOLS_GENOMECOV.out.bedgraph // channel: [ val(meta), [ bedgraph ] ] + scale_factor = BEDTOOLS_GENOMECOV.out.scale_factor // channel: [ val(meta), [ txt ] ] + + bigwig = UCSC_BEDGRAPHTOBIGWIG.out.bigwig // channel: [ val(meta), [ bigwig ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_filter_bamtools.nf b/subworkflows/local/bam_filter_bamtools.nf new file mode 100644 index 00000000..a09876b2 --- /dev/null +++ b/subworkflows/local/bam_filter_bamtools.nf @@ -0,0 +1,94 @@ +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' + +include { BAMTOOLS_FILTER } from '../../modules/local/bamtools_filter' +include { BAM_REMOVE_ORPHANS } from '../../modules/local/bam_remove_orphans' + +workflow BAM_FILTER_BAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + ch_bed // channel: [ bed ] + ch_fasta // channel: [ fasta ] + ch_bamtools_filter_se_config // channel: [ config_file ] + ch_bamtools_filter_pe_config // channel: [ config_file ] + + main: + + ch_versions = Channel.empty() + + // + // Filter BAM file with BAMTools + // + BAMTOOLS_FILTER ( + ch_bam_bai, + ch_bed, + ch_bamtools_filter_se_config, + ch_bamtools_filter_pe_config + ) + ch_versions = ch_versions.mix(BAMTOOLS_FILTER.out.versions.first()) + + BAMTOOLS_FILTER + .out + .bam + .branch { + meta, bam -> + single_end: meta.single_end + return [ meta, bam ] + paired_end: !meta.single_end + return [ meta, bam ] + } + .set { ch_bam } + + // + // Index SE BAM file + // + SAMTOOLS_INDEX { + ch_bam.single_end + } + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // + // Run samtools stats, flagstat 
and idxstats on SE BAM + // + BAM_STATS_SAMTOOLS ( + ch_bam.single_end.join(SAMTOOLS_INDEX.out.bai), + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) + + // + // Name sort PE BAM before filtering with pysam + // + SAMTOOLS_SORT ( + ch_bam.paired_end + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + // + // Remove orphan reads from PE BAM file + // + BAM_REMOVE_ORPHANS ( + SAMTOOLS_SORT.out.bam + ) + ch_versions = ch_versions.mix(BAM_REMOVE_ORPHANS.out.versions.first()) + + // + // Sort, index PE BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( + BAM_REMOVE_ORPHANS.out.bam, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) + + emit: + name_bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam.single_end) // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai.mix(SAMTOOLS_INDEX.out.bai) // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats.mix(BAM_STATS_SAMTOOLS.out.idxstats) // channel: [ val(meta), [ idxstats ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf new file mode 100644 index 00000000..69ecb99f --- /dev/null +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf @@ -0,0 +1,155 @@ +// +// Call peaks with MACS3, annotate with HOMER and perform downstream QC +// + +include { MACS3_CALLPEAK } from '../../modules/nf-core/macs3/callpeak/main' +include { HOMER_ANNOTATEPEAKS } from 
'../../modules/nf-core/homer/annotatepeaks/main' + +include { FRIP_SCORE } from '../../modules/local/frip_score' +include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' +include { PLOT_MACS3_QC } from '../../modules/local/plot_macs3_qc' +include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks' + +workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER { + take: + ch_bam // channel: [ val(meta), [ ip_bam ], [ control_bam ] ] + ch_fasta // channel: [ fasta ] + ch_gtf // channel: [ gtf ] + macs_gsize // integer: value for --macs_gsize parameter + annotate_peaks_suffix // string: suffix for input HOMER annotate peaks files to be trimmed off + ch_peak_count_header_multiqc // channel: [ header_file ] + ch_frip_score_multiqc // channel: [ header_file ] + ch_peak_annotation_header_multiqc // channel: [ header_file ] + is_narrow_peak // boolean: true/false + skip_peak_annotation // boolean: true/false + skip_peak_qc // boolean: true/false + + main: + + ch_versions = Channel.empty() + + // + // Call peaks with MACS3 + // + MACS3_CALLPEAK ( + ch_bam, + macs_gsize + ) + ch_versions = ch_versions.mix(MACS3_CALLPEAK.out.versions.first()) + + // + // Filter out samples with 0 MACS3 peaks called + // + MACS3_CALLPEAK + .out + .peak + .filter { + meta, peaks -> + peaks.size() > 0 + } + .set { ch_macs3_peaks } + + // Create channels: [ meta, ip_bam, peaks ] + ch_bam + .join(ch_macs3_peaks, by: [0]) + .map { + meta, ip_bam, control_bam, peaks -> + [ meta, ip_bam, peaks ] + } + .set { ch_bam_peaks } + + // + // Calculate FRiP score + // + FRIP_SCORE ( + ch_bam_peaks + ) + ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first()) + + // Create channels: [ meta, peaks, frip ] + ch_bam_peaks + .join(FRIP_SCORE.out.txt, by: [0]) + .map { + meta, ip_bam, peaks, frip -> + [ meta, peaks, frip ] + } + .set { ch_bam_peak_frip } + + // + // FRiP score custom content for MultiQC + // + MULTIQC_CUSTOM_PEAKS ( + ch_bam_peak_frip, + 
ch_peak_count_header_multiqc, + ch_frip_score_multiqc + ) + ch_versions = ch_versions.mix(MULTIQC_CUSTOM_PEAKS.out.versions.first()) + + ch_homer_annotatepeaks = Channel.empty() + ch_plot_macs3_qc_txt = Channel.empty() + ch_plot_macs3_qc_pdf = Channel.empty() + ch_plot_homer_annotatepeaks_txt = Channel.empty() + ch_plot_homer_annotatepeaks_pdf = Channel.empty() + ch_plot_homer_annotatepeaks_tsv = Channel.empty() + if (!skip_peak_annotation) { + // + // Annotate peaks with HOMER + // + HOMER_ANNOTATEPEAKS ( + ch_macs3_peaks, + ch_fasta, + ch_gtf + ) + ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first()) + + if (!skip_peak_qc) { + // + // MACS3 QC plots with R + // + PLOT_MACS3_QC ( + ch_macs3_peaks.collect{it[1]}, + is_narrow_peak + ) + ch_plot_macs3_qc_txt = PLOT_MACS3_QC.out.txt + ch_plot_macs3_qc_pdf = PLOT_MACS3_QC.out.pdf + ch_versions = ch_versions.mix(PLOT_MACS3_QC.out.versions) + + // + // Peak annotation QC plots with R + // + PLOT_HOMER_ANNOTATEPEAKS ( + HOMER_ANNOTATEPEAKS.out.txt.collect{it[1]}, + ch_peak_annotation_header_multiqc, + annotate_peaks_suffix + ) + ch_plot_homer_annotatepeaks_txt = PLOT_HOMER_ANNOTATEPEAKS.out.txt + ch_plot_homer_annotatepeaks_pdf = PLOT_HOMER_ANNOTATEPEAKS.out.pdf + ch_plot_homer_annotatepeaks_tsv = PLOT_HOMER_ANNOTATEPEAKS.out.tsv + ch_versions = ch_versions.mix(PLOT_HOMER_ANNOTATEPEAKS.out.versions) + } + } + + emit: + peaks = ch_macs3_peaks // channel: [ val(meta), [ peaks ] ] + xls = MACS3_CALLPEAK.out.xls // channel: [ val(meta), [ xls ] ] + gapped_peaks = MACS3_CALLPEAK.out.gapped // channel: [ val(meta), [ gapped_peak ] ] + bed = MACS3_CALLPEAK.out.bed // channel: [ val(meta), [ bed ] ] + bedgraph = MACS3_CALLPEAK.out.bdg // channel: [ val(meta), [ bedgraph ] ] + + frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ] + + frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ] + peak_count_multiqc = 
MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ] + + homer_annotatepeaks = ch_homer_annotatepeaks // channel: [ val(meta), [ txt ] ] + + plot_macs3_qc_txt = ch_plot_macs3_qc_txt // channel: [ txt ] + plot_macs3_qc_pdf = ch_plot_macs3_qc_pdf // channel: [ pdf ] + + plot_homer_annotatepeaks_txt = ch_plot_homer_annotatepeaks_txt // channel: [ txt ] + plot_homer_annotatepeaks_pdf = ch_plot_homer_annotatepeaks_pdf // channel: [ pdf ] + plot_homer_annotatepeaks_tsv = ch_plot_homer_annotatepeaks_tsv // channel: [ tsv ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf b/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf new file mode 100644 index 00000000..294a0eec --- /dev/null +++ b/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf @@ -0,0 +1,158 @@ +// +// Call consensus peaks with BEDTools and custom scripts, annotate with HOMER, quantify with featureCounts and QC with DESeq2 +// + +include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' +include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts/main' + +include { MACS3_CONSENSUS } from '../../modules/local/macs3_consensus' +include { ANNOTATE_BOOLEAN_PEAKS } from '../../modules/local/annotate_boolean_peaks' +include { DESEQ2_QC } from '../../modules/local/deseq2_qc' + +workflow BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 { + take: + ch_peaks // channel: [ val(meta), [ peaks ] ] + ch_bams // channel: [ val(meta), [ ip_bams ] ] + ch_fasta // channel: [ fasta ] + ch_gtf // channel: [ gtf ] + ch_deseq2_pca_header_multiqc // channel: [ header_file ] + ch_deseq2_clustering_header_multiqc // channel: [ header_file ] + is_narrow_peak // boolean: true/false + skip_peak_annotation // boolean: true/false + skip_deseq2_qc // boolean: true/false + + main: + + ch_versions = Channel.empty() + 
+ // Create channels: [ meta , [ peaks ] ] + // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] + ch_peaks + .map { + meta, peak -> + [ meta.antibody, meta.id - ~/_T\d+$/, peak ] + } + .groupTuple() + .map { + antibody, groups, peaks -> + [ + antibody, + groups.groupBy().collectEntries { [(it.key) : it.value.size()] }, + peaks + ] + } + .map { + antibody, groups, peaks -> + def meta_new = [:] + meta_new.id = antibody + meta_new.multiple_groups = groups.size() > 1 + meta_new.replicates_exist = groups.max { it.value }.value > 1 // true when the largest group has more than one replicate + [ meta_new, peaks ] + } + .set { ch_antibody_peaks } + + // + // Generate consensus peaks across samples + // + MACS3_CONSENSUS ( + ch_antibody_peaks, + is_narrow_peak + ) + ch_versions = ch_versions.mix(MACS3_CONSENSUS.out.versions) + + // + // Annotate consensus peaks + // + if (!skip_peak_annotation) { + HOMER_ANNOTATEPEAKS ( + MACS3_CONSENSUS.out.bed, + ch_fasta, + ch_gtf + ) + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions) + + // + // MODULE: Add boolean fields to annotated consensus peaks to aid filtering + // + ANNOTATE_BOOLEAN_PEAKS ( + MACS3_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS.out.txt, by: [0]), + ) + ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions) + } + + // Create channels: [ meta, [ ip_bams ], saf ] + MACS3_CONSENSUS + .out + .saf + .map { + meta, saf -> + [ meta.id, meta, saf ] + } + .join(ch_bams) + .map { + antibody, meta, saf, bams -> + [ meta, bams.flatten().sort(), saf ] + } + .set { ch_bam_saf } + + // + // Quantify peaks across samples with featureCounts + // + SUBREAD_FEATURECOUNTS ( + ch_bam_saf + ) + ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions) + + // + // Generate QC plots with DESeq2 + // + ch_deseq2_qc_pdf = Channel.empty() + ch_deseq2_qc_rdata = Channel.empty() + ch_deseq2_qc_rds = Channel.empty() + ch_deseq2_qc_pca_txt = Channel.empty() + ch_deseq2_qc_pca_multiqc = Channel.empty() + 
ch_deseq2_qc_dists_txt = Channel.empty() + ch_deseq2_qc_dists_multiqc = Channel.empty() + ch_deseq2_qc_log = Channel.empty() + ch_deseq2_qc_size_factors = Channel.empty() + if (!skip_deseq2_qc) { + DESEQ2_QC ( + SUBREAD_FEATURECOUNTS.out.counts, + ch_deseq2_pca_header_multiqc, + ch_deseq2_clustering_header_multiqc + ) + ch_deseq2_qc_pdf = DESEQ2_QC.out.pdf + ch_deseq2_qc_rdata = DESEQ2_QC.out.rdata + ch_deseq2_qc_rds = DESEQ2_QC.out.rds + ch_deseq2_qc_pca_txt = DESEQ2_QC.out.pca_txt + ch_deseq2_qc_pca_multiqc = DESEQ2_QC.out.pca_multiqc + ch_deseq2_qc_dists_txt = DESEQ2_QC.out.dists_txt + ch_deseq2_qc_dists_multiqc = DESEQ2_QC.out.dists_multiqc + ch_deseq2_qc_log = DESEQ2_QC.out.log + ch_deseq2_qc_size_factors = DESEQ2_QC.out.size_factors + ch_versions = ch_versions.mix(DESEQ2_QC.out.versions) + } + + emit: + consensus_bed = MACS3_CONSENSUS.out.bed // channel: [ bed ] + consensus_saf = MACS3_CONSENSUS.out.saf // channel: [ saf ] + consensus_pdf = MACS3_CONSENSUS.out.pdf // channel: [ pdf ] + consensus_txt = MACS3_CONSENSUS.out.txt // channel: [ txt ] + consensus_boolean_txt = MACS3_CONSENSUS.out.boolean_txt // channel: [ txt ] + consensus_intersect_txt = MACS3_CONSENSUS.out.intersect_txt // channel: [ txt ] + + featurecounts_txt = SUBREAD_FEATURECOUNTS.out.counts // channel: [ txt ] + featurecounts_summary = SUBREAD_FEATURECOUNTS.out.summary // channel: [ txt ] + + deseq2_qc_pdf = ch_deseq2_qc_pdf // channel: [ pdf ] + deseq2_qc_rdata = ch_deseq2_qc_rdata // channel: [ rdata ] + deseq2_qc_rds = ch_deseq2_qc_rds // channel: [ rds ] + deseq2_qc_pca_txt = ch_deseq2_qc_pca_txt // channel: [ txt ] + deseq2_qc_pca_multiqc = ch_deseq2_qc_pca_multiqc // channel: [ txt ] + deseq2_qc_dists_txt = ch_deseq2_qc_dists_txt // channel: [ txt ] + deseq2_qc_dists_multiqc = ch_deseq2_qc_dists_multiqc // channel: [ txt ] + deseq2_qc_log = ch_deseq2_qc_log // channel: [ txt ] + deseq2_qc_size_factors = ch_deseq2_qc_size_factors // channel: [ txt ] + + versions = ch_versions // channel: 
[ versions.yml ] +} diff --git a/subworkflows/local/filter_bam_bamtools.nf b/subworkflows/local/filter_bam_bamtools.nf deleted file mode 100644 index 40e9b1be..00000000 --- a/subworkflows/local/filter_bam_bamtools.nf +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Filter BAM file - */ - -include { BAM_FILTER } from '../../modules/local/bam_filter' -include { BAM_REMOVE_ORPHANS } from '../../modules/local/bam_remove_orphans' -include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' - -workflow FILTER_BAM_BAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] - ch_bed // channel: [ bed ] - bamtools_filter_se_config // file: BAMtools filter JSON config file for SE data - bamtools_filter_pe_config // file: BAMtools filter JSON config file for PE data - - main: - ch_versions = Channel.empty() - - BAM_FILTER(ch_bam_bai, ch_bed, bamtools_filter_se_config, bamtools_filter_pe_config) - BAM_REMOVE_ORPHANS(BAM_FILTER.out.bam) - BAM_SORT_SAMTOOLS(BAM_REMOVE_ORPHANS.out.bam) - - ch_versions = ch_versions.mix(BAM_FILTER.out.versions, - BAM_REMOVE_ORPHANS.out.versions, - BAM_SORT_SAMTOOLS.out.versions) - - emit: - name_bam = BAM_REMOVE_ORPHANS.out.bam // channel: [ val(meta), [ bam ] ] - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 648a2971..40e20bd1 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -29,9 +29,9 @@ def create_fastq_channel(LinkedHashMap row, String seq_center) { meta.antibody = row.antibody meta.control = row.control - def read_group 
= "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\'" + def read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id - ~/_T\d+$/}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\'" if (seq_center) { - read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\\tCN:${seq_center}\'" + read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id - ~/_T\d+$/}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\\tCN:${seq_center}\'" } meta.read_group = read_group diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 9b6422c6..0d91f377 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -7,19 +7,19 @@ include { GUNZIP as GUNZIP_GTF GUNZIP as GUNZIP_GFF GUNZIP as GUNZIP_GENE_BED - GUNZIP as GUNZIP_BLACKLIST } from '../../modules/nf-core/modules/gunzip/main' + GUNZIP as GUNZIP_BLACKLIST } from '../../modules/nf-core/gunzip/main' include { UNTAR as UNTAR_BWA_INDEX UNTAR as UNTAR_BOWTIE2_INDEX - UNTAR as UNTAR_CHROMAP_INDEX - UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/modules/untar/main' + UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/untar/main' -include { GFFREAD } from '../../modules/nf-core/modules/gffread/main' -include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/modules/custom/getchromsizes/main' -include { BWA_INDEX } from '../../modules/nf-core/modules/bwa/index/main' -include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' -include { CHROMAP_INDEX } from '../../modules/nf-core/modules/chromap/index/main' +include { UNTARFILES } from '../../modules/nf-core/untarfiles/main' +include { GFFREAD } from '../../modules/nf-core/gffread/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' +include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main' +include { BOWTIE2_BUILD } from 
'../../modules/nf-core/bowtie2/build/main' +include { CHROMAP_INDEX } from '../../modules/nf-core/chromap/index/main' include { GTF2BED } from '../../modules/local/gtf2bed' include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions' @@ -27,7 +27,18 @@ include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenera workflow PREPARE_GENOME { take: + genome // string: genome name + genomes // map: genome attributes prepare_tool_index // string : tool to prepare index for + fasta // path: path to genome fasta file + gtf // file: /path/to/genome.gtf + gff // file: /path/to/genome.gff + blacklist // file: /path/to/blacklist.bed + gene_bed // file: /path/to/gene.bed + bwa_index // file: /path/to/bwa/index/ + bowtie2_index // file: /path/to/bowtie2/index/ + chromap_index // file: /path/to/chromap/index/ + star_index // file: /path/to/star/index/ main: @@ -37,35 +48,29 @@ workflow PREPARE_GENOME { // Uncompress genome fasta file if required // ch_fasta = Channel.empty() - if (params.fasta.endsWith('.gz')) { - ch_fasta = GUNZIP_FASTA ( [ [:], params.fasta ] ).gunzip.map{ it[1] } + if (fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { - ch_fasta = file(params.fasta) - } - - // Make fasta file available if reference saved or IGV is run - if (params.save_reference || !params.skip_igv) { - file("${params.outdir}/genome/").mkdirs() - ch_fasta.copyTo("${params.outdir}/genome/") + ch_fasta = Channel.value(file(fasta)) } // // Uncompress GTF annotation file or create from GFF3 if required // - if (params.gtf) { - if (params.gtf.endsWith('.gz')) { - ch_gtf = GUNZIP_GTF ( [ [:], params.gtf ] ).gunzip.map{ it[1] } + if (gtf) { + if (gtf.endsWith('.gz')) { + ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) } else { - ch_gtf = file(params.gtf) + ch_gtf = Channel.value(file(gtf)) } 
- } else if (params.gff) { - if (params.gff.endsWith('.gz')) { - ch_gff = GUNZIP_GFF ( [ [:], params.gff ] ).gunzip.map{ it[1] } + } else if (gff) { + if (gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) } else { - ch_gff = file(params.gff) + ch_gff = Channel.value(file(gff)) } ch_gtf = GFFREAD ( ch_gff ).gtf ch_versions = ch_versions.mix(GFFREAD.out.versions) @@ -75,12 +80,12 @@ workflow PREPARE_GENOME { // Uncompress blacklist file if required // ch_blacklist = Channel.empty() - if (params.blacklist) { - if (params.blacklist.endsWith('.gz')) { - ch_blacklist = GUNZIP_BLACKLIST ( [ [:], params.blacklist ] ).gunzip.map{ it[1] } + if (blacklist) { + if (blacklist.endsWith('.gz')) { + ch_blacklist = GUNZIP_BLACKLIST ( [ [:], blacklist ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions) } else { - ch_blacklist = Channel.fromPath(file(params.blacklist)) + ch_blacklist = Channel.value(file(blacklist)) } } @@ -91,10 +96,10 @@ workflow PREPARE_GENOME { // If --gtf is supplied along with --genome // Make gene bed from supplied --gtf instead of using iGenomes one automatically def make_bed = false - if (!params.gene_bed) { + if (!gene_bed) { make_bed = true - } else if (params.genome && params.gtf) { - if (params.genomes[ params.genome ].gtf != params.gtf) { + } else if (genome && gtf) { + if (genomes[ genome ].gtf != gtf) { make_bed = true } } @@ -103,18 +108,20 @@ workflow PREPARE_GENOME { ch_gene_bed = GTF2BED ( ch_gtf ).bed ch_versions = ch_versions.mix(GTF2BED.out.versions) } else { - if (params.gene_bed.endsWith('.gz')) { - ch_gene_bed = GUNZIP_GENE_BED ( [ [:], params.gene_bed ] ).gunzip.map{ it[1] } + if (gene_bed.endsWith('.gz')) { + ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) } else { - ch_gene_bed = file(params.gene_bed) + ch_gene_bed = 
Channel.value(file(gene_bed)) } } // // Create chromosome sizes file // - ch_chrom_sizes = CUSTOM_GETCHROMSIZES ( ch_fasta ).sizes + CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } ) + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map{ it[1] } ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) // @@ -123,27 +130,26 @@ workflow PREPARE_GENOME { ch_genome_filtered_bed = Channel.empty() GENOME_BLACKLIST_REGIONS ( - CUSTOM_GETCHROMSIZES.out.sizes, + ch_chrom_sizes, ch_blacklist.ifEmpty([]) ) ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions) - // // Uncompress BWA index or generate from scratch if required // ch_bwa_index = Channel.empty() if (prepare_tool_index == 'bwa') { - if (params.bwa_index) { - if (params.bwa_index.endsWith('.tar.gz')) { - ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], params.bwa_index ] ).untar.map{ it[1] } + if (bwa_index) { + if (bwa_index.endsWith('.tar.gz')) { + ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], bwa_index ] ).untar ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions) } else { - ch_bwa_index = file(params.bwa_index) + ch_bwa_index = [ [:], file(bwa_index) ] } } else { - ch_bwa_index = BWA_INDEX ( ch_fasta ).index + ch_bwa_index = BWA_INDEX ( ch_fasta.map { [ [:], it ] } ).index ch_versions = ch_versions.mix(BWA_INDEX.out.versions) } } @@ -153,15 +159,15 @@ workflow PREPARE_GENOME { // ch_bowtie2_index = Channel.empty() if (prepare_tool_index == 'bowtie2') { - if (params.bowtie2_index) { - if (params.bowtie2_index.endsWith('.tar.gz')) { - ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], params.bowtie2_index ] ).untar.map{ it[1] } + if (bowtie2_index) { + if (bowtie2_index.endsWith('.tar.gz')) { + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], bowtie2_index ] ).untar ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) } else { - ch_bowtie2_index = file(params.bowtie2_index) + 
ch_bowtie2_index = [ [:], file(bowtie2_index) ] } } else { - ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta ).index + ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta.map { [ [:], it ] } ).index ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) } } @@ -171,15 +177,15 @@ workflow PREPARE_GENOME { // ch_chromap_index = Channel.empty() if (prepare_tool_index == 'chromap') { - if (params.chromap_index) { - if (params.chromap_index.endsWith('.tar.gz')) { - ch_chromap_index = UNTAR_CHROMAP_INDEX ( [ [:], params.chromap_index ] ).untar.map{ it[1] } - ch_versions = ch_versions.mix(UNTAR.out.versions) + if (chromap_index) { + if (chromap_index.endsWith('.tar.gz')) { + ch_chromap_index = UNTARFILES ( [ [:], chromap_index ] ).files + ch_versions = ch_versions.mix(UNTARFILES.out.versions) } else { - ch_chromap_index = file(params.chromap_index) + ch_chromap_index = [ [:], file(chromap_index) ] } } else { - ch_chromap_index = CHROMAP_INDEX ( ch_fasta ).index + ch_chromap_index = CHROMAP_INDEX ( ch_fasta.map { [ [:], it ] } ).index ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions) } } @@ -189,12 +195,12 @@ workflow PREPARE_GENOME { // ch_star_index = Channel.empty() if (prepare_tool_index == 'star') { - if (params.star_index) { - if (params.star_index.endsWith('.tar.gz')) { - ch_star_index = UNTAR_STAR_INDEX ( [ [:], params.star_index ] ).untar.map{ it[1] } + if (star_index) { + if (star_index.endsWith('.tar.gz')) { + ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map{ it[1] } ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) } else { - ch_star_index = file(params.star_index) + ch_star_index = Channel.value(file(star_index)) } } else { ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index @@ -204,6 +210,7 @@ workflow PREPARE_GENOME { emit: fasta = ch_fasta // path: genome.fasta + fai = ch_fai // path: genome.fai gtf = ch_gtf // path: genome.gtf gene_bed = ch_gene_bed // path: gene.bed chrom_sizes = ch_chrom_sizes // path: genome.sizes 
@@ -212,6 +219,5 @@ workflow PREPARE_GENOME { bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ chromap_index = ch_chromap_index // path: genome.index star_index = ch_star_index // path: star/index/ - - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] } diff --git a/subworkflows/local/utils_nfcore_chipseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_chipseq_pipeline/main.nf new file mode 100644 index 00000000..4ba55198 --- /dev/null +++ b/subworkflows/local/utils_nfcore_chipseq_pipeline/main.nf @@ -0,0 +1,247 @@ +// +// Subworkflow with functionality specific to the nf-core/chipseq pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // 
boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --genome GRCh37 --outdir <OUTDIR>" + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, 
parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + + genomeExistsError() + + if (!params.fasta) { + error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.") + } + + if (!params.gtf && !params.gff) { + error("No GTF or GFF3 annotation specified! The pipeline requires at least one of these files.") + } + + if (params.gtf && params.gff) { + gtfGffWarn(log) + } + + if (!params.macs_gsize) { + macsGsizeWarn(log) + } + + if (!params.read_length && !params.macs_gsize) { + error ("Both '--read_length' and '--macs_gsize' not specified! 
Please specify either to infer MACS3 genome size for peak calling.") + } +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + +// +// Print a warning if both GTF and GFF have been provided +// +def gtfGffWarn(log) { + log.warn "=============================================================================\n" + + " Both '--gtf' and '--gff' parameters have been provided.\n" + + " Using GTF file as priority.\n" + + "===================================================================================" +} + +// +// Print a warning if macs_gsize parameter has not been provided +// +def macsGsizeWarn(log) { + log.warn "=============================================================================\n" + + " --macs_gsize parameter has not been provided.\n" + + " It will be auto-calculated by 'khmer unique-kmers.py' using the '--read_length' parameter.\n" + + " Explicitly provide '--macs_gsize macs3_genome_size' to change this behaviour.\n" + + "===================================================================================" +} diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf deleted file mode 100644 index 35219131..00000000 --- a/subworkflows/nf-core/align_bowtie2.nf +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_BOWTIE2 { - take: - reads 
// channel: [ val(meta), [ reads ] ] - index // path: /path/to/index - save_unaligned // boolean: true/false - - main: - - ch_versions = Channel.empty() - - // - // Map reads with BWA - // - BOWTIE2_ALIGN(reads, index, save_unaligned, false) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS(BOWTIE2_ALIGN.out.bam) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) - - emit: - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // path: versions.yml -} diff --git a/subworkflows/nf-core/align_bwa_mem.nf b/subworkflows/nf-core/align_bwa_mem.nf deleted file mode 100644 index 0c5dff08..00000000 --- a/subworkflows/nf-core/align_bwa_mem.nf +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { BWA_MEM } from '../../modules/nf-core/modules/bwa/mem/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_BWA_MEM { - take: - reads // channel: [ val(meta), [ reads ] ] - index // path: /path/to/index - - main: - - ch_versions = Channel.empty() - - // - // Map reads with BWA - // - BWA_MEM(reads, index, false) - ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS(BWA_MEM.out.bam) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) - - emit: - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ 
val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // path: versions.yml -} diff --git a/subworkflows/nf-core/align_chromap.nf b/subworkflows/nf-core/align_chromap.nf deleted file mode 100644 index 7eb73977..00000000 --- a/subworkflows/nf-core/align_chromap.nf +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { CHROMAP_CHROMAP } from '../../modules/nf-core/modules/chromap/chromap/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_CHROMAP { - take: - reads // channel: [ val(meta), [ reads ] ] - index // path: /path/to/index - fasta // path: /path/to/fasta - - main: - - ch_versions = Channel.empty() - - // - // Map reads with CHROMAP - // - CHROMAP_CHROMAP(reads, fasta, index, [], [], [], []) - ch_versions = ch_versions.mix(CHROMAP_CHROMAP.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS(CHROMAP_CHROMAP.out.bam) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) - - emit: - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // path: versions.yml -} diff --git a/subworkflows/nf-core/align_star.nf b/subworkflows/nf-core/align_star.nf deleted file mode 100644 index 47462182..00000000 --- a/subworkflows/nf-core/align_star.nf +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Map reads, sort, 
index BAM file and run samtools stats, flagstat and idxstats - */ - -include { STAR_ALIGN } from '../../modules/local/star_align' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_STAR { - take: - reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/star/index/ - - main: - - ch_versions = Channel.empty() - - // - // Map reads with STAR - // - STAR_ALIGN ( reads, index ) - ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS ( STAR_ALIGN.out.bam ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) - - emit: - orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), bam ] - log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), log_final ] - log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), log_out ] - log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), log_progress ] - bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), bam_sorted ] - bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), bam_transcript ] - fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), fastq ] - tab = STAR_ALIGN.out.tab // channel: [ val(meta), tab ] - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf new file mode 100644 index 00000000..de8130fb --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -0,0 +1,48 @@ +// +// Picard 
MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +// + +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_MARKDUPLICATES_PICARD { + + take: + ch_bam // channel: [ val(meta), path(bam) ] + ch_fasta // channel: [ path(fasta) ] + ch_fai // channel: [ path(fai) ] + + main: + + ch_versions = Channel.empty() + + PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai ) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + + SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + ch_bam_bai = PICARD_MARKDUPLICATES.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map{meta, bam, bai, csi -> + if (bai) [ meta, bam, bai ] + else [ meta, bam, csi ] + } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(bam) ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml new file mode 100644 index 00000000..fe63068e --- /dev/null +++ 
b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_markduplicates_picard" +description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +keywords: + - markduplicates + - bam + - sam + - cram +components: + - picard/markduplicates + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - ch_bam: + description: | + BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] + - ch_fai: + description: | + Index of the reference genome fasta file + Structure: [ path(fai) ] +output: + - bam: + description: | + processed BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAM/CRAM/SAM samtools index + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI samtools index + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@dmarron" + - "@drpatelh" +maintainers: + - "@dmarron" + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test new file mode 100644 index 00000000..d8d24290 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_workflow { + + name "Test Workflow BAM_MARKDUPLICATES_PICARD" +
script "../main.nf" + workflow "BAM_MARKDUPLICATES_PICARD" + + tag "picard" + tag "picard/markduplicates" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_markduplicates_picard" + tag "subworkflows/bam_markduplicates_picard" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/index" + tag "samtools/stats" + + test("sarscov2 - bam") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + path(workflow.out.bam[0][1]), + path(workflow.out.bai[0][1]), + path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]), + ).match("sarscov2 - bam") }, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("97") } + ) + } + } + + test("homo_sapiens - cram") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { 
assert workflow.success}, + { assert snapshot( + path(workflow.out.bam[0][1]), + path(workflow.out.bai[0][1]), + path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]), + ).match("homo_sapiens - cram") }, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("0.999986") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap new file mode 100644 index 00000000..a208d101 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap @@ -0,0 +1,22 @@ +{ + "homo_sapiens - cram": { + "content": [ + "test.bam:md5,6641dc05efa8384a061f378d86d922cd", + "test.bam.bai:md5,c41c60d8a94adebe53b6df80b6e90d38", + "test.flagstat:md5,93b0ef463df947ede1f42ff60396c34d", + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15", + "test.stats:md5,0035ac8900d85e9a790f4c1f48b76947" + ], + "timestamp": "2023-12-05T17:45:12.484869" + }, + "sarscov2 - bam": { + "content": [ + "test.bam:md5,3091fe6ba1b7530f382fe40b9fd8f45b", + "test.bam.bai:md5,4d3ae8d013444b55e17aa0149a2ab404", + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783", + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2", + "test.stats:md5,e32e7e49dce1fbe327a89e0fb7bc01b1" + ], + "timestamp": "2023-12-05T17:43:58.582652" + } +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml b/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml new file mode 100644 index 00000000..10b85270 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_markduplicates_picard: + - subworkflows/nf-core/bam_markduplicates_picard/** diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf deleted file mode 100644 index 418e14cc..00000000 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ /dev/null @@ -1,34 
+0,0 @@ -/* - * Sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' - -workflow BAM_SORT_SAMTOOLS { - take: - ch_bam // channel: [ val(meta), [ bam ] ] - - main: - - ch_versions = Channel.empty() - - SAMTOOLS_SORT(ch_bam) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) - - SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - - BAM_STATS_SAMTOOLS(SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) - - emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 00000000..fc1c652b --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + 
SAMTOOLS_SORT ( ch_bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..e01f9ccf --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..75b5b934 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_workflow { + + name "Test Workflow BAM_SORT_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_SORT_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" + tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + + test("test_bam_sort_stats_samtools_single_end") { + + when { + 
params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_paired_end_idxstats") } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap 
b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..c159eef3 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,86 @@ +{ + "test_bam_sort_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "timestamp": "2023-10-22T20:25:03.687121177" + }, + "test_bam_sort_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "timestamp": "2023-10-22T20:25:03.709648916" + }, + "test_bam_sort_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f281507081517414eb1a04b2d9c855b2" + ] + ] + ], + "timestamp": "2024-01-18T17:10:02.818694" + }, + "test_bam_sort_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,e32e7e49dce1fbe327a89e0fb7bc01b1" + ] + ] + ], + "timestamp": "2023-12-04T11:06:59.253905951" + }, + "test_bam_sort_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "timestamp": "2024-01-18T17:10:02.84631" + }, + "test_bam_sort_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "timestamp": "2024-01-18T17:10:02.829756" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..30b69d6a --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ 
+subworkflows/bam_sort_stats_samtools: + - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf deleted file mode 100644 index 89a7338f..00000000 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Run SAMtools stats, flagstat and idxstats - */ - -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/modules/samtools/flagstat/main' - -workflow BAM_STATS_SAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] - - main: - - ch_versions = Channel.empty() - - SAMTOOLS_STATS ( ch_bam_bai, [] ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - - SAMTOOLS_FLAGSTAT ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) - - SAMTOOLS_IDXSTATS ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..44d4c010 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow 
BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..809bf736 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and its index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)]
+ - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..c8b21f28 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_cram_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_cram_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_cram_idxstats") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..8bf0d379 --- /dev/null +++ 
b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,128 @@ +{ + "test_bam_stats_samtools_paired_end_cram_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ] + ], + "timestamp": "2023-11-06T09:31:26.194017574" + }, + "test_bam_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,49e2b43344ff92bc4c02463a58f7ba4a" + ] + ] + ], + "timestamp": "2024-01-18T17:17:27.704335" + }, + "test_bam_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "timestamp": "2024-01-18T17:17:27.717482" + }, + "test_bam_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "timestamp": "2023-11-06T09:26:10.340046381" + }, + "test_bam_stats_samtools_paired_end_cram_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ] + ], + "timestamp": "2023-11-06T09:31:26.207052003" + }, + "test_bam_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,5a6667d97806e5002731e9cf23674fad" + ] + ] + ], + "timestamp": "2023-12-04T11:07:06.676820877" + }, + "test_bam_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "timestamp": "2024-01-18T17:17:27.726719" + }, + "test_bam_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "timestamp": "2023-11-06T09:26:10.349439801" + 
}, + "test_bam_stats_samtools_paired_end_cram_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,2cf2fe93596ee3d74f946097b204a629" + ] + ] + ], + "timestamp": "2023-12-04T11:07:22.30295557" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..ec2f2d68 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_stats_samtools: + - subworkflows/nf-core/bam_stats_samtools/** diff --git a/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/subworkflows/nf-core/fastq_align_bowtie2/main.nf new file mode 100644 index 00000000..ba4420f7 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/main.nf @@ -0,0 +1,45 @@ +// +// Alignment with Bowtie2 +// + +include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BOWTIE2 { + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: /path/to/bowtie2/index/ + save_unaligned // val + sort_bam // val + ch_fasta // channel: /path/to/reference.fasta + + main: + + ch_versions = Channel.empty() + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN ( ch_reads, ch_index, save_unaligned, sort_bam ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.aligned, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BOWTIE2_ALIGN.out.aligned // channel: [ val(meta), aligned ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] 
+ bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml new file mode 100644 index 00000000..58023a89 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: fastq_align_bowtie2 +description: Align reads to a reference genome using bowtie2 then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - bowtie2/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - ch_index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + default: false + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Alignment log + pattern: "*.log" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/fastq_align_bwa/main.nf b/subworkflows/nf-core/fastq_align_bwa/main.nf new file mode 100644 index 00000000..9192550d --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/main.nf @@ -0,0 +1,43 @@ +// +// Alignment with BWA +// + +include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BWA { + take: + ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ] + ch_index // channel (mandatory): [ val(meta2), path(index) ] + val_sort_bam // boolean (mandatory): true or false + ch_fasta // channel (optional) : [ val(meta3), path(fasta) ] + + main: + ch_versions = Channel.empty() + + // + // Map reads with BWA + // + + BWA_MEM ( ch_reads, ch_index, val_sort_bam ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + + BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, ch_fasta ) + ch_versions = 
ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_align_bwa/meta.yml b/subworkflows/nf-core/fastq_align_bwa/meta.yml new file mode 100644 index 00000000..fa218408 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/meta.yml @@ -0,0 +1,73 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: fastq_align_bwa +description: Align reads to a reference genome using bwa then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - bwa/mem + - bwa/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] + - ch_index: + description: | + BWA genome index files + Structure: [ val(meta), path(index) ] + - val_sort_bam: + type: boolean + description: If true bwa modules sort resulting bam files + pattern: "true|false" + - ch_fasta: + type: file + description: | + Optional reference fasta file. This only needs to be given if val_sort_bam = true. 
+ Structure: [ val(meta), path(fasta) ] +output: + - bam_orig: + description: | + BAM file produced by bwa + Structure: [ val(meta), path(bam) ] + - bam: + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI index of the ordered BAM file + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@JoseEspinosa" +maintainers: + - "@JoseEspinosa" diff --git a/subworkflows/nf-core/fastq_align_chromap/main.nf b/subworkflows/nf-core/fastq_align_chromap/main.nf new file mode 100644 index 00000000..26b6a4be --- /dev/null +++ b/subworkflows/nf-core/fastq_align_chromap/main.nf @@ -0,0 +1,41 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { CHROMAP_CHROMAP } from '../../../modules/nf-core/chromap/chromap/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_CHROMAP { + take: + ch_reads // channel (mandatory): [ val(meta), [ reads ] ] + ch_index // channel (mandatory): [ val(meta2, [ index ] ] + ch_fasta // channel (mandatory): [ val(meta2, [ fasta ] ] + ch_barcodes // channel (optional): [ barcodes ] + ch_whitelist // channel (optional): [ whitelist ] + ch_chr_order // channel (optional): [ chr_order ] + ch_pairs_chr_order // channel (optional): [ pairs_chr_order ] + + main: + ch_versions = Channel.empty() + + // + // Map reads with CHROMAP + // + CHROMAP_CHROMAP(ch_reads, 
ch_fasta, ch_index, ch_barcodes, ch_whitelist, ch_chr_order, ch_pairs_chr_order) + ch_versions = ch_versions.mix(CHROMAP_CHROMAP.out.versions) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS(CHROMAP_CHROMAP.out.bam, ch_fasta) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/fastq_align_chromap/meta.yml b/subworkflows/nf-core/fastq_align_chromap/meta.yml new file mode 100644 index 00000000..1db3eff1 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_chromap/meta.yml @@ -0,0 +1,103 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_chromap" +description: Align high throughput chromatin profiles using Chromap then sort with samtools +keywords: + - align + - fasta + - genome + - reference + - chromatin profiles + - chip-seq + - atac-seq + - hic +components: + - chromap/chromap + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + Structure: [val(meta), path(reads)] + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - ch_index: + type: file + description: | + Structure: [val(meta2), path(index)] + Chromap genome index files + pattern: "*.index" + - ch_fasta: + type: file + description: | + Structure: [val(meta2), path(fasta)] + Reference fasta file + pattern: "*.{fasta,fa}" + - ch_barcodes: + type: file + description: | + Structure: [path(barcodes)] + Cell barcode files + - ch_whitelist: + type: file + description: | + Structure: [path(whitelist)] + Cell barcode whitelist file + - ch_chr_order: + type: file + description: | + Structure: [path(chr_order)] + Custom chromosome order + - ch_pairs_chr_order: + type: file + description: | + Structure: [path(pairs_chr_order)] + Natural chromosome order for pairs flipping +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: BAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index (currently only for snapaligner) + pattern: "*.bai" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" +maintainers: + - "@JoseEspinosa" diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf new file mode 100644 index 00000000..db2e5b32 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf @@ -0,0 +1,123 @@ +// +// Read QC, UMI extraction and trimming +// + +include { FASTQC } from '../../../modules/nf-core/fastqc/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { TRIMGALORE } 
from '../../../modules/nf-core/trimgalore/main' + +// +// Function that parses TrimGalore log output file to get total number of reads after trimming +// +def getTrimGaloreReadsAfterFiltering(log_file) { + def total_reads = 0 + def filtered_reads = 0 + log_file.eachLine { line -> + def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/ + def filtered_reads_matcher = line =~ /shorter than the length cutoff[^:]+:\s([\d\.]+)/ + if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toFloat() + if (filtered_reads_matcher) filtered_reads = filtered_reads_matcher[0][1].toFloat() + } + return total_reads - filtered_reads +} + +workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + skip_trimming // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + min_trimmed_reads // integer: > 0 + + main: + ch_versions = Channel.empty() + fastqc_html = Channel.empty() + fastqc_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC (reads) + fastqc_html = FASTQC.out.html + fastqc_zip = FASTQC.out.zip + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + } + + umi_reads = reads + umi_log = Channel.empty() + if (with_umi && !skip_umi_extract) { + UMITOOLS_EXTRACT (reads) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1,2]) { + UMITOOLS_EXTRACT + .out + .reads + .map { + meta, reads -> + meta.single_end ? 
[ meta, reads ] : [ meta + ['single_end': true], reads[umi_discard_read % 2] ] + } + .set { umi_reads } + } + } + + trim_reads = umi_reads + trim_unpaired = Channel.empty() + trim_html = Channel.empty() + trim_zip = Channel.empty() + trim_log = Channel.empty() + trim_read_count = Channel.empty() + if (!skip_trimming) { + TRIMGALORE (umi_reads) + trim_unpaired = TRIMGALORE.out.unpaired + trim_html = TRIMGALORE.out.html + trim_zip = TRIMGALORE.out.zip + trim_log = TRIMGALORE.out.log + ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + + // + // Filter FastQ files based on minimum trimmed read count after adapter trimming + // + TRIMGALORE + .out + .reads + .join(trim_log, remainder: true) + .map { + meta, reads, trim_log -> + if (trim_log) { + num_reads = getTrimGaloreReadsAfterFiltering(meta.single_end ? trim_log : trim_log[-1]) + [ meta, reads, num_reads ] + } else { + [ meta, reads, min_trimmed_reads.toFloat() + 1 ] + } + } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toFloat() } + .map { meta, reads, num_reads -> [ meta, reads ] } + .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_html // channel: [ val(meta), [ html ] ] + fastqc_zip // channel: [ val(meta), [ zip ] ] + + umi_log // channel: [ val(meta), [ log ] ] + + trim_unpaired // channel: [ val(meta), [ reads ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_zip // channel: [ val(meta), [ zip ] ] + trim_log // channel: [ val(meta), [ txt ] ] + trim_read_count // channel: [ val(meta), val(count) ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml new file mode 100644 index 
00000000..a7df97f7 --- /dev/null +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml @@ -0,0 +1,101 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_fastqc_umitools_trimgalore" +description: Read QC, UMI extraction and trimming +keywords: + - fastq + - fastqc + - qc + - UMI + - trimming + - trimgalore +components: + - fastqc + - umitools/extract + - trimgalore +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - skip_fastqc: + type: boolean + description: | + Skip fastqc process + - with_umi: + type: boolean + description: | + With or without umi detection + - skip_umi_extract: + type: boolean + description: | + With or without umi extrection + - skip_trimming: + type: boolean + description: | + Allows to skip trimgalore execution + - umi_discard_read: + type: integer + description: | + Discard R1 / R2 if required + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer than this reads will be filtered out of the "reads" output channel +output: + - reads: + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + + + + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. 
+ pattern: "*.{fastq.gz}" + - fastqc_html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - fastqc_zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Logfile for umi_tools + pattern: "*.{log}" + - trim_unpaired: + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - trim_html: + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - trim_zip: + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - trim_log: + type: file + description: Trim Galore! trimming report + pattern: "*_{report.txt}" + - trim_read_count: + type: integer + description: Number of reads remaining after trimming for all input samples + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@KamilMaliszArdigen" +maintainers: + - "@drpatelh" + - "@KamilMaliszArdigen" diff --git a/subworkflows/nf-core/fastqc_trimgalore.nf b/subworkflows/nf-core/fastqc_trimgalore.nf deleted file mode 100644 index 966541ca..00000000 --- a/subworkflows/nf-core/fastqc_trimgalore.nf +++ /dev/null @@ -1,48 +0,0 @@ -// -// Read QC and trimming -// - -include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' -include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main' - -workflow FASTQC_TRIMGALORE { - take: - reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false - skip_trimming // boolean: true/false - - main: - - ch_versions = Channel.empty() - fastqc_html = Channel.empty() - fastqc_zip = Channel.empty() - if (!skip_fastqc) { - FASTQC ( reads ).html.set { fastqc_html } - fastqc_zip = FASTQC.out.zip - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - } - - trim_reads = reads - trim_html = Channel.empty() - trim_zip = Channel.empty() - trim_log = 
Channel.empty() - if (!skip_trimming) { - TRIMGALORE ( reads ).reads.set { trim_reads } - trim_html = TRIMGALORE.out.html - trim_zip = TRIMGALORE.out.zip - trim_log = TRIMGALORE.out.log - ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - - fastqc_html // channel: [ val(meta), [ html ] ] - fastqc_zip // channel: [ val(meta), [ zip ] ] - - trim_html // channel: [ val(meta), [ html ] ] - trim_zip // channel: [ val(meta), [ zip ] ] - trim_log // channel: [ val(meta), [ txt ] ] - - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/mark_duplicates_picard.nf deleted file mode 100644 index 33e88bf5..00000000 --- a/subworkflows/nf-core/mark_duplicates_picard.nf +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats - */ - -include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/modules/picard/markduplicates/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' - -workflow MARK_DUPLICATES_PICARD { - take: - bam // channel: [ val(meta), [ bam ] ] - - main: - - ch_versions = Channel.empty() - - // - // Picard MarkDuplicates - // - PICARD_MARKDUPLICATES(bam) - ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) - - // - // Index BAM file and run samtools stats, flagstat and idxstats - // - SAMTOOLS_INDEX(PICARD_MARKDUPLICATES.out.bam) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - - BAM_STATS_SAMTOOLS(PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) - - emit: - bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ 
val(meta), [ metrics ] ] - - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS 
+======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. 
+ pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    $group

    \n" + summary_section += "
    \n" + for (param in group_params.keySet()) { + summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? 
'' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) 
misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = 
logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the 
JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test 
@@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + 
"green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": 
"\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config 
b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help 
text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. 
"nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + 
schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') 
} }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + 
"description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tower.yml b/tower.yml index 5b1f5f90..5b981d67 100644 --- a/tower.yml +++ b/tower.yml @@ -1,9 +1,9 @@ reports: multiqc_report.html: display: "MultiQC HTML report" - macs2_peak.plots.pdf: - display: "All samples MACS2 peak QC PDF plots" - macs2_annotatePeaks.plots.pdf: + macs3_peak.plots.pdf: + display: "All samples MACS3 peak QC PDF plots" + macs3_annotatePeaks.plots.pdf: display: "All samples HOMER annotatePeaks.pl QC PDF plots" "*.consensus_peaks.plots.pdf": display: "Consensus peaks DESeq2 QC PDF plots" @@ -14,6 +14,6 @@ reports: "*.plotHeatmap.pdf": display: "Per-sample deepTools plotHeatmap PDF plots" "*_peaks.broadPeak": - display: "Per-sample MACS2 broadPeak file" + display: "Per-sample MACS3 
broadPeak file" "*_peaks.narrowPeak": - display: "Per-sample MACS2 narrowPeak file" + display: "Per-sample MACS3 narrowPeak file" diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index ffe99518..491d5f53 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -1,86 +1,29 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def valid_params = [ - aligners : [ 'bwa', 'bowtie2', 'chromap', 'star' ] -] - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowChipseq.initialise(params, log, valid_params) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.input, params.multiqc_config, - params.fasta, - params.gtf, params.gff, params.gene_bed, - params.bwa_index, params.bowtie2_index, params.chromap_index, params.star_index, - params.blacklist, - params.bamtools_filter_pe_config, params.bamtools_filter_se_config -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - -// Save AWS IGenomes file containing annotation version -def anno_readme = params.genomes[ params.genome ]?.readme -if (anno_readme && file(anno_readme).exists()) { - file("${params.outdir}/genome/").mkdirs() - file(anno_readme).copyTo("${params.outdir}/genome/") -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty() - -// JSON files required by BAMTools for alignment filtering -ch_bamtools_filter_se_config = file(params.bamtools_filter_se_config, checkIfExists: true) -ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config, checkIfExists: true) - -// Header files for MultiQC -ch_spp_nsc_header = file("$projectDir/assets/multiqc/spp_nsc_header.txt", checkIfExists: true) -ch_spp_rsc_header = file("$projectDir/assets/multiqc/spp_rsc_header.txt", checkIfExists: true) -ch_spp_correlation_header = file("$projectDir/assets/multiqc/spp_correlation_header.txt", checkIfExists: true) -ch_peak_count_header = file("$projectDir/assets/multiqc/peak_count_header.txt", checkIfExists: true) -ch_frip_score_header = file("$projectDir/assets/multiqc/frip_score_header.txt", checkIfExists: true) -ch_peak_annotation_header = file("$projectDir/assets/multiqc/peak_annotation_header.txt", checkIfExists: true) -ch_deseq2_pca_header = file("$projectDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true) -ch_deseq2_clustering_header = file("$projectDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { BEDTOOLS_GENOMECOV } from '../modules/local/bedtools_genomecov' -include { FRIP_SCORE } from '../modules/local/frip_score' -include { PLOT_MACS2_QC } from '../modules/local/plot_macs2_qc' -include { PLOT_HOMER_ANNOTATEPEAKS } from '../modules/local/plot_homer_annotatepeaks' -include { MACS2_CONSENSUS } from '../modules/local/macs2_consensus' -include { ANNOTATE_BOOLEAN_PEAKS } from '../modules/local/annotate_boolean_peaks' -include { DESEQ2_QC } from '../modules/local/deseq2_qc' +// +// MODULE: Loaded from modules/local/ +// include { IGV } from '../modules/local/igv' include { MULTIQC } 
from '../modules/local/multiqc' include { MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS } from '../modules/local/multiqc_custom_phantompeakqualtools' -include { MULTIQC_CUSTOM_PEAKS } from '../modules/local/multiqc_custom_peaks' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { FILTER_BAM_BAMTOOLS } from '../subworkflows/local/filter_bam_bamtools' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_chipseq_pipeline' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { ALIGN_STAR } from '../subworkflows/local/align_star' +include { BAM_FILTER_BAMTOOLS } from '../subworkflows/local/bam_filter_bamtools' +include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' +include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs3_homer.nf' +include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -92,33 +35,25 @@ include { FILTER_BAM_BAMTOOLS } from '../subworkflows/local/filter_bam_bamtools' // MODULE: Installed directly from nf-core/modules // -include { PICARD_MERGESAMFILES } from '../modules/nf-core/modules/picard/mergesamfiles/main' -include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/modules/picard/collectmultiplemetrics/main' -include { PRESEQ_LCEXTRAP } from '../modules/nf-core/modules/preseq/lcextrap/main' -include { 
PHANTOMPEAKQUALTOOLS } from '../modules/nf-core/modules/phantompeakqualtools/main' -include { UCSC_BEDGRAPHTOBIGWIG } from '../modules/nf-core/modules/ucsc/bedgraphtobigwig/main' -include { DEEPTOOLS_COMPUTEMATRIX } from '../modules/nf-core/modules/deeptools/computematrix/main' -include { DEEPTOOLS_PLOTPROFILE } from '../modules/nf-core/modules/deeptools/plotprofile/main' -include { DEEPTOOLS_PLOTHEATMAP } from '../modules/nf-core/modules/deeptools/plotheatmap/main' -include { DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/modules/deeptools/plotfingerprint/main' -include { KHMER_UNIQUEKMERS } from '../modules/nf-core/modules/khmer/uniquekmers/main' -include { MACS2_CALLPEAK } from '../modules/nf-core/modules/macs2/callpeak/main' -include { SUBREAD_FEATURECOUNTS } from '../modules/nf-core/modules/subread/featurecounts/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' - -include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_MACS2 } from '../modules/nf-core/modules/homer/annotatepeaks/main' -include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../modules/nf-core/modules/homer/annotatepeaks/main' +include { PICARD_MERGESAMFILES } from '../modules/nf-core/picard/mergesamfiles/main' +include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/picard/collectmultiplemetrics/main' +include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' +include { PHANTOMPEAKQUALTOOLS } from '../modules/nf-core/phantompeakqualtools/main' +include { DEEPTOOLS_COMPUTEMATRIX } from '../modules/nf-core/deeptools/computematrix/main' +include { DEEPTOOLS_PLOTPROFILE } from '../modules/nf-core/deeptools/plotprofile/main' +include { DEEPTOOLS_PLOTHEATMAP } from '../modules/nf-core/deeptools/plotheatmap/main' +include { DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/deeptools/plotfingerprint/main' +include { KHMER_UNIQUEKMERS } from '../modules/nf-core/khmer/uniquekmers/main' // // 
SUBWORKFLOW: Consisting entirely of nf-core/modules // -include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' -include { ALIGN_BWA_MEM } from '../subworkflows/nf-core/align_bwa_mem' -include { ALIGN_BOWTIE2 } from '../subworkflows/nf-core/align_bowtie2' -include { ALIGN_CHROMAP } from '../subworkflows/nf-core/align_chromap' -include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' -include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates_picard' +include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main' +include { FASTQ_ALIGN_BWA } from '../subworkflows/nf-core/fastq_align_bwa/main' +include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' +include { FASTQ_ALIGN_CHROMAP } from '../subworkflows/nf-core/fastq_align_chromap/main' +include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -126,39 +61,75 @@ include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] +// JSON files required by BAMTools for alignment filtering +ch_bamtools_filter_se_config = file(params.bamtools_filter_se_config) +ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config) + +// Header files for MultiQC +ch_spp_nsc_header = file("$projectDir/assets/multiqc/spp_nsc_header.txt", checkIfExists: true) +ch_spp_rsc_header = file("$projectDir/assets/multiqc/spp_rsc_header.txt", checkIfExists: true) +ch_spp_correlation_header = file("$projectDir/assets/multiqc/spp_correlation_header.txt", checkIfExists: true) +ch_peak_count_header = file("$projectDir/assets/multiqc/peak_count_header.txt", checkIfExists: true) 
+ch_frip_score_header = file("$projectDir/assets/multiqc/frip_score_header.txt", checkIfExists: true) +ch_peak_annotation_header = file("$projectDir/assets/multiqc/peak_annotation_header.txt", checkIfExists: true) +ch_deseq2_pca_header = file("$projectDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true) +ch_deseq2_clustering_header = file("$projectDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true) + +// Save AWS IGenomes file containing annotation version +def anno_readme = params.genomes[ params.genome ]?.readme +if (anno_readme && file(anno_readme).exists()) { + file("${params.outdir}/genome/").mkdirs() + file(anno_readme).copyTo("${params.outdir}/genome/") +} + + +// // Info required for completion email and summary +// def multiqc_report = [] workflow CHIPSEQ { - ch_versions = Channel.empty() + take: + ch_input // channel: path(sample_sheet.csv) + ch_versions // channel: [ path(versions.yml) ] + ch_fasta // channel: path(genome.fa) + ch_fai // channel: path(genome.fai) + ch_gtf // channel: path(genome.gtf) + ch_gene_bed // channel: path(gene.beds) + ch_chrom_sizes // channel: path(chrom.sizes) + ch_filtered_bed // channel: path(filtered.bed) + ch_bwa_index // channel: path(bwa/index/) + ch_bowtie2_index // channel: path(bowtie2/index) + ch_chromap_index // channel: path(chromap.index) + ch_star_index // channel: path(star/index/) - // - // SUBWORKFLOW: Uncompress and prepare reference genome files - // - PREPARE_GENOME ( - params.aligner - ) - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + main: + ch_multiqc_files = Channel.empty() // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // INPUT_CHECK ( - file(params.input), + ch_input, params.seq_center ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") + // See the documentation 
https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + // ! There is currently no tooling to help you write a sample sheet schema // // SUBWORKFLOW: Read QC and trim adapters // - FASTQC_TRIMGALORE ( + FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( INPUT_CHECK.out.reads, params.skip_fastqc || params.skip_qc, - params.skip_trimming + false, + false, + params.skip_trimming, + 0, + 10000 ) - ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) // // SUBWORKFLOW: Alignment with BWA & BAM QC @@ -169,81 +140,67 @@ workflow CHIPSEQ { ch_samtools_flagstat = Channel.empty() ch_samtools_idxstats = Channel.empty() if (params.aligner == 'bwa') { - ALIGN_BWA_MEM ( - FASTQC_TRIMGALORE.out.reads, - PREPARE_GENOME.out.bwa_index + FASTQ_ALIGN_BWA ( + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, + ch_bwa_index, + false, + ch_fasta + .map { + [ [:], it ] + } ) - ch_genome_bam = ALIGN_BWA_MEM.out.bam - ch_genome_bam_index = ALIGN_BWA_MEM.out.bai - ch_samtools_stats = ALIGN_BWA_MEM.out.stats - ch_samtools_flagstat = ALIGN_BWA_MEM.out.flagstat - ch_samtools_idxstats = ALIGN_BWA_MEM.out.idxstats - ch_versions = ch_versions.mix(ALIGN_BWA_MEM.out.versions.first()) + ch_genome_bam = FASTQ_ALIGN_BWA.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BWA.out.bai + ch_samtools_stats = FASTQ_ALIGN_BWA.out.stats + ch_samtools_flagstat = FASTQ_ALIGN_BWA.out.flagstat + ch_samtools_idxstats = FASTQ_ALIGN_BWA.out.idxstats + ch_versions = ch_versions.mix(FASTQ_ALIGN_BWA.out.versions) } // // SUBWORKFLOW: Alignment with Bowtie2 & BAM QC // if (params.aligner == 'bowtie2') { - ALIGN_BOWTIE2 ( - FASTQC_TRIMGALORE.out.reads, - PREPARE_GENOME.out.bowtie2_index, - params.save_unaligned + FASTQ_ALIGN_BOWTIE2 ( + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, + ch_bowtie2_index, + params.save_unaligned, + false, + ch_fasta + .map { + [ [:], it ] + } ) - ch_genome_bam = ALIGN_BOWTIE2.out.bam - ch_genome_bam_index = 
ALIGN_BOWTIE2.out.bai - ch_samtools_stats = ALIGN_BOWTIE2.out.stats - ch_samtools_flagstat = ALIGN_BOWTIE2.out.flagstat - ch_samtools_idxstats = ALIGN_BOWTIE2.out.idxstats - ch_versions = ch_versions.mix(ALIGN_BOWTIE2.out.versions.first()) + ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BOWTIE2.out.bai + ch_samtools_stats = FASTQ_ALIGN_BOWTIE2.out.stats + ch_samtools_flagstat = FASTQ_ALIGN_BOWTIE2.out.flagstat + ch_samtools_idxstats = FASTQ_ALIGN_BOWTIE2.out.idxstats + ch_versions = ch_versions.mix(FASTQ_ALIGN_BOWTIE2.out.versions) } // // SUBWORKFLOW: Alignment with Chromap & BAM QC // if (params.aligner == 'chromap') { - ALIGN_CHROMAP ( - FASTQC_TRIMGALORE.out.reads, - PREPARE_GENOME.out.chromap_index, - PREPARE_GENOME.out.fasta + FASTQ_ALIGN_CHROMAP ( + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, + ch_chromap_index, + ch_fasta + .map { + [ [:], it ] + }, + [], + [], + [], + [] ) - - // Filter out paired-end reads until the issue below is fixed - // https://github.com/nf-core/chipseq/issues/291 - // ch_genome_bam = ALIGN_CHROMAP.out.bam - ALIGN_CHROMAP - .out - .bam - .branch { - meta, bam -> - single_end: meta.single_end - return [ meta, bam ] - paired_end: !meta.single_end - return [ meta, bam ] - } - .set { ch_genome_bam_chromap } - - ch_genome_bam_chromap - .paired_end - .collect() - .map { - it -> - def count = it.size() - if (count > 0) { - log.warn "=============================================================================\n" + - " Paired-end files produced by chromap cannot be used by some downstream tools due to the issue below:\n" + - " https://github.com/nf-core/chipseq/issues/291\n" + - " They will be excluded from the analysis. 
Consider using a different aligner\n" + - "===================================================================================" - } - } - - ch_genome_bam = ch_genome_bam_chromap.single_end - ch_genome_bam_index = ALIGN_CHROMAP.out.bai - ch_samtools_stats = ALIGN_CHROMAP.out.stats - ch_samtools_flagstat = ALIGN_CHROMAP.out.flagstat - ch_samtools_idxstats = ALIGN_CHROMAP.out.idxstats - ch_versions = ch_versions.mix(ALIGN_CHROMAP.out.versions.first()) + ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam + ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai + ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats + ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat + ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats + ch_versions = ch_versions.mix(FASTQ_ALIGN_CHROMAP.out.versions) } // @@ -251,8 +208,13 @@ workflow CHIPSEQ { // if (params.aligner == 'star') { ALIGN_STAR ( - FASTQC_TRIMGALORE.out.reads, - PREPARE_GENOME.out.star_index + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, + ch_star_index, + ch_fasta + .map { + [ [:], it ] + }, + params.seq_center ?: '' ) ch_genome_bam = ALIGN_STAR.out.bam ch_genome_bam_index = ALIGN_STAR.out.bai @@ -273,39 +235,51 @@ workflow CHIPSEQ { meta, bam -> def meta_clone = meta.clone() meta_clone.remove('read_group') - meta_clone.id = meta_clone.id.split('_')[0..-2].join('_') - [ meta_clone, bam ] + meta_clone.id = meta_clone.id - ~/_T\d+$/ + [ meta_clone, bam ] } .groupTuple(by: [0]) - .map { - it -> - [ it[0], it[1].flatten() ] + .map { + meta, bam -> + [ meta, bam.flatten() ] } .set { ch_sort_bam } PICARD_MERGESAMFILES ( ch_sort_bam ) - ch_versions = ch_versions.mix(PICARD_MERGESAMFILES.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(PICARD_MERGESAMFILES.out.versions.first()) // // SUBWORKFLOW: Mark duplicates & filter BAM files after merging // - MARK_DUPLICATES_PICARD ( - PICARD_MERGESAMFILES.out.bam + BAM_MARKDUPLICATES_PICARD ( + PICARD_MERGESAMFILES.out.bam, + ch_fasta + .map { + [ [:], it ] + }, + ch_fai + .map 
{ + [ [:], it ] + } ) - ch_versions = ch_versions.mix(MARK_DUPLICATES_PICARD.out.versions) + ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions) // // SUBWORKFLOW: Filter BAM file with BamTools // - FILTER_BAM_BAMTOOLS ( - MARK_DUPLICATES_PICARD.out.bam.join(MARK_DUPLICATES_PICARD.out.bai, by: [0]), - PREPARE_GENOME.out.filtered_bed.first(), + BAM_FILTER_BAMTOOLS ( + BAM_MARKDUPLICATES_PICARD.out.bam.join(BAM_MARKDUPLICATES_PICARD.out.bai, by: [0]), + ch_filtered_bed.first(), + ch_fasta + .map { + [ [:], it ] + }, ch_bamtools_filter_se_config, ch_bamtools_filter_pe_config ) - ch_versions = ch_versions.mix(FILTER_BAM_BAMTOOLS.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(BAM_FILTER_BAMTOOLS.out.versions) // // MODULE: Preseq coverage analysis @@ -313,7 +287,7 @@ workflow CHIPSEQ { ch_preseq_multiqc = Channel.empty() if (!params.skip_preseq) { PRESEQ_LCEXTRAP ( - MARK_DUPLICATES_PICARD.out.bam + BAM_MARKDUPLICATES_PICARD.out.bam ) ch_preseq_multiqc = PRESEQ_LCEXTRAP.out.lc_extrap ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) @@ -325,9 +299,20 @@ workflow CHIPSEQ { ch_picardcollectmultiplemetrics_multiqc = Channel.empty() if (!params.skip_picard_metrics) { PICARD_COLLECTMULTIPLEMETRICS ( - FILTER_BAM_BAMTOOLS.out.bam, - PREPARE_GENOME.out.fasta, - [] + BAM_FILTER_BAMTOOLS + .out + .bam + .map { + [ it[0], it[1], [] ] + }, + ch_fasta + .map { + [ [:], it ] + }, + ch_fai + .map { + [ [:], it ] + } ) ch_picardcollectmultiplemetrics_multiqc = PICARD_COLLECTMULTIPLEMETRICS.out.metrics ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) @@ -336,37 +321,40 @@ workflow CHIPSEQ { // // MODULE: Phantompeaktools strand cross-correlation and QC metrics // - PHANTOMPEAKQUALTOOLS ( - FILTER_BAM_BAMTOOLS.out.bam - ) - ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first()) + ch_phantompeakqualtools_spp_multiqc = Channel.empty() + ch_multiqc_phantompeakqualtools_nsc_multiqc = 
Channel.empty() + ch_multiqc_phantompeakqualtools_rsc_multiqc = Channel.empty() + ch_multiqc_phantompeakqualtools_correlation_multiqc = Channel.empty() + if (!params.skip_spp) { + PHANTOMPEAKQUALTOOLS ( + BAM_FILTER_BAMTOOLS.out.bam + ) + ch_phantompeakqualtools_spp_multiqc = PHANTOMPEAKQUALTOOLS.out.spp + ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first()) - // - // MODULE: MultiQC custom content for Phantompeaktools - // - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS ( - PHANTOMPEAKQUALTOOLS.out.spp.join(PHANTOMPEAKQUALTOOLS.out.rdata, by: [0]), - ch_spp_nsc_header, - ch_spp_rsc_header, - ch_spp_correlation_header - ) + // + // MODULE: MultiQC custom content for Phantompeaktools + // + MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS ( + PHANTOMPEAKQUALTOOLS.out.spp.join(PHANTOMPEAKQUALTOOLS.out.rdata, by: [0]), + ch_spp_nsc_header, + ch_spp_rsc_header, + ch_spp_correlation_header + ) + ch_multiqc_phantompeakqualtools_nsc_multiqc = MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.nsc + ch_multiqc_phantompeakqualtools_rsc_multiqc = MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.rsc + ch_multiqc_phantompeakqualtools_correlation_multiqc = MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.correlation + } // - // MODULE: BedGraph coverage tracks + // SUBWORKFLOW: Normalised bigWig coverage tracks // - BEDTOOLS_GENOMECOV ( - FILTER_BAM_BAMTOOLS.out.bam.join(FILTER_BAM_BAMTOOLS.out.flagstat, by: [0]) + BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC ( + BAM_FILTER_BAMTOOLS.out.bam.join(BAM_FILTER_BAMTOOLS.out.flagstat, by: [0]), + ch_chrom_sizes ) - ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) + ch_versions = ch_versions.mix(BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC.out.versions) - // - // MODULE: BigWig coverage tracks - // - UCSC_BEDGRAPHTOBIGWIG ( - BEDTOOLS_GENOMECOV.out.bedgraph, - PREPARE_GENOME.out.chrom_sizes - ) - ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions.first()) ch_deeptoolsplotprofile_multiqc = Channel.empty() if (!params.skip_plot_profile) { @@ -374,8 
+362,8 @@ workflow CHIPSEQ { // MODULE: deepTools matrix generation for plotting // DEEPTOOLS_COMPUTEMATRIX ( - UCSC_BEDGRAPHTOBIGWIG.out.bigwig, - PREPARE_GENOME.out.gene_bed + BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC.out.bigwig, + ch_gene_bed ) ch_versions = ch_versions.mix(DEEPTOOLS_COMPUTEMATRIX.out.versions.first()) @@ -400,20 +388,28 @@ workflow CHIPSEQ { // // Create channels: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] // - FILTER_BAM_BAMTOOLS + BAM_FILTER_BAMTOOLS .out .bam - .join(FILTER_BAM_BAMTOOLS.out.bai, by: [0]) + .join(BAM_FILTER_BAMTOOLS.out.bai, by: [0]) .set { ch_genome_bam_bai } - + ch_genome_bam_bai - .combine(ch_genome_bam_bai) - .map { - meta1, bam1, bai1, meta2, bam2, bai2 -> - meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null + .map { + meta, bam, bai -> + meta.control ? null : [ meta.id, [ bam ] , [ bai ] ] + } + .set { ch_control_bam_bai } + + ch_genome_bam_bai + .map { + meta, bam, bai -> + meta.control ? [ meta.control, meta, [ bam ], [ bai ] ] : null } + .combine(ch_control_bam_bai, by: 0) + .map { it -> [ it[1] , it[2] + it[4], it[3] + it[5] ] } .set { ch_ip_control_bam_bai } - + // // MODULE: deepTools plotFingerprint joint QC for IP and control // @@ -429,15 +425,13 @@ workflow CHIPSEQ { // // MODULE: Calculute genome size with khmer // + // TODO move to prepare genome ch_macs_gsize = Channel.empty() - ch_custompeaks_frip_multiqc = Channel.empty() - ch_custompeaks_count_multiqc = Channel.empty() - ch_plothomerannotatepeaks_multiqc = Channel.empty() ch_subreadfeaturecounts_multiqc = Channel.empty() ch_macs_gsize = params.macs_gsize if (!params.macs_gsize) { KHMER_UNIQUEKMERS ( - PREPARE_GENOME.out.fasta, + ch_fasta, params.read_length ) ch_macs_gsize = KHMER_UNIQUEKMERS.out.kmers.map { it.text.trim() } @@ -445,209 +439,64 @@ workflow CHIPSEQ { // Create channels: [ meta, ip_bam, control_bam ] ch_ip_control_bam_bai - .map { - meta, bams, bais -> - [ meta , bams[0], bams[1] ] + .map { + meta, bams, 
bais -> + [ meta , bams[0], bams[1] ] } .set { ch_ip_control_bam } // - // MODULE: Call peaks with MACS2 + // SUBWORKFLOW: Call peaks with MACS3, annotate with HOMER and perform downstream QC // - MACS2_CALLPEAK ( + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER ( ch_ip_control_bam, - ch_macs_gsize - ) - ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) - - // - // Filter out samples with 0 MACS2 peaks called - // - MACS2_CALLPEAK - .out - .peak - .filter { meta, peaks -> peaks.size() > 0 } - .set { ch_macs2_peaks } - - // Create channels: [ meta, ip_bam, peaks ] - ch_ip_control_bam - .join(ch_macs2_peaks, by: [0]) - .map { - it -> - [ it[0], it[1], it[3] ] - } - .set { ch_ip_bam_peaks } - - // - // MODULE: Calculate FRiP score - // - FRIP_SCORE ( - ch_ip_bam_peaks - ) - ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first()) - - // Create channels: [ meta, peaks, frip ] - ch_ip_bam_peaks - .join(FRIP_SCORE.out.txt, by: [0]) - .map { - it -> - [ it[0], it[2], it[3] ] - } - .set { ch_ip_peaks_frip } - - // - // MODULE: FRiP score custom content for MultiQC - // - MULTIQC_CUSTOM_PEAKS ( - ch_ip_peaks_frip, + ch_fasta, + ch_gtf, + ch_macs_gsize, + "_peaks.annotatePeaks.txt", ch_peak_count_header, - ch_frip_score_header + ch_frip_score_header, + ch_peak_annotation_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_peak_qc ) - ch_custompeaks_frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip - ch_custompeaks_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count - - if (!params.skip_peak_annotation) { - // - // MODULE: Annotate peaks with MACS2 - // - HOMER_ANNOTATEPEAKS_MACS2 ( - ch_macs2_peaks, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gtf - ) - ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_MACS2.out.versions.first()) - - if (!params.skip_peak_qc) { - // - // MODULE: MACS2 QC plots with R - // - PLOT_MACS2_QC ( - ch_macs2_peaks.collect{it[1]} - ) - ch_versions = ch_versions.mix(PLOT_MACS2_QC.out.versions) - - // - // MODULE: Peak 
annotation QC plots with R - // - PLOT_HOMER_ANNOTATEPEAKS ( - HOMER_ANNOTATEPEAKS_MACS2.out.txt.collect{it[1]}, - ch_peak_annotation_header, - "_peaks.annotatePeaks.txt" - ) - ch_plothomerannotatepeaks_multiqc = PLOT_HOMER_ANNOTATEPEAKS.out.tsv - ch_versions = ch_versions.mix(PLOT_HOMER_ANNOTATEPEAKS.out.versions) - } - } + ch_versions = ch_versions.mix(BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.versions) // // Consensus peaks analysis // - ch_macs2_consensus_bed_lib = Channel.empty() - ch_macs2_consensus_txt_lib = Channel.empty() + ch_macs3_consensus_bed_lib = Channel.empty() + ch_macs3_consensus_txt_lib = Channel.empty() ch_deseq2_pca_multiqc = Channel.empty() ch_deseq2_clustering_multiqc = Channel.empty() if (!params.skip_consensus_peaks) { - // Create channels: [ meta , [ peaks ] ] - // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] - ch_macs2_peaks - .map { - meta, peak -> - [ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ] - } - .groupTuple() - .map { - antibody, groups, peaks -> - [ - antibody, - groups.groupBy().collectEntries { [(it.key) : it.value.size()] }, - peaks - ] - } - .map { - antibody, groups, peaks -> - def meta_new = [:] - meta_new.id = antibody - meta_new.multiple_groups = groups.size() > 1 - meta_new.replicates_exist = groups.max { groups.value }.value > 1 - [ meta_new, peaks ] - } - .set { ch_antibody_peaks } - - // - // MODULE: Generate consensus peaks across samples - // - MACS2_CONSENSUS ( - ch_antibody_peaks - ) - ch_macs2_consensus_bed_lib = MACS2_CONSENSUS.out.bed - ch_macs2_consensus_txt_lib = MACS2_CONSENSUS.out.txt - ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) - - if (!params.skip_peak_annotation) { - // - // MODULE: Annotate consensus peaks - // - HOMER_ANNOTATEPEAKS_CONSENSUS ( - MACS2_CONSENSUS.out.bed, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gtf - ) - ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_CONSENSUS.out.versions) - - // - // MODULE: Add 
boolean fields to annotated consensus peaks to aid filtering - // - ANNOTATE_BOOLEAN_PEAKS ( - MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0]), - ) - ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions) - } - // Create channels: [ antibody, [ ip_bams ] ] ch_ip_control_bam - .map { + .map { meta, ip_bam, control_bam -> [ meta.antibody, ip_bam ] } .groupTuple() .set { ch_antibody_bams } - // Create channels: [ meta, [ ip_bams ], saf ] - MACS2_CONSENSUS - .out - .saf - .map { - meta, saf -> - [ meta.id, meta, saf ] - } - .join(ch_antibody_bams) - .map { - antibody, meta, saf, bams -> - [ meta, bams.flatten().sort(), saf ] - } - .set { ch_saf_bams } - - // - // MODULE: Quantify peaks across samples with featureCounts - // - SUBREAD_FEATURECOUNTS ( - ch_saf_bams + BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 ( + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.peaks, + ch_antibody_bams, + ch_fasta, + ch_gtf, + ch_deseq2_pca_header, + ch_deseq2_clustering_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_deseq2_qc ) - ch_subreadfeaturecounts_multiqc = SUBREAD_FEATURECOUNTS.out.summary - ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first()) - - if (!params.skip_deseq2_qc) { - // - // MODULE: Generate QC plots with DESeq2 - // - DESEQ2_QC ( - SUBREAD_FEATURECOUNTS.out.counts, - ch_deseq2_pca_header, - ch_deseq2_clustering_header - ) - ch_deseq2_pca_multiqc = DESEQ2_QC.out.pca_multiqc - ch_deseq2_clustering_multiqc = DESEQ2_QC.out.dists_multiqc - } + ch_macs3_consensus_bed_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_bed + ch_macs3_consensus_txt_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_txt + ch_subreadfeaturecounts_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.featurecounts_summary + ch_deseq2_pca_multiqc = 
BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_pca_multiqc + ch_deseq2_clustering_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_dists_multiqc + ch_versions = ch_versions.mix(BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.versions) } // @@ -656,87 +505,83 @@ workflow CHIPSEQ { if (!params.skip_igv) { IGV ( params.aligner, - params.narrow_peak ? 'narrowPeak' : 'broadPeak', - PREPARE_GENOME.out.fasta, - UCSC_BEDGRAPHTOBIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), - ch_macs2_peaks.collect{it[1]}.ifEmpty([]), - ch_macs2_consensus_bed_lib.collect{it[1]}.ifEmpty([]), - ch_macs2_consensus_txt_lib.collect{it[1]}.ifEmpty([]) + params.narrow_peak ? 'narrow_peak' : 'broad_peak', + ch_fasta, + BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC.out.bigwig.collect{it[1]}.ifEmpty([]), + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.peaks.collect{it[1]}.ifEmpty([]), + ch_macs3_consensus_bed_lib.collect{it[1]}.ifEmpty([]), + ch_macs3_consensus_txt_lib.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix(IGV.out.versions) } // - // MODULE: Pipeline reporting + // Collate and save software versions // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_atacseq_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // if (!params.skip_multiqc) { - workflow_summary = WorkflowChipseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ): Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) MULTIQC ( - ch_multiqc_config, - ch_multiqc_custom_config.collect().ifEmpty([]), - CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(), - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), - FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]), - FASTQC_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]), - FASTQC_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]), + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]), + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]), + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]), ch_samtools_stats.collect{it[1]}.ifEmpty([]), ch_samtools_flagstat.collect{it[1]}.ifEmpty([]), ch_samtools_idxstats.collect{it[1]}.ifEmpty([]), - MARK_DUPLICATES_PICARD.out.stats.collect{it[1]}.ifEmpty([]), - MARK_DUPLICATES_PICARD.out.flagstat.collect{it[1]}.ifEmpty([]), - MARK_DUPLICATES_PICARD.out.idxstats.collect{it[1]}.ifEmpty([]), - MARK_DUPLICATES_PICARD.out.metrics.collect{it[1]}.ifEmpty([]), + BAM_MARKDUPLICATES_PICARD.out.stats.collect{it[1]}.ifEmpty([]), + BAM_MARKDUPLICATES_PICARD.out.flagstat.collect{it[1]}.ifEmpty([]), + BAM_MARKDUPLICATES_PICARD.out.idxstats.collect{it[1]}.ifEmpty([]), + BAM_MARKDUPLICATES_PICARD.out.metrics.collect{it[1]}.ifEmpty([]), - FILTER_BAM_BAMTOOLS.out.stats.collect{it[1]}.ifEmpty([]), - FILTER_BAM_BAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]), - 
FILTER_BAM_BAMTOOLS.out.idxstats.collect{it[1]}.ifEmpty([]), + BAM_FILTER_BAMTOOLS.out.stats.collect{it[1]}.ifEmpty([]), + BAM_FILTER_BAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]), + BAM_FILTER_BAMTOOLS.out.idxstats.collect{it[1]}.ifEmpty([]), ch_picardcollectmultiplemetrics_multiqc.collect{it[1]}.ifEmpty([]), ch_preseq_multiqc.collect{it[1]}.ifEmpty([]), - + ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]), ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]), - - PHANTOMPEAKQUALTOOLS.out.spp.collect{it[1]}.ifEmpty([]), - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.nsc.collect{it[1]}.ifEmpty([]), - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.rsc.collect{it[1]}.ifEmpty([]), - MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.correlation.collect{it[1]}.ifEmpty([]), - - ch_custompeaks_frip_multiqc.collect{it[1]}.ifEmpty([]), - ch_custompeaks_count_multiqc.collect{it[1]}.ifEmpty([]), - ch_plothomerannotatepeaks_multiqc.collect().ifEmpty([]), + + ch_phantompeakqualtools_spp_multiqc.collect{it[1]}.ifEmpty([]), + ch_multiqc_phantompeakqualtools_nsc_multiqc.collect{it[1]}.ifEmpty([]), + ch_multiqc_phantompeakqualtools_rsc_multiqc.collect{it[1]}.ifEmpty([]), + ch_multiqc_phantompeakqualtools_correlation_multiqc.collect{it[1]}.ifEmpty([]), + + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.frip_multiqc.collect{it[1]}.ifEmpty([]), + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.peak_count_multiqc.collect{it[1]}.ifEmpty([]), + BAM_PEAKS_CALL_QC_ANNOTATE_MACS3_HOMER.out.plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), ch_subreadfeaturecounts_multiqc.collect{it[1]}.ifEmpty([]), ch_deseq2_pca_multiqc.collect().ifEmpty([]), ch_deseq2_clustering_multiqc.collect().ifEmpty([]) ) - multiqc_report = MULTIQC.out.report.toList() + ch_multiqc_report = MULTIQC.out.report } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /*