diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..b290e090 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..497a9f0d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,42 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset + +[/assets/email*] +indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset + +[/docs/*.xml*] +indent_style = unset +insert_final_newline = unset + +[/docs/images/metro/*.xml*] +indent_style = unset +insert_final_newline = unset +end_of_line = unset diff --git a/.gitattributes b/.gitattributes index 7fe55006..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow 
+modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 00000000..191fabd2 --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5acaf77a..7b01fbb0 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,35 +9,42 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/phaseimpute then the best place to ask is on the nf-core Slack [#phaseimpute](https://nfcore.slack.com/channels/phaseimpute) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow If you'd like to write some code for nf-core/phaseimpute, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [nf-core/phaseimpute issues](https://github.com/nf-core/phaseimpute/issues) to avoid duplicating work - * If there isn't one already, please create one so that others know you're working on this +1. Check that there isn't already an issue about your idea in the [nf-core/phaseimpute issues](https://github.com/nf-core/phaseimpute/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/phaseimpute repository](https://github.com/nf-core/phaseimpute) to your GitHub account -3. Make the necessary changes / additions within your forked repository -4. 
Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. There are typically two types of tests that run: -### Lint Tests +### Lint tests `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. -To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. 
This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint ` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. -### Pipeline Tests +### Pipeline tests Each `nf-core` pipeline should be set up with a minimal set of test-data. `GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. @@ -46,12 +53,73 @@ These tests are run both with the latest available version of `Nextflow` and als ## Patch -: warning: Only in the unlikely and regretful event of a release happening with a bug. +:warning: Only in the unlikely and regretful event of a release happening with a bug. -* On your own fork, make a new branch `patch` based on `upstream/master`. -* Fix the bug, and bump version (X.Y.Z+1). -* A PR should be made on `master` from patch to directly this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly this particular bug. ## Getting help -For further information/help, please consult the [nf-core/phaseimpute documentation](https://nf-co.re/nf-core/phaseimpute/docs) and don't hesitate to get in touch on the nf-core Slack [#phaseimpute](https://nfcore.slack.com/channels/phaseimpute) channel ([join our Slack here](https://nf-co.re/join/slack)). +For further information/help, please consult the [nf-core/phaseimpute documentation](https://nf-co.re/phaseimpute/usage) and don't hesitate to get in touch on the nf-core Slack [#phaseimpute](https://nfcore.slack.com/channels/phaseimpute) channel ([join our Slack here](https://nf-co.re/join/slack)). 
+ +## Pipeline contribution conventions + +To make the nf-core/phaseimpute code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflow/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. + +- initial process channel: `ch_output_from_` +- intermediate and terminal channels: `ch__for_` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/phaseimpute/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 294ac648..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,42 +0,0 @@ -# nf-core/phaseimpute bug report - -Hi there! - -Thanks for telling us about a problem with the pipeline. 
-Please delete this text and anything that's not relevant from the template below: - -## Describe the bug - -A clear and concise description of what the bug is. - -## Steps to reproduce - -Steps to reproduce the behaviour: - -1. Command line: `nextflow run ...` -2. See error: _Please provide your error message_ - -## Expected behaviour - -A clear and concise description of what you expected to happen. - -## System - -- Hardware: -- Executor: -- OS: -- Version - -## Nextflow Installation - -- Version: - -## Container engine - -- Engine: -- version: -- Image tag: - -## Additional context - -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..85218a5e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/phaseimpute pipeline documentation](https://nf-co.re/phaseimpute/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. 
+ Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/phaseimpute _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..b88beb76 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #phaseimpute channel" + url: https://nfcore.slack.com/channels/phaseimpute + about: Discussion about the nf-core/phaseimpute pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index a4ec960f..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,24 +0,0 @@ -# nf-core/phaseimpute feature request - -Hi there! - -Thanks for suggesting a new feature for the pipeline! -Please delete this text and anything that's not relevant from the template below: - -## Is your feature request related to a problem? Please describe - -A clear and concise description of what the problem is. - -Ex. I'm always frustrated when [...] - -## Describe the solution you'd like - -A clear and concise description of what you want to happen. - -## Describe alternatives you've considered - -A clear and concise description of any alternative solutions or features you've considered. - -## Additional context - -Add any other context about the feature request here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..038f242b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/phaseimpute pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 66e6d750..c34e0b42 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,4 @@ + + ## PR checklist -- [ ] This comment contains a description of changes (with reason) +- [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If necessary, also make a PR on the [nf-core/phaseimpute branch on the nf-core/test-datasets repo](https://github.com/nf-core/test-datasets/pull/new/nf-core/phaseimpute) -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). -- [ ] Make sure your code lints (`nf-core lint .`). -- [ ] Documentation in `docs` is updated -- [ ] `CHANGELOG.md` is updated -- [ ] `README.md` is updated - -**Learn more about contributing:** [CONTRIBUTING.md](https://github.com/nf-core/phaseimpute/tree/master/.github/CONTRIBUTING.md) \ No newline at end of file +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/phaseimpute/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/phaseimpute _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. 
+- [ ] Make sure your code lints (`nf-core pipelines lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml deleted file mode 100644 index 96b12a70..00000000 --- a/.github/markdownlint.yml +++ /dev/null @@ -1,5 +0,0 @@ -# Markdownlint configuration file -default: true, -line-length: false -no-duplicate-header: - siblings_only: true diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 00000000..369ffdce --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,54 @@ +name: nf-core AWS full size tests +# This workflow is triggered on PRs opened against the master branch. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. 
+# It runs the -profile 'test_full' on AWS batch + +on: + pull_request: + branches: + - master + workflow_dispatch: + pull_request_review: + types: [submitted] + +jobs: + run-platform: + name: Run AWS full tests + # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered + if: github.repository == 'nf-core/phaseimpute' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - uses: octokit/request-action@v2.x + if: github.event_name != 'workflow_dispatch' + id: check_approvals + with: + route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - id: test_variables + if: github.event_name != 'workflow_dispatch' + run: | + JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' + CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') + test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/phaseimpute/work-${{ github.sha }} + parameters: | + { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/phaseimpute/results-${{ github.sha }}" + } + profiles: test_full + + - uses: actions/upload-artifact@v4 + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 00000000..36ab01ba --- 
/dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,33 @@ +name: nf-core AWS test +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. +# It runs the -profile 'test' on AWS batch + +on: + workflow_dispatch: +jobs: + run-platform: + name: Run AWS tests + if: github.repository == 'nf-core/phaseimpute' + runs-on: ubuntu-latest + steps: + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/phaseimpute/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/phaseimpute/results-test-${{ github.sha }}" + } + profiles: test + + - uses: actions/upload-artifact@v4 + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 2de17c56..1619a8f9 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -2,15 +2,43 @@ name: nf-core branch protection # This workflow is triggered on PRs to master branch on the repository # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` on: - pull_request: - branches: - - master + pull_request_target: + branches: [master] jobs: test: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - # PRs are only ok if coming from an nf-core `dev` branch or a fork `patch` branch + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs + if: github.repository == 'nf-core/phaseimpute' run: | - { [[ $(git remote get-url origin) == *nf-core/phaseimpute ]] && [[ ${GITHUB_HEAD_REF} = 
"dev" ]]; } || [[ ${GITHUB_HEAD_REF} == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/phaseimpute ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2e0f6e3b..16b89e86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,30 +1,111 @@ name: nf-core CI -# This workflow is triggered on pushes and PRs to the repository. 
-# It runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: [push, pull_request] +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors + +on: + push: + branches: + - dev + - master + pull_request: + branches: + - dev + - master + release: + types: [published] + workflow_dispatch: + +env: + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2" + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true jobs: test: - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false + name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.TEST_PROFILE }} | ${{ matrix.profile }})" + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/phaseimpute') }}" runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ['19.10.0', ''] + NXF_VER: + - "24.04.2" + - "latest-everything" + profile: + - "docker" + - "singularity" + TEST_PROFILE: + - "test_all" + - "test_validate" + - "test_stitch" + - "test_dog" + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "conda" + - isMaster: false + profile: "singularity" steps: - - uses: actions/checkout@v2 - - name: Install Nextflow + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - uses: actions/setup-java@8df1039502a15bceb9433410b1a100fbe190c53b # v4 + with: + distribution: "temurin" + java-version: "17" + + - 
name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Set up Apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 'singularity' run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Pull docker image + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + + - name: Set up Conda + if: matrix.profile == 'conda' run: | - docker pull nfcore/phaseimpute:dev - docker tag nfcore/phaseimpute:dev nfcore/phaseimpute:dev - - name: Run pipeline with test data + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Install pdiff to see diff between nf-test snapshots + run: | + python -m pip install --upgrade pip + pip install pdiff + + - uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.TEST_PROFILE }} | ${{ matrix.profile }}" run: | - # TODO nf-core: You can customise CI pipeline run tests as required - # (eg. 
adding multiple test runs with different parameters) - nextflow run ${GITHUB_WORKSPACE} -profile test,docker + nf-test test --tag "${{ matrix.TEST_PROFILE }}" --profile ${{ matrix.profile }} --verbose diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..0b6b1f27 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..713dc3e7 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,119 @@ +name: Test successful pipeline download with 'nf-core pipelines download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. 
+on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." + required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: Setup Apptainer + uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 + with: + apptainer-version: 1.3.4 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Make a cache directory for the container images + run: | + mkdir -p ./singularity_container_images + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + run: | + nf-core pipelines download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration 'yes' + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Count 
the downloaded number of container images + id: count_initial + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Initial container image count: $image_count" + echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results + + - name: Count the downloaded number of container images + id: count_afterwards + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Post-pipeline run container image count: $image_count" + echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV} + + - name: Compare container image counts + run: | + if [ "${{ env.IMAGE_COUNT_INITIAL }}" -ne "${{ env.IMAGE_COUNT_AFTER }}" ]; then + initial_count=${{ env.IMAGE_COUNT_INITIAL }} + final_count=${{ env.IMAGE_COUNT_AFTER }} + difference=$((final_count - initial_count)) + echo "$difference additional container images were \n downloaded at runtime . The pipeline has no support for offline runs!" + tree ./singularity_container_images + exit 1 + else + echo "The pipeline can be downloaded successfully!" 
+ fi diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..f4b59bb9 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,89 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + fix-linting: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/phaseimpute' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" + + - name: Commit & push changes + id: commit-and-push + if: steps.pre-commit.outcome == 
'failure' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix code linting" + git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/phaseimpute/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1e0827a8..a502573c 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,50 +1,83 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
on: push: + branches: + - dev pull_request: release: types: [published] jobs: - Markdown: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml - YAML: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v1 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - node-version: '10' - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + nf-core: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - uses: actions/setup-python@v1 + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml with: - python-version: '3.6' - architecture: 'x64' + config: ${{ github.workspace }}/.nf-core.yml + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core - - name: Run nf-core lint - run: nf-core lint ${GITHUB_WORKSPACE} + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 
'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 00000000..42e519bf --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + 
number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 00000000..c6ba35df --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml new file mode 100644 index 00000000..e8aafe44 --- /dev/null +++ b/.github/workflows/template_version_comment.yml @@ -0,0 +1,46 @@ +name: nf-core template version comment +# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. +# It posts a comment to the PR, even if it comes from a fork. + +on: pull_request_target + +jobs: + template_version: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Read template version from .nf-core.yml + uses: nichmor/minimal-read-yaml@v0.0.2 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install nf-core + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Check nf-core outdated + id: nf_core_outdated + run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} + + - name: Post nf-core template version comment + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + if: | + contains(env.OUTPUT, 'nf-core') + with: + repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} + allow-repeats: false + message: | + > [!WARNING] + > Newer version of the nf-core template is available. + > + > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. + > Please update your pipeline to the latest version. 
+ > + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). + # diff --git a/.gitignore b/.gitignore index 6354f370..3d4a629b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,10 @@ work/ data/ results/ .DS_Store -tests/ testing/ +testing* *.pyc +null/ +*.code-workspace +.nf-test* +*.xml.bkp diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 00000000..46118637 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,17 @@ +image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + #- esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 00000000..46b1c323 --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1,15 @@ +bump_version: null +nf_core_version: 3.0.2 +org_path: null +repository_type: pipeline +template: + author: Louis Le Nezet, Anabella Trigila + description: Phasing and imputation pipeline + force: false + is_nfcore: true + name: phaseimpute + org: nf-core + outdir: . 
+ skip_features: null + version: 1.0.0 +update: null diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..9e9f0e1c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "3.0.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..437d763d --- /dev/null +++ b/.prettierignore @@ -0,0 +1,12 @@ +email_template.html +adaptivecard.json +slackreport.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +bin/ diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md index 77c0bebf..af1f5a98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,119 @@ # nf-core/phaseimpute: Changelog -The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## v1.0.0 - Black Labrador [2024-12-09] -Initial release of nf-core/phaseimpute, created with the [nf-core](http://nf-co.re/) template. +Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co.re/) template. +Special thanks to [Matthias Hörtenhuber](https://github.com/mashehu), [Mazzalab](https://github.com/mazzalab) and [Sofia Stamouli](https://github.com/sofstam) for the review of this release. 
### `Added` +- [#20](https://github.com/nf-core/phaseimpute/pull/20) - Added automatic detection of vcf contigs for the reference panel and automatic renaming available. +- [#22](https://github.com/nf-core/phaseimpute/pull/20) - Add validation step for concordance analysis. Input channels changed to match inputs steps. Outdir folder organised by steps. Modules config by subworkflows. +- [#26](https://github.com/nf-core/phaseimpute/pull/26) - Added QUILT method. +- [#47](https://github.com/nf-core/phaseimpute/pull/47) - Add possibility to remove samples from reference panel. Add glimpse2 chunking method. Add full-size test parameters. +- [#58](https://github.com/nf-core/phaseimpute/pull/58) - Add external params posfile and chunks. Add glimpse2 phasing and imputation. +- [#67](https://github.com/nf-core/phaseimpute/pull/67) - Export CSVs from each step. +- [#71](https://github.com/nf-core/phaseimpute/pull/71) - Allow external panel to be used in step impute. +- [#97](https://github.com/nf-core/phaseimpute/pull/97) - Add dog reference panel and config to test pipeline with other species. +- [#102](https://github.com/nf-core/phaseimpute/pull/102) - Add dog panel test. +- [#119](https://github.com/nf-core/phaseimpute/pull/119) - Add dog test with panelprep and imputation. +- [#118](https://github.com/nf-core/phaseimpute/pull/118) - Explain how to customize arguments in the pipeline. +- [#111](https://github.com/nf-core/phaseimpute/pull/111) - Add nf-test for all subworkflow, workflow, modules and functions. +- [#131](https://github.com/nf-core/phaseimpute/pull/131) - Set normalisation as optional. Fix extension detection function. Add support for validation with vcf files. Concatenate vcf only if more than one file. Change `--phased` to `--phase` for consistency. +- [#143](https://github.com/nf-core/phaseimpute/pull/143) - Improve contigs warning and error logging. The number of chromosomes contigs is summarized if above `max_chr_names`. 
+- [#146](https://github.com/nf-core/phaseimpute/pull/146) - Add `seed` parameter for `QUILT`. +- [#164](https://github.com/nf-core/phaseimpute/pull/164) - Add additional requirement on input schema `"uniqueEntries": ["panel", "chr"]` and `end` should be greater than `start` in regions. + +### `Changed` + +- [#18](https://github.com/nf-core/phaseimpute/pull/18) - Maps and region by chromosome. Update tests config files. Correct meta map propagation. `test_impute` and `test_sim` works. +- [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5) +- [#40](https://github.com/nf-core/phaseimpute/pull/40) - Add `STITCH` method. Reorganize panelprep subworkflows. +- [#51](https://github.com/nf-core/phaseimpute/pull/51) - Update all process and fix linting errors. Remove `FASTQC` added by the template. +- [#56](https://github.com/nf-core/phaseimpute/pull/56) - Move to nf-test to check the output files names generated. Fix validation and concatenation by chromosomes missing. Add dedicated GLIMPSE1 subworkflow. Fix posfile generation to be done once for glimpse and stitch. +- [#68](https://github.com/nf-core/phaseimpute/pull/68) - `QUILT` can handle external params chunks and hap-legend files. +- [#78](https://github.com/nf-core/phaseimpute/pull/78) - Separate validate step from panel preparation. +- [#84](https://github.com/nf-core/phaseimpute/pull/84) - Change depth computation to use `SAMTOOLS_DEPTH` and make separation by chromosome only if regions are specified. +- [#85](https://github.com/nf-core/phaseimpute/pull/85) - Use external params in individual tests for tools. +- [#86](https://github.com/nf-core/phaseimpute/pull/86) - Move `BCFTOOLS_CONVERT` to `VCF_SITES_EXTRACT_BCFTOOLS`. +- [#88](https://github.com/nf-core/phaseimpute/pull/88) - Improve multiQC report with more information. 
+- [#91](https://github.com/nf-core/phaseimpute/pull/91) - Update metro map with all steps and remove deprecated ones. +- [#93](https://github.com/nf-core/phaseimpute/pull/93) - Add support for CRAM file. +- [#93](https://github.com/nf-core/phaseimpute/pull/93) - Check contigs name at workflow level for BAM and VCF. +- [#93](https://github.com/nf-core/phaseimpute/pull/93) - Samples remove with multi-allelics records. +- [#93](https://github.com/nf-core/phaseimpute/pull/93) - Samtools merge in `BAM_REGION` subworkflow. +- [#93](https://github.com/nf-core/phaseimpute/pull/93) - Fix glimpse2_phase output file names. +- [#93](https://github.com/nf-core/phaseimpute/pull/93) - Fix fai combination to fasta. +- [#96](https://github.com/nf-core/phaseimpute/pull/96) - Simplify csv export +- [#96](https://github.com/nf-core/phaseimpute/pull/96) - Use only legend file as posfile for all imputation workflow. +- [#100](https://github.com/nf-core/phaseimpute/pull/100) - Update bcftools, samtools, ... nf-core modules. All indexing is now done with the file creation for most of them. +- [#101](https://github.com/nf-core/phaseimpute/pull/101) - Set `--compute_freq` as `false` by default. +- [#102](https://github.com/nf-core/phaseimpute/pull/102) - Compute chr name from whole vcf. +- [#102](https://github.com/nf-core/phaseimpute/pull/102) - Only warn the user if some contigs are absent from files, the regions to compute is now the intersection of regions, panel, posfile, chunks, map. +- [#102](https://github.com/nf-core/phaseimpute/pull/102) - Update all test and recompute snapshot to match new version of the phaseimpute test dataset. +- [#103](https://github.com/nf-core/phaseimpute/pull/103) - Update `GLIMPSE2_PHASE`, `GUNZIP` and `MULTIQC` +- [#135](https://github.com/nf-core/phaseimpute/pull/135) - Impute by batch of 100 individuals by default using `--batch_size` parameter. All individuals BAM files are gathered and VCF are allowed for `GLIMPSE1` and `GLIMPSE2`. 
Channel preprocessing of stitch is done in stitch subworkflow. Genotype likelihood computation for `GLIMPSE1` is now done outside of the subworkflow and merge the resulting vcf with all the samples. New test added to check batch separation. Improve `usage.md` documentation. Add validation to initialization of the pipeline to ensure compatibility between tools, steps and the files provided by the user. +- [#139](https://github.com/nf-core/phaseimpute/pull/139) - Update all nf-core modules. +- [#146](https://github.com/nf-core/phaseimpute/pull/146) - Remove conda CI check for PR due to Nextflow error. +- [#144](https://github.com/nf-core/phaseimpute/pull/144) - Documentation updates. +- [#148](https://github.com/nf-core/phaseimpute/pull/148) - Fix AWS fulltest github action for manual dispatch. +- [#149](https://github.com/nf-core/phaseimpute/pull/149) - Remove the map file from the AWS fulltest. +- [#152](https://github.com/nf-core/phaseimpute/pull/152) - Fix URLs in the documentation and remove tools citation in the README, use a white background for all images in the documentation. +- [#153](https://github.com/nf-core/phaseimpute/pull/153) - Update and simplify subworkflows snapshot and check only for files names (no md5sum for bam and vcf files due to timestamp). +- [#157](https://github.com/nf-core/phaseimpute/pull/157) - Add `chunk_model` as parameter for better control over `GLIMPSE2_CHUNK` and set window size in `GLIMPSE1_CHUNK` and `GLIMPSE2_chunk` to 4mb to reduce number of chunks (empirical). +- [#160](https://github.com/nf-core/phaseimpute/pull/160) - Improve `CHANGELOG.md` and add details to `usage.md` +- [#158](https://github.com/nf-core/phaseimpute/pull/158) - Remove frequency computation and phasing from full test to reduce cost and computational time. +- [#164](https://github.com/nf-core/phaseimpute/pull/164) - Rename `BAM_REGION_SAMTOOLS` to `BAM_EXTRACT_REGION_SAMTOOLS`. Remove `GLIMPSE2_SPLITREFERENCE` as it is not used. 
Add more steps to `test_all` profile for more exhaustivity. +- [#163](https://github.com/nf-core/phaseimpute/pull/163) - Improve configuration for demanding processes. Use Genome in a Bottle VCF benchmarking file for AWS full test. Moved from `glimpse1` to `glimpse2` for the full test profile. +- [#165](https://github.com/nf-core/phaseimpute/pull/165) - Update metro map and add logo to the documentation. + ### `Fixed` +- [#15](https://github.com/nf-core/phaseimpute/pull/15) - Changed test csv files to point to nf-core repository. +- [#16](https://github.com/nf-core/phaseimpute/pull/16) - Removed `outdir` from test config files. +- [#65](https://github.com/nf-core/phaseimpute/pull/65) - Separate stitch output by individuals. +- [#75](https://github.com/nf-core/phaseimpute/pull/75) - Set frequency computation with `VCFFIXUP` process as optional with `--compute_freq`. Use `GLIMPSE_CHUNK` on panel vcf to compute the chunk and not makewindows on fasta. +- [#117](https://github.com/nf-core/phaseimpute/pull/117) - Fix directories in CSV. +- [#151](https://github.com/nf-core/phaseimpute/pull/151) - Fix `Type not supported: class org.codehaus.groovy.runtime.GStringImpl` error due to `String` test in `getFileExtension()`. +- [#158](https://github.com/nf-core/phaseimpute/pull/158) - Fix contigs usage when regions is only a subset of the given contigs (e.g. if panel file has the 22 chr and the region file only 2 then only the 2 common will be processed). Fix `multiQC` samples names for better comprehension. Fix `-resume` errors when `ch_fasta` is use by adding `cache = 'lenient'` in necessary processes. Fix `--window-size` of `GLIMPSE_CHUNK` from `4` to `4000000`. +- [#153](https://github.com/nf-core/phaseimpute/pull/153) - Fix getFileExtension function. Fix image in `usage.md`. Fix small warnings and errors with updated language server. 
`def` has been added when necessary, `:` use instead of `,` in assertions, `_` added to variables not used in closures, `for` loop replaced by `.each{}`, remove unused code / input. +- [#161](https://github.com/nf-core/phaseimpute/pull/161) - Fix `VCF_SPLIT_BCFTOOLS` when only one sample present by updating `BCFTOOLS_PLUGINSPLIT` and adding `BCFTOOLS_QUERY` to get truth samples names for renaming the resulting files. +- [#162](https://github.com/nf-core/phaseimpute/pull/162) - Fix `fai` usage when provided by `genomes` parameter. +- [#164](https://github.com/nf-core/phaseimpute/pull/164) - Improve documentation writing +- [#163](https://github.com/nf-core/phaseimpute/pull/163) - Fix MULTIQC samples names (add post-processing for clean up `FILTER_CHR_DWN`, `FILTER_CHR_INP`, `GAWK_ERROR_SPL`, `GAWK_RSQUARE_SPL`). Fix output panel `publisDir`. Fix java version to `17` in `ci.yml` due to new nextflow version. + ### `Dependencies` +| Dependency | New version | +| ------------- | ----------- | +| `bcftools` | 1.20 | +| `bedtools` | 2.31.1 | +| `gawk` | 5.3.0 | +| `glimpse-bio` | 1.1.1 | +| `glimpse-bio` | 2.0.1 | +| `gunzip` | 1.10 | +| `htslib` | 1.21 | +| `multiqc` | 1.25.1 | +| `r-quilt` | 1.0.5 | +| `r-stitch` | 1.6.10 | +| `samtools` | 1.21 | +| `shapeit5` | 1.0.0 | +| `tabix` | 1.11 | +| `vcflib` | 1.0.3 | + ### `Deprecated` + +### `Contributors` + +[Louis Le Nézet](https://github.com/LouisLeNezet) +[Anabella Trigila](https://github.com/atrigila) +[Eugenia Fontecha](https://github.com/eugeniafontecha) +[Maxime U Garcia](https://github.com/maxulysse) +[Matias Romero Victorica](https://github.com/mrvictorica) +[Nicolas Schcolnicov](https://github.com/nschcolnicov) +[Hemanoel Passarelli](https://github.com/hemanoel) +[Matthias Hörtenhuber](https://github.com/mashehu) +[Sofia Stamouli](https://github.com/sofstam) diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 00000000..ed82cbf7 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,61 @@ +# 
nf-core/phaseimpute: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +- [bcftools](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3198575/) + +> Li, H. (2011). A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics, 27(21), 2987-2993. + +- [GLIMPSE](https://www.nature.com/articles/s41588-020-00756-0) + +> Rubinacci, S., Ribeiro, D. M., Hofmeister, R. J., & Delaneau, O. (2021). Efficient phasing and imputation of low-coverage sequencing data using large reference panels. Nature Genetics, 53(1), 120-126. + +- [GLIMPSE2](https://doi.org/10.1038/s41588-023-01438-3) + +> Rubinacci, S., Hofmeister, R. J., Sousa da Mota, B., & Delaneau, O. (2023). Imputation of low-coverage sequencing data from 150,119 UK Biobank genomes. Nature genetics 55, 1088–1090. + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + +> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [QUILT](https://pubmed.ncbi.nlm.nih.gov/34083788/) + +> Davies, R. W., Kucka, M., Su, D., Shi, S., Flanagan, M., Cunniff, C. M., ... & Myers, S. (2021). 
Rapid genotype imputation from sequence with reference panels. Nature genetics, 53(7), 1104-1111. + +- [Shapeit](https://doi.org/10.1038/s41588-023-01415-w) + +> Hofmeister RJ, Ribeiro DM, Rubinacci S., Delaneau O. (2023). Accurate rare variant phasing of whole-genome and whole-exome sequencing data in the UK Biobank. Nature Genetics doi: https://doi.org/10.1038/s41588-023-01415-w + +- [STITCH](https://doi.org/10.1038/ng.3594) + +> Davies, R. W., Flint, J., Myers, S., & Mott, R. (2016). Rapid genotype imputation from sequence without reference panels. Nature genetics 48, 965–969. + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + +> Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + +> Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + +> da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +> Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + +> Kurtzer GM, Sochat V, Bauer MW. 
Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index cf930c8a..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,46 +1,182 @@ -# Contributor Covenant Code of Conduct +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: -## Our Standards +- Age +- Ability +- Body size +- Caste +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Neurodiversity +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status -Examples of behavior that contributes to creating a positive environment include: +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. 
-* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members +## Preamble -Examples of unacceptable behavior by participants include: +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. 
Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? 
+ +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. 
(This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Accept feedback, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) +- Take breaks when you feel like you need them. +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. 
+- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. + +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. + +## Procedures for reporting CoC violations + +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. + +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +All reports will be handled with the utmost discretion and confidentiality. -## Scope +You can also report any CoC violations to safety [at] nf-co [dot] re. 
In your email report, please do your best to include: -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. 
+ +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. + +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. + +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. 
+- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. + +## Attribution and Acknowledgements + +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) + +## Changelog + +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
+### v1.1 - October 14th, 2021 -## Attribution +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +### v1.0 - March 15th, 2021 -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index b6bc3e90..00000000 --- a/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM nfcore/base:1.9 -LABEL authors="@louislenezet" \ - description="Docker image containing all software requirements for the nf-core/phaseimpute pipeline" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-phaseimpute-1.0dev/bin:$PATH - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-phaseimpute-1.0dev > nf-core-phaseimpute-1.0dev.yml diff --git a/LICENSE b/LICENSE index ac5e876f..bffa19f8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) @louislenezet +Copyright (c) Louis Le Nezet, Anabella Trigila Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 87b79c24..51592d0d 100644 --- a/README.md +++ b/README.md @@ -1,71 +1,119 @@ -# ![nf-core/phaseimpute](docs/images/nf-core-phaseimpute_logo.png) +

+ + + nf-core/phaseimpute + +

-**Nf-core pipeline for phasing and imputing genomic data.**. +**Multi-step pipeline dedicated to genetic imputation from simulation to validation** -[![GitHub Actions CI Status](https://github.com/nf-core/phaseimpute/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/phaseimpute/actions) -[![GitHub Actions Linting Status](https://github.com/nf-core/phaseimpute/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/phaseimpute/actions) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) +[![GitHub Actions CI Status](https://github.com/nf-core/phaseimpute/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/phaseimpute/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/phaseimpute/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/phaseimpute/actions/workflows/linting.yml) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/phaseimpute/results) [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/phaseimpute) -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/nfcore/phaseimpute.svg)](https://hub.docker.com/r/nfcore/phaseimpute) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23phaseimpute-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/phaseimpute) [![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) [![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core) [![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. +**nf-core/phaseimpute** is a bioinformatics pipeline to phase and impute genetic data. -## Quick Start +metromap -i. Install [`nextflow`](https://nf-co.re/usage/installation) +The whole pipeline consists of five main steps, each of which can be run separately and independently. Users are not required to run all steps sequentially and can select specific steps based on their needs: -ii. 
Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility (please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles)) +1. **QC: Chromosome Name Check**: Ensures compatibility by validating that all expected contigs are present in the variant and alignment files. -iii. Download the pipeline and test it on a minimal dataset with a single command +2. **Simulation (`--simulate`)**: Generates artificial datasets by downsampling high-density data to simulate low-pass genetic information. This enables the comparison of imputation results against a high-quality dataset (truth set). Simulations may include: -```bash -nextflow run nf-core/phaseimpute -profile test, + - **Low-pass data generation** by downsampling BAM or CRAM files with [`samtools view -s`](https://www.htslib.org/doc/samtools-view.html) at different depths. + +3. **Panel Preparation (`--panelprep`)**: Prepares the reference panel through phasing, quality control, variant filtering, and annotation. Key processes include: + + - **Normalization** of the reference panel to retain essential variants. + - **Phasing** of haplotypes in the reference panel using [Shapeit5](https://odelaneau.github.io/shapeit5/). + - **Chunking** of the reference panel into specific regions across chromosomes. + - **Position Extraction** for targeted imputation sites. + +4. **Imputation (`--impute`)**: This is the primary step, where genotypes in the target dataset are imputed using the prepared reference panel. The main steps are: + + - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), or [Quilt](https://github.com/rwdavies/QUILT). 
+ - **Ligation** of imputed chunks to produce a final VCF file per sample, with all chromosomes unified. + +5. **Validation (`--validate`)**: Assesses imputation accuracy by comparing the imputed dataset to a truth dataset. This step leverages the [Glimpse2](https://odelaneau.github.io/GLIMPSE/) concordance process to summarize differences between two VCF files. + +For more detailed instructions, please refer to the [usage documentation](https://nf-co.re/phaseimpute/usage). + +## Usage + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + +The primary function of this pipeline is to impute a target dataset based on a phased panel. Begin by preparing a samplesheet with your input data, formatted as follows: + +```csv title="samplesheet.csv" +sample,file,index +SAMPLE_1X,/path/to/.,/path/to/. ``` -> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. +Each row represents either a bam or a cram file along with its corresponding index file. Ensure that all input files have consistent file extensions. + +For certain tools and steps within the pipeline, you will also need to provide a samplesheet for the reference panel. Here's an example of what a final samplesheet for a reference panel might look like, covering three chromosomes: -iv. Start running your own analysis! 
+```csv title="panel.csv" +panel,chr,vcf,index +Phase3,1,ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.csi +Phase3,2,ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.csi +Phase3,3,ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz,ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.csi +``` - +## Running the pipeline + +Run one of the steps of the pipeline (imputation with glimpse1) using the following command and test profile: ```bash -nextflow run nf-core/phaseimpute -profile --reads '*_R{1,2}.fastq.gz' --genome GRCh37 +nextflow run nf-core/phaseimpute \ + -profile test, \ + --outdir ``` -See [usage docs](docs/usage.md) for all of the available options when running the pipeline. - -## Documentation +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). -The nf-core/phaseimpute pipeline comes with documentation about the pipeline, found in the `docs/` directory: +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/phaseimpute/usage) and the [parameter documentation](https://nf-co.re/phaseimpute/parameters). -1. [Installation](https://nf-co.re/usage/installation) -2. Pipeline configuration - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) -3. [Running the pipeline](docs/usage.md) -4. 
[Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +## Pipeline output - +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/phaseimpute/results) tab on the nf-core website pipeline page. +For more details on the output files and reports, please refer to the [output documentation](https://nf-co.re/phaseimpute/output). ## Credits -nf-core/phaseimpute was originally written by @louislenezet. +nf-core/phaseimpute was originally written by Louis Le Nézet & Anabella Trigila. + +We thank the following people for their extensive assistance in the development of this pipeline: + +- Saul Pierotti +- Eugenia Fontecha +- Matias Romero Victorica +- Hemanoel Passarelli ## Contributions and Support -If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). Further development tips can be found in the [development documentation](docs/development.md). + +For further information or help, don't hesitate to get in touch on the [Slack `#phaseimpute` channel](https://nfcore.slack.com/channels/phaseimpute) (you can join with [this invite](https://nf-co.re/join/slack)). + +## Citations -For further information or help, don't hesitate to get in touch on [Slack](https://nfcore.slack.com/channels/phaseimpute) (you can join with [this invite](https://nf-co.re/join/slack)). + -## Citation +If you use nf-core/phaseimpute for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) - - +An extensive list of references for the tools used by the pipeline, including QUILT, GLIMPSE, and STITCH, can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
You can cite the `nf-core` publication as follows: @@ -73,5 +121,4 @@ You can cite the `nf-core` publication as follows: > > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. > -> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). -> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ) +> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..e7079ad6 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/phaseimpute v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/chr_rename_add.txt b/assets/chr_rename_add.txt new file mode 100644 index 00000000..c48a2640 --- /dev/null +++ b/assets/chr_rename_add.txt @@ -0,0 +1,39 @@ +1 chr1 +2 chr2 +3 chr3 +4 chr4 +5 chr5 +6 chr6 +7 chr7 +8 chr8 +9 chr9 +10 chr10 +11 chr11 +12 chr12 +13 chr13 +14 chr14 +15 chr15 +16 chr16 +17 chr17 +18 chr18 +19 chr19 +20 chr20 +21 chr21 +22 chr22 +23 chr23 +24 chr24 +25 chr25 +26 chr26 +27 chr27 +28 chr28 +29 chr29 +30 chr30 +31 chr31 +32 chr32 +33 chr33 +34 chr34 +35 chr35 +36 chr36 +37 chr37 +38 chr38 +X chrX diff --git a/assets/chr_rename_del.txt b/assets/chr_rename_del.txt new file mode 100644 index 00000000..f324781a --- /dev/null +++ b/assets/chr_rename_del.txt @@ -0,0 +1,39 @@ +chr1 1 +chr2 2 +chr3 3 +chr4 4 +chr5 5 +chr6 6 +chr7 7 +chr8 8 +chr9 9 +chr10 10 +chr11 11 +chr12 12 +chr13 13 +chr14 14 +chr15 15 +chr16 16 +chr17 17 +chr18 18 +chr19 19 +chr20 20 +chr21 21 +chr22 22 +chr23 23 +chr24 24 +chr25 25 +chr26 26 +chr27 27 +chr28 28 +chr29 29 +chr30 30 +chr31 31 +chr32 32 +chr33 33 +chr34 34 +chr35 35 +chr36 36 +chr37 37 +chr38 38 +chrX X diff --git a/assets/email_template.html b/assets/email_template.html index 409966b2..d416687c 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,11 +1,10 @@ - - + nf-core/phaseimpute Pipeline Report @@ -13,7 +12,7 @@ -

nf-core/phaseimpute v${version}

+

nf-core/phaseimpute ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index d393d5dc..58b47420 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,9 +4,8 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/phaseimpute v${version} + nf-core/phaseimpute ${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..38b29c7e --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,29 @@ +id: "nf-core-phaseimpute-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/phaseimpute Methods Description" +section_href: "https://github.com/nf-core/phaseimpute" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/phaseimpute v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index 95e1da0a..00000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/phaseimpute - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - software_versions: - order: -1000 - nf-core-phaseimpute-summary: - order: -1001 - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 00000000..75c911d4 --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,26 @@ +report_comment: > + This report has been generated by the nf-core/phaseimpute + analysis pipeline. For information about how to interpret these results, please see the + documentation. +report_section_order: + "nf-core-phaseimpute-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-phaseimpute-summary": + order: -1002 + +top_modules: + - samtools + - bcftools + +export_plots: true + +disable_version_detection: true + +fn_clean_trim: + - ".panel" + +extra_fn_clean_exts: + - type: regex + pattern: "\\.batch[0-9]+" diff --git a/assets/nf-core-phaseimpute_logo.png b/assets/nf-core-phaseimpute_logo.png deleted file mode 100644 index 76372357..00000000 Binary files a/assets/nf-core-phaseimpute_logo.png and /dev/null differ diff --git a/assets/nf-core-phaseimpute_logo_light.png b/assets/nf-core-phaseimpute_logo_light.png new file mode 100644 index 00000000..0ee5e742 Binary files /dev/null and b/assets/nf-core-phaseimpute_logo_light.png differ diff --git a/assets/panel.csv b/assets/panel.csv new file mode 100644 index 00000000..7286169e --- /dev/null +++ b/assets/panel.csv @@ -0,0 +1,3 @@ +panel,chr,vcf,index +1000GP,chr21,1000GP_21.vcf,1000GP_21.vcf.csi +1000GP,chr22,1000GP_22.vcf,1000GP_22.vcf.csi diff --git a/assets/regionsheet.csv b/assets/regionsheet.csv new file mode 100644 
index 00000000..030c9ba1 --- /dev/null +++ b/assets/regionsheet.csv @@ -0,0 +1,2 @@ +chr,start,end +20,20000000,2200000 diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..483cee7e --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample,file,index +1_BAM_1X,/path/to/.bam,/path/to/.bai +1_BAM_SNP,/path/to/.bam,/path/to/.bai diff --git a/assets/schema_chunks.json b/assets/schema_chunks.json new file mode 100644 index 00000000..e137468b --- /dev/null +++ b/assets/schema_chunks.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_chunks.json", + "title": "nf-core/phaseimpute pipeline - params.chunks schema", + "description": "Schema for the file provided with params.chunks", + "type": "array", + "items": { + "type": "object", + "properties": { + "panel": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Panel name must be provided as a string and cannot contain spaces", + "meta": ["id"] + }, + "chr": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Chromosome name must be provided as a string and cannot contain spaces", + "meta": ["chr"] + }, + "file": { + "type": "string", + "pattern": "^\\S+\\.(txt|bin)$", + "errorMessage": "File with chunks per chromosome must be provided. 
Must have .txt or .bin extension" + } + }, + "required": ["panel", "chr", "file"], + "uniqueEntries": ["panel", "chr"] + } +} diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..33e5dca1 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,35 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_input.json", + "title": "nf-core/phaseimpute pipeline - params.input", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^[a-zA-Z0-9_-]+$", + "errorMessage": "Sample name must be provided and cannot contain spaces nor special character '.' .", + "meta": ["id"] + }, + "tools": { + "type": "string", + "pattern": "^[a-zA-Z0-9_]+$", + "errorMessage": "Tool name cannot contain spaces nor special character '.' .", + "meta": ["tools"] + }, + "file": { + "type": "string", + "pattern": "^\\S+\\.(bam|cram)|((vcf|bcf)(\\.gz))?$", + "errorMessage": "BAM, CRAM, VCF or BCF file must be provided, cannot contain spaces and must have extension '.bam', '.cram' or '.vcf', '.bcf' with optional '.gz' extension" + }, + "index": { + "errorMessage": "Input file index must be provided, cannot contain spaces and must have extension '.bai', 'crai', '.tbi' or '.csi'", + "type": "string", + "pattern": "^\\S+\\.(bai|crai|tbi|csi)$" + } + }, + "required": ["sample", "file", "index"] + } +} diff --git a/assets/schema_input_panel.json b/assets/schema_input_panel.json new file mode 100644 index 00000000..a9f93130 --- /dev/null +++ b/assets/schema_input_panel.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_input_panel.json", + "title": "nf-core/phaseimpute pipeline - params.panel schema", + "description": 
"Schema for the file provided with params.panel", + "type": "array", + "items": { + "type": "object", + "properties": { + "panel": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Panel name must be provided as a string and cannot contain spaces", + "meta": ["id"] + }, + "chr": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Chromosome must be provided as a string and cannot contain spaces", + "meta": ["chr"] + }, + "vcf": { + "type": "string", + "pattern": "^\\S+\\.(vcf|bcf)(.gz)?$", + "errorMessage": "Panel file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with optional '.gz' extension" + }, + "index": { + "type": "string", + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", + "errorMessage": "Panel index file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with optional '.gz' extension and with '.csi' or '.tbi' extension" + } + }, + "required": ["panel", "chr", "vcf", "index"], + "uniqueEntries": ["panel", "chr"] + } +} diff --git a/assets/schema_input_region.json b/assets/schema_input_region.json new file mode 100644 index 00000000..81d7bda3 --- /dev/null +++ b/assets/schema_input_region.json @@ -0,0 +1,28 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_input_region.json", + "title": "nf-core/phaseimpute pipeline - params.input_region schema", + "description": "Schema for the file provided with params.input_region", + "type": "array", + "items": { + "type": "object", + "properties": { + "chr": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Chromosome name must be provided as a string and cannot contain spaces" + }, + "start": { + "type": "integer", + "pattern": "^\\d+$", + "errorMessage": "Region start name must be provided, cannot contain spaces and must be numeric" + }, + "end": { + "type": "integer", + "pattern": "^\\d+$", + 
"errorMessage": "Region end name must be provided, cannot contain spaces and must be numeric" + } + }, + "required": ["chr", "start", "end"] + } +} diff --git a/assets/schema_map.json b/assets/schema_map.json new file mode 100644 index 00000000..d827ebd4 --- /dev/null +++ b/assets/schema_map.json @@ -0,0 +1,24 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_map.json", + "title": "nf-core/phaseimpute pipeline - params.map schema", + "description": "Schema for the file provided with params.map", + "type": "array", + "items": { + "type": "object", + "properties": { + "chr": { + "type": "string", + "pattern": "^(chr)?[0-9]+$", + "errorMessage": "Chromosome must be provided and must be a string containing only numbers, with or without the prefix 'chr'", + "meta": ["chr"] + }, + "map": { + "type": "string", + "pattern": "^\\S+\\.(g)?map(\\.gz)?$", + "errorMessage": "Map file must be provided, cannot contain spaces and must have extension '.map' or '.gmap' with optional '.gz' extension" + } + }, + "required": ["chr", "map"] + } +} diff --git a/assets/schema_posfile.json b/assets/schema_posfile.json new file mode 100644 index 00000000..81937e4f --- /dev/null +++ b/assets/schema_posfile.json @@ -0,0 +1,46 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_posfile.json", + "title": "nf-core/phaseimpute pipeline - params.posfile schema", + "description": "Schema for the file provided with params.posfile", + "type": "array", + "items": { + "type": "object", + "properties": { + "panel": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Panel name must be provided as a string and cannot contain spaces", + "meta": ["id"] + }, + "chr": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Chromosome name must be provided as a string and cannot contain 
spaces", + "meta": ["chr"] + }, + "vcf": { + "type": "string", + "pattern": "^\\S+\\.((vcf)(\\.gz))?$", + "errorMessage": "VCF with sites per chromosome must be provided. Must have .vcf.gz extension" + }, + "index": { + "type": "string", + "pattern": "^\\S+\\.(vcf|bcf)(\\.gz)?\\.(tbi|csi)$", + "errorMessage": "VCF index with sites per chromosome file must be provided, cannot contain spaces and must have extension '.vcf' or '.bcf' with optional '.gz' extension and with '.csi' or '.tbi' extension" + }, + "hap": { + "type": "string", + "pattern": "^\\S+\\.(hap)(\\.gz)?$", + "errorMessage": "Hap file can be provided, cannot contain spaces and must have extension '.hap' with '.gz' extension" + }, + "legend": { + "type": "string", + "pattern": "^\\S+\\.(legend)(\\.gz)?$", + "errorMessage": "Legend file can be provided, cannot contain spaces and must have extension '.legend' with '.gz' extension" + } + }, + "required": ["panel", "chr"], + "uniqueEntries": ["panel", "chr"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index a0a61fec..4e900ed2 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -12,18 +12,18 @@ $email_html Content-Type: image/png;name="nf-core-phaseimpute_logo.png" Content-Transfer-Encoding: base64 Content-ID: -Content-Disposition: inline; filename="nf-core-phaseimpute_logo.png" +Content-Disposition: inline; filename="nf-core-phaseimpute_logo_light.png" -<% out << new File("$baseDir/assets/nf-core-phaseimpute_logo.png"). - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> +<% out << new File("$projectDir/assets/nf-core-phaseimpute_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). 
+ join( '\n' ) %> <% if (mqcFile){ @@ -37,15 +37,15 @@ Content-ID: Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..264f3bed --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/phaseimpute ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/markdown_to_html.py b/bin/markdown_to_html.py deleted file mode 100644 index 57cc4263..00000000 --- a/bin/markdown_to_html.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import argparse -import markdown -import os -import sys - -def convert_markdown(in_fn): - input_md = open(in_fn, mode="r", encoding="utf-8").read() - html = markdown.markdown( - "[TOC]\n" + input_md, - extensions = [ - 'pymdownx.extra', - 'pymdownx.b64', - 'pymdownx.highlight', - 'pymdownx.emoji', - 'pymdownx.tilde', - 'toc' - ], - extension_configs = { - 'pymdownx.b64': { - 'base_path': os.path.dirname(in_fn) - }, - 'pymdownx.highlight': { - 'noclasses': True - }, - 'toc': { - 'title': 'Table of Contents' - } - } - ) - return html - -def wrap_html(contents): - header = """ - - - - - -
- """ - footer = """ -
- - - """ - return header + contents + footer - - -def parse_args(args=None): - parser = argparse.ArgumentParser() - parser.add_argument('mdfile', type=argparse.FileType('r'), nargs='?', - help='File to convert. Defaults to stdin.') - parser.add_argument('-o', '--out', type=argparse.FileType('w'), - default=sys.stdout, - help='Output file name. Defaults to stdout.') - return parser.parse_args(args) - -def main(args=None): - args = parse_args(args) - converted_md = convert_markdown(args.mdfile.name) - html = wrap_html(converted_md) - args.out.write(html) - -if __name__ == '__main__': - sys.exit(main()) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py deleted file mode 100644 index 1692a218..00000000 --- a/bin/scrape_software_versions.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -from collections import OrderedDict -import re - -# TODO nf-core: Add additional regexes for new tools in process get_software_versions -regexes = { - 'nf-core/phaseimpute': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], -} -results = OrderedDict() -results['nf-core/phaseimpute'] = 'N/A' -results['Nextflow'] = 'N/A' -results['FastQC'] = 'N/A' -results['MultiQC'] = 'N/A' - -# Search each file using its regex -for k, v in regexes.items(): - try: - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) - except IOError: - results[k] = False - -# Remove software set to false in results -for k in list(results): - if not results[k]: - del(results[k]) - -# Dump to YAML -print (''' -id: 'software_versions' -section_name: 'nf-core/phaseimpute Software Versions' -section_href: 'https://github.com/nf-core/phaseimpute' -plot_type: 'html' -description: 'are collected at run time from the software output.' 
-data: | -
-''') -for k,v in results.items(): - print("
{}
{}
".format(k,v)) -print ("
") - -# Write out regexes as csv file: -with open('software_versions.csv', 'w') as f: - for k,v in results.items(): - f.write("{}\t{}\n".format(k,v)) diff --git a/conf/base.config b/conf/base.config index c8bda3c9..2e591419 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,51 +1,60 @@ /* - * ------------------------------------------------- - * nf-core/phaseimpute Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/phaseimpute Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ process { - // TODO nf-core: Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 7.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Only one of the labels below are used in the fastqc process in the main script. 
- // If possible, it would be nice to keep the same label naming convention when - // adding in your processes. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 14.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 42.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 84.GB * task.attempt, 'memory' ) } - time = { check_max( 10.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withName:get_software_versions { - cache = false - } - + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. 
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_low { + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_medium { + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_high { + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_long { + time = { 20.h * task.attempt } + } + withLabel:process_high_memory { + memory = { 200.GB * task.attempt } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 2de92422..3f114377 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,420 +1,440 @@ /* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using iGenome paths. 
+ Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = 
"${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + 
gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = 
"${projectDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" - 
bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } } diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config new file mode 100644 index 00000000..b4034d82 --- /dev/null +++ b/conf/igenomes_ignored.config @@ -0,0 +1,9 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Empty genomes dictionary to use when igenomes is ignored. +---------------------------------------------------------------------------------------- +*/ + +params.genomes = [:] diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 00000000..0e75eb81 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,126 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // Stats + // Simulation + // Coverage process + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_INP' { + cache = "lenient" + ext.prefix = { "${meta.id}.truth.allchr" } + publishDir = [ enabled: false ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SAMTOOLS_COVERAGE_DWN' { + cache = "lenient" + ext.prefix = { "${meta.id}.allchr" } + publishDir = [ enabled: false ] + } + + // Filter chromosomes in coverage + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:FILTER_CHR_INP' { + ext.prefix = { "${meta.id}.truth" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:FILTER_CHR_DWN' { + ext.prefix = { "${meta.id}" } + publishDir = [ enabled: false ] + } + + // VCF + // PANEL + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_PANEL' { + ext.prefix = { "${meta.id}.panel" } + publishDir = [ + path: { "${params.outdir}/prep_panel/stats/" }, + mode: params.publish_dir_mode, + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // TRUTH + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_TRUTH' { + ext.prefix = { "${meta.id}.truth" } + publishDir = [ + path: { "${params.outdir}/validation/stats" }, + mode: params.publish_dir_mode, + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + // IMPUTE + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_STATS_TOOLS' { + ext.prefix = { "${meta.id}.${meta.tools}" } + publishDir = [ + path: { "${params.outdir}/imputation/stats" }, + mode: params.publish_dir_mode, + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:LIST_TO_FILE' { + tag = { "${meta.id}" } + ext.prefix = { "${meta.id}.batch${meta.batch}" } + publishDir = [ + path: { "${params.outdir}/imputation/batch" }, + mode: params.publish_dir_mode, + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // Compute sample files for renaming + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY_IMPUTED' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.args = '--list-samples' + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GAWK_IMPUTED' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.prefix = { "${meta.id}_samples"} + ext.args2 = { "-v tools=\"${meta.tools}\" " + "'BEGIN { OFS = \"\\t\" } { print \$1, \"-\", \$1\".\"tools }'" } + publishDir = [enabled: false] + } + + // Split by samples for each tool + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_IMPUTED:BCFTOOLS_PLUGINSPLIT' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.args = ["--output-type z", "--write-index=tbi"].join(' ') + publishDir = [ + path: { "${params.outdir}/imputation/${meta.tools}/samples/" }, + mode: params.publish_dir_mode, + enabled: true, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + +} diff --git a/conf/steps/chrcheck.config b/conf/steps/chrcheck.config new file mode 100644 index 00000000..0e48fd59 --- /dev/null +++ b/conf/steps/chrcheck.config @@ -0,0 +1,35 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + // Check input + withName: 'NFCORE_PHASEIMPUTE:CHRCHECK_INPUT:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id}"} + } + + // Check input truth + withName: 'NFCORE_PHASEIMPUTE:CHRCHECK_TRUTH:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id}"} + } + + // Check panel + withName: 'NFCORE_PHASEIMPUTE:CHRCHECK_PANEL:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:CHRCHECK_.*:VCF_CHR_RENAME_BCFTOOLS:BCFTOOLS_ANNOTATE' { + ext.args = ["-Oz", "--no-version", "--write-index=tbi"].join(' ') + } +} diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config new file mode 100644 index 00000000..d28ad992 --- /dev/null +++ b/conf/steps/imputation_glimpse1.config @@ -0,0 +1,96 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module 
options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + // Configuration for the glimpse1 imputation subworkflow + + // Call the variants before imputation + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:GAWK' { + ext.args2 = "'NR>1 { split(\$1, a, \"[:-_]\"); print a[1], \$2, \$3 \",\" \$4 }'" + ext.prefix = { "${meta.id}" } + ext.suffix = "tsv" + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:BCFTOOLS_MPILEUP' { + cache = "lenient" + ext.args = [ + "-I", + "-E", + "-a 'FORMAT/DP'" + ].join(' ') + ext.args2 = [ + "-Aim", + "-C alleles" + ].join(' ') + ext.prefix = { "${meta.id}" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:BCFTOOLS_MERGE' { + cache = "lenient" + ext.args = [ + "--write-index=tbi", + ].join(' ') + ext.prefix = { "${meta.id}.merge" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:BCFTOOLS_ANNOTATE' { + ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.annotate" } + } + + // Impute the variants + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_PHASE' { + ext.args = ["--impute-reference-only-variants"].join(' ') + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chunk.replace(':','_')}.glimpse1" } + ext.suffix = "bcf" + publishDir = [ enabled: false ] + } + + withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_1' { + ext.args = "--csi" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_LIGATE' { + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chr}.ligate.glimpse1" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } + + // Concatenate the imputed chunks + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/glimpse1/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.batch${meta.batch}.glimpse1" } + } +} diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config new file mode 100644 index 00000000..76ef02ce --- /dev/null +++ b/conf/steps/imputation_glimpse2.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_GLIMPSE2:.*' { + publishDir = [ enabled: false ] + tag = {"Batch ${meta.batch} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { + cache = "lenient" + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chunk.replace(':','_')}.glimpse2" } + ext.args = { "--keep-monomorphic-ref-sites" } + ext.suffix = "bcf" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1' { + ext.args = "--csi" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE' { + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chr}.ligate" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2' { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } + + // Concatenate the imputed chunks + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/glimpse2/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.batch${meta.batch}.glimpse2" } + } +} diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config new file mode 100644 index 00000000..acd2d2cc --- /dev/null +++ b/conf/steps/imputation_quilt.config @@ -0,0 +1,50 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/quilt/" }, + mode: params.publish_dir_mode, + ] + tag = {"Batch ${meta.batch} ${meta.chr}"} + } + + // Impute quilt + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:QUILT_QUILT' { + ext.args = "--seed=${params.seed}" + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chunk.replace(':','_')}.quilt" } + publishDir = [enabled: false] + } + + // Annotate quilt imputed VCFs + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_ANNOTATE' { + ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chunk.replace(':','_')}.quilt.annotate" } + publishDir = [ enabled: false ] + } + + // Concatenate quilt imputed VCFs + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/quilt/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.batch${meta.batch}.quilt" } + } +} diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config new file mode 100644 index 00000000..172d2ba2 --- /dev/null +++ b/conf/steps/imputation_stitch.config @@ -0,0 +1,50 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:.*' { + publishDir = [enabled: false] + tag = {"Batch ${meta.batch} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:GAWK' { + ext.args2 = "'BEGIN { OFS=\"\\t\" } NR>1 { split(\$1, a, \"[:-_]\"); print a[1], \$2, \$3, \$4 }'" // Remove duplicates + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chr}.posfile.stitch" } + ext.suffix = "txt" + publishDir = [enabled: false] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:STITCH' { + cache = "lenient" + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chr}.stitch" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:BCFTOOLS_INDEX' { + ext.args = '--tbi' + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:.*' { + publishDir = [enabled:false] + publishDir = [ + path: { "${params.outdir}/imputation/stitch/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.batch${meta.batch}.stitch" } + } +} diff --git a/conf/steps/initialisation.config b/conf/steps/initialisation.config new file mode 100644 index 00000000..59d44707 --- /dev/null +++ b/conf/steps/initialisation.config @@ -0,0 +1,17 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. 
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: 'PIPELINE_INITIALISATION:.*' { + publishDir = [ enabled: false ] + } +} diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config new file mode 100644 index 00000000..23675e9b --- /dev/null +++ b/conf/steps/panel_prep.config @@ -0,0 +1,224 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:.*' { + publishDir = [ + path: { "${params.outdir}/prep_panel/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: false + ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { + ext.args = ["-Oz", "--no-version", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:VCF_CHR_RENAME:GAWK' { + ext.args2 = 'BEGIN {FS="\\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { sub(/^chr/, "", $1); if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' + ext.prefix = { "${meta.id}_chrrename" } + ext.suffix = "txt" + publishDir = [ enabled: false ] + } + + // Subworkflow: VCF_NORMALIZE_BCFTOOLS + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' { + ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_VIEW' { + ext.args = [ + "-v snps", "-m 2", "-M 2", + params.remove_samples ? "-s^${params.remove_samples}" : '', + "--output-type z", "--write-index=tbi" + ].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_normalized" } + publishDir = [ + path: { "${params.outdir}/prep_panel/panel" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if ( !params.compute_freq && !params.phase ) { + filename.equals('versions.yml') ? 
null : filename + } else { + null + } + } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:VCFLIB_VCFFIXUP' { + ext.prefix = { "${meta.id}_${meta.chr}_fixup" } + publishDir = [ + path: { "${params.outdir}/prep_panel/panel" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if ( !params.phase ) { + filename.equals('versions.yml') ? null : filename + } else { + null + } + } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX' { + ext.args = "--tbi" + publishDir = [ + path: { "${params.outdir}/prep_panel/panel" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if ( !params.phase ) { + filename.equals('versions.yml') ? null : filename + } else { + null + } + } + ] + } + + // Subworkflow: VCF_PHASE_SHAPEIT5 + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { + ext.args = "--window-mb 4" + ext.prefix = { "${meta.id}_chunks" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:SHAPEIT5_PHASECOMMON' { + ext.prefix = { "${meta.id}_${meta.chunk.replace(':',"_")}_chunks" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:SHAPEIT5_LIGATE' { + ext.prefix = { "${meta.id}_${meta.chr}_phased" } + publishDir = [ + path: { "${params.outdir}/prep_panel/panel" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:VCF_BCFTOOLS_INDEX_2' { + ext.prefix = { "${meta.id}_${meta.chr}_phased" } + ext.args = "--csi" + publishDir = [ + path: { "${params.outdir}/prep_panel/panel" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: true + ] + } + + // Subworkflow: VCF_SITES_EXTRACT_BCFTOOLS + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_CONVERT' { + ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} + publishDir = [ + path: { "${params.outdir}/prep_panel/haplegend/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_VIEW' { + ext.args = [ + "-G", + "-m 2", + "-M 2", + "-v snps", + "--output-type z", + "--write-index=tbi", + "--no-version" + ].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" } + publishDir = [ + path: { "${params.outdir}/prep_panel/sites/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX' { + ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" } + publishDir = [ + path: { "${params.outdir}/prep_panel/sites/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: true + ] + } + + + // Subworkflow: Concat phased panel + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_PANEL:.*' { + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_PANEL:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.panel" } + } + + // Subworkflow: Make chunks + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:.*' { + publishDir = [ + path: { "${params.outdir}/prep_panel/chunks/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = "--window-size 4000000" + ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse1" } + publishDir = [ + path: { "${params.outdir}/prep_panel/chunks/glimpse1/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: true + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { + ext.args = "--window-mb 4" + ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" } + publishDir = [ + path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: true + ] + } + +} diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config new file mode 100644 index 00000000..17035abb --- /dev/null +++ b/conf/steps/simulation.config @@ -0,0 +1,64 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + // Optional subworkflow to extract regions + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_EXTRACT_REGION_SAMTOOLS:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_EXTRACT_REGION_SAMTOOLS:SAMTOOLS_VIEW' { + label = 'process_medium' + ext.args = ["--output-fmt bam", "--write-index"].join(' ') + ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_EXTRACT_REGION_SAMTOOLS:SAMTOOLS_MERGE' { + cache = "lenient" + ext.prefix = { "${meta.id}" } + tag = {"${meta.id} ${meta.chr}"} + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_EXTRACT_REGION_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = "" + } + + // Downsampling subworkflow + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE_SAMTOOLS:.*' { + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE_SAMTOOLS:SAMTOOLS_DEPTH' { + publishDir = [enabled: false] + ext.prefix = { "${meta1.id}_C${meta1.chr ?: "all"}.depth" } + 
tag = {"${meta1.id} ${meta1.chr ?: "all"}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE_SAMTOOLS:GAWK' { + publishDir = [enabled: false] + ext.args2 = "'{ total += \$3 } END { print total/NR }'" + ext.prefix = { "${meta.id}_C${meta.chr ?: "all"}.depth.mean" } + ext.suffix = "tsv" + tag = {"${meta.id} ${meta.chr ?: "all"}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE_SAMTOOLS:SAMTOOLS_VIEW' { + cache = "lenient" + ext.args = ["--output-fmt bam", "--write-index"].join(' ') + ext.prefix = { "${meta.id}.depth_${meta.depth}x" } + publishDir = [ + path: { "${params.outdir}/simulation/samples" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + tag = {"${meta.id} ${meta.chr}"} + } +} diff --git a/conf/steps/validation.config b/conf/steps/validation.config new file mode 100644 index 00000000..58103506 --- /dev/null +++ b/conf/steps/validation.config @@ -0,0 +1,138 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + // Configuration for the validation step + + // Compute genotype likelihoods for the truth set + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:.*' { + publishDir = [ enabled: false ] + tag = {"${meta.id} ${meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:GAWK' { + ext.args2 = "'NR>1 { split(\$1, a, \"[:-_]\"); print a[1], \$2, \$3 \",\" \$4 }'" + ext.prefix = { "${meta.id}" } + ext.suffix = "tsv" + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_MPILEUP' { + label = 'process_high' + cache = "lenient" + ext.args = [ + "-I", + "-E", + "-a 'FORMAT/DP'" + ].join(' ') + ext.args2 = [ + "-Aim", + "-C alleles" + ].join(' ') + ext.prefix = { "${meta.id}.truth.call" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_MERGE' { + ext.args = [ + "--write-index=tbi", + ].join(' ') + ext.prefix = { "${meta.id}" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_ANNOTATE' { + ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}.annotate" } + } + + // Concatenate the truth set + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { + ext.prefix = { "${meta.id}.batch${meta.batch}.truth" } + publishDir = [ + path: { "${params.outdir}/validation/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ') + } + + // Compute sample files for renaming + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY_TRUTH' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.args = '--list-samples' + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GAWK_TRUTH' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.prefix = { "${meta.id}_samples"} + ext.args2 = "'BEGIN { OFS = \"\\t\" } { print \$1, \"-\", \$1\".truth\" }'" + publishDir = [enabled: false] + } + + // Split by samples + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:SPLIT_TRUTH:BCFTOOLS_PLUGINSPLIT' { + ext.args = ["--output-type z", "--write-index=tbi"].join(' ') + publishDir = [ + path: { "${params.outdir}/validation/samples" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename} + ] + } + + // Validation subworkflow + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { + publishDir = [ + path: { "${params.outdir}/validation/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + tag = {"${meta.id} ${meta.panel} ${meta.tools}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GLIMPSE2_CONCORDANCE' { + ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}.concordance" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK_ERROR_SPL' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? 
'_T' + meta.tools : ''}.concordance.renamed.error.spl" } + ext.suffix = "txt.gz" + ext.args2 = { "-v tool=\"${meta.tools}\" " + "'BEGIN { OFS = \" \" } !/^#/ { \$3 = \$3\".\"tool } { print }'" } + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK_RSQUARE_SPL' { + tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" } + ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}.concordance.renamed.rsquare.spl" } + ext.suffix = "txt.gz" + ext.args2 = { "-v tool=\"${meta.tools}\" " + "'BEGIN { OFS = \" \" } !/^#/ { \$1 = \$1\".\"tool } { print }'" } + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GUNZIP' { + ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:ADD_COLUMNS' { + ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}_SNP" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { + ext.args2 = "'(NR == 1) || (FNR > 1)'" // Skip header line + ext.suffix = { "txt" } + tag = {"${meta.id}"} + } +} diff --git a/conf/test.config b/conf/test.config index bbfbd1c9..6774be7b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,26 +1,45 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/phaseimpute -profile test, - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h - - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - single_end = false - readPaths = [ - ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], - ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] - ] + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = "${projectDir}/tests/csv/sample_bam.csv" + input_region = "${projectDir}/tests/csv/region.csv" + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + fasta_fai = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai" + panel = 
"${projectDir}/tests/csv/panel.csv" + + // Pipeline steps + steps = "impute" + + // External params + chunks = "${projectDir}/tests/csv/chunks.csv" + posfile = "${projectDir}/tests/csv/posfile_legend.csv" + + // Impute tools + tools = "glimpse1" } + + diff --git a/conf/test_all.config b/conf/test_all.config new file mode 100644 index 00000000..7f1e2200 --- /dev/null +++ b/conf/test_all.config @@ -0,0 +1,60 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test_all, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test simulation / imputation / validation mode' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = "${projectDir}/tests/csv/sample_sim.csv" + input_region = "${projectDir}/tests/csv/region.csv" + depth = 1 + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + + // Panel preparation + panel = "${projectDir}/tests/csv/panel.csv" + phase = true + normalize = true + compute_freq = true + chunk_model = "recursive" + remove_samples = "NA12878,NA12891,NA12892" + + // Pipeline steps + steps = "all" + + // Impute tools + tools = "glimpse1,glimpse2,stitch,quilt" +} + +process { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') + } + + withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + } +} diff --git a/conf/test_all_fullchr.config b/conf/test_all_fullchr.config new file mode 100644 index 00000000..7765bc30 --- /dev/null +++ b/conf/test_all_fullchr.config @@ -0,0 +1,40 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test_all_fullchr, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 24, + memory: '50.GB', + time: '4.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check all steps on full chromosomes' + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + panel = "${projectDir}/tests/csv/panel_fullchr.csv" + input = "${projectDir}/tests/csv/sample_sim_full.csv" + + // Pipeline steps + steps = "all" + tools = "glimpse1,glimpse2,quilt,stitch" + depth = 1 + + // Panelprep optional args + remove_samples = "NA12878,NA19401,NA20359,NA12891,NA12892,NA20362" + normalize = true + compute_freq = false + phase = false +} diff --git a/conf/test_batch.config b/conf/test_batch.config new file mode 100644 index 00000000..3c823680 --- /dev/null +++ 
b/conf/test_batch.config @@ -0,0 +1,44 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test_glimpse2, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test batch profile' + config_profile_description = 'Minimal test dataset to check batch effect with glimpse2 and quilt' + + // Input data + input = "${projectDir}/tests/csv/sample_bam.csv" + input_truth = "${projectDir}/tests/csv/sample_validate_truth.csv" + input_region = "${projectDir}/tests/csv/region.csv" + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + panel = "${projectDir}/tests/csv/panel.csv" + + // Pipeline steps + steps = "impute,validate" + + // External params + chunks = "${projectDir}/tests/csv/chunks.csv" + posfile = "${projectDir}/tests/csv/posfile.csv" + + // Imputation arguments + tools = "glimpse2,quilt" + batch_size = 2 +} diff --git a/conf/test_dog.config b/conf/test_dog.config new file mode 100644 index 00000000..f7a86f98 --- /dev/null +++ b/conf/test_dog.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/phaseimpute -profile test_dog_panelprep, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check panel prepation steps in dog genome' + + // Genome references + fasta = params.pipelines_testdata_base_path + "dog_data/reference_genome/canFam3.s.fa.gz" + fasta_fai = params.pipelines_testdata_base_path + "dog_data/reference_genome/canFam3.s.fa.gz.fai" + panel = "${projectDir}/tests/csv/panel_dog.csv" + + // Panelprep optional args + phase = true + normalize = false + compute_freq = false + rename_chr = true + chunk_model = "recursive" + + // Input data + input = params.pipelines_testdata_base_path + "dog_data/csv/sample_dog.csv" + + // Pipeline steps + steps = "panelprep,impute" + tools = "glimpse1,glimpse2,quilt" +} + +process { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + } +} diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 00000000..94e04dd9 --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size 
tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 12, + memory: '50.GB', + time: '6.h' + ] +} + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Genome references + //map = "https://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/plink.GRCh38.map.zip" + genome = "GRCh38" + + // Input data + input = "${projectDir}/tests/csv/sample_sim_full.csv" + input_truth = "${projectDir}/tests/csv/sample_sim_full_truth.csv" + panel = "${projectDir}/tests/csv/panel_full.csv" + + // Pipeline steps + steps = "all" + + // Panelprep optional args + remove_samples = "NA12878,NA12891,NA12892" + normalize = true + compute_freq = false + phase = false + + // Impute tools + tools = "glimpse2" + + // Concordance arguments + min_val_gl = null + min_val_dp = null +} + +process { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GLIMPSE2_CONCORDANCE' { + ext.args = "--gt-val --af-tag AF" + ext.prefix = { "${meta.id}${meta.panel ? '_P' + meta.panel : ''}${meta.tools ? '_T' + meta.tools : ''}.concordance" } + publishDir = [ enabled: false ] + } +} diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config new file mode 100644 index 00000000..11562d33 --- /dev/null +++ b/conf/test_glimpse2.config @@ -0,0 +1,40 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/phaseimpute -profile test_glimpse2, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function with GLIMPSE2' + + // Input data + input = "${projectDir}/tests/csv/sample_bam.csv" + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + panel = "${projectDir}/tests/csv/panel.csv" + + // Pipeline steps + steps = "impute" + + // External params + chunks = "${projectDir}/tests/csv/chunks.csv" + + // Impute tools + tools = "glimpse2" +} diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config new file mode 100644 index 00000000..d85cbc90 --- /dev/null +++ b/conf/test_panelprep.config @@ -0,0 +1,53 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/phaseimpute -profile test_panelprep, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check panel prepation steps' + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + input_region = "${projectDir}/tests/csv/region.csv" + panel = "${projectDir}/tests/csv/panel.csv" + + // Panelprep optional args + phase = true + normalize = true + compute_freq = true + remove_samples = "HG00096,HG00097,HG00099,HG00100" + chunk_model = "recursive" + + // Pipeline steps + steps = "panelprep" +} + +process { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_SHAPEIT5:GLIMPSE2_CHUNK' { + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + } +} diff --git a/conf/test_quilt.config b/conf/test_quilt.config new file mode 100644 index 00000000..1f305de3 --- /dev/null +++ b/conf/test_quilt.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline 
test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test_quilt, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Minimal Quilt Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function using the tool QUILT' + + // Input data + input = "${projectDir}/tests/csv/sample_bam.csv" + input_region = "${projectDir}/tests/csv/region.csv" + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + + // Pipeline steps + steps = "impute" + + // External params + chunks = "${projectDir}/tests/csv/chunks.csv" + posfile = "${projectDir}/tests/csv/posfile_hap_legend.csv" + + // Impute tools + tools = "quilt" +} diff --git a/conf/test_sim.config b/conf/test_sim.config new file mode 100644 index 00000000..e05362ea --- /dev/null +++ b/conf/test_sim.config @@ -0,0 +1,35 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/phaseimpute -profile test_sim, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test simulation / imputation / validation mode' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = "${projectDir}/tests/csv/sample_sim.csv" + input_region = "${projectDir}/tests/csv/region.csv" + depth = 1 + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + + // Pipeline steps + steps = "simulate" +} diff --git a/conf/test_stitch.config b/conf/test_stitch.config new file mode 100644 index 00000000..8699253c --- /dev/null +++ b/conf/test_stitch.config @@ -0,0 +1,38 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/phaseimpute -profile test_stitch, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Minimal Stitch Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function using the tool STITCH' + + // Input data + input = "${projectDir}/tests/csv/sample_bam.csv" + input_region = "${projectDir}/tests/csv/region.csv" + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + posfile = "${projectDir}/tests/csv/posfile_legend.csv" + + // Pipeline steps + steps = "impute" + + // Impute tools + tools = "stitch" +} diff --git a/conf/test_validate.config b/conf/test_validate.config new file mode 100644 index 00000000..29d703da --- /dev/null +++ b/conf/test_validate.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/phaseimpute -profile test_validate, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test validation mode' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = "${projectDir}/tests/csv/sample_validate_imputed.csv" + input_truth = "${projectDir}/tests/csv/sample_validate_truth.csv" + input_region = "${projectDir}/tests/csv/region.csv" + + // Genome references + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + posfile = "${projectDir}/tests/csv/posfile_vcf_index.csv" + map = "${projectDir}/tests/csv/map.csv" + + // Pipeline steps + steps = "validate" +} diff --git a/docs/README.md b/docs/README.md index c5e29a34..b04a3490 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,12 +1,10 @@ # nf-core/phaseimpute: Documentation -The nf-core/phaseimpute documentation is split into the following files: +The nf-core/phaseimpute documentation is split into the following pages: -1. [Installation](https://nf-co.re/usage/installation) -2. Pipeline configuration - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) -3. [Running the pipeline](usage.md) -4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. 
+ +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 00000000..2173cb6f --- /dev/null +++ b/docs/development.md @@ -0,0 +1,26 @@ +# Tips for development + +## Channel management and combination + +All channels need to be identified by a meta map. To follow which information is available, the `meta` argument +is suffixed with a combination of the following capital letters: + +- I : individual id +- P : panel id +- R : region used +- M : map used +- T : tool used +- G : reference genome used (is it needed ?) +- S : simulation (depth or genotype array) + +Therefore, the following channel operation example includes a meta map containing the panel id with the region and tool used: + +```nextflow +ch_panel_for_impute.map { + metaPRT, vcf, index -> ... +} +``` + +## Release names + +The names of releases are composed of a color and a dog breed. 
diff --git a/docs/images/InputSoftware_compatibility.drawio b/docs/images/InputSoftware_compatibility.drawio new file mode 100644 index 00000000..9920a06b --- /dev/null +++ b/docs/images/InputSoftware_compatibility.drawio @@ -0,0 +1 @@ +7Vzdd6I4FP9rPGf3oXP4jPjY2s7H7nQ7ne52p/syJ0JUpkBYiK3OXz9BgkACgoqAM9qHwk0i5H7+cnPjQB27y3cB9Oe32ELOQJGs5UC9HiiKokoK/RdRVjFF1oEUU2aBbTFaSniwvyNGTLotbAuFuY4EY4fYfp5oYs9DJsnRYBDg13y3KXbyT/XhDAmEBxM6IvVf2yLzmGoow5T+HtmzefJkGYziFhcmndlMwjm08GuGpN4M1HGAMYmv3OUYORH3Er7E496WtG5eLEAeqTPgq/GH8/nmm/3yvHr48+6fx/+lx7cXIP6WF+gs2ITZy5JVwgFkUYawWxyQOZ5hDzo3KfUqwAvPQtFjJHqX9vmIsU+JMiV+Q4SsmHThgmBKmhPXYa3iVNjsQrwITLTl/ROVgMEMkS392LSiuWQewBj1DmEXkWBFOwTIgcR+yQsfMh2abfqlbKYXjNM7cF0WuP7B8xdkoACHTuJqEtCrWXR1dXlL+2F6Lz2O3wqiyTP+dW4T9ODDNcNeqT3mmTy1HWeMHfpl0Vh1OkXANCk9JAF+RpkWaziaSNI2sbyggKDlVkayVjUxd2b/Brt9TW1JTgxknrEjTToS64cC69c8bl/n0dImX9jw6Popun6js7vrZabpepXceJQFX9KO0e1Tti0dtr5Lxu1vX0pN+9J7ZV+JF8tI+c5DJUbUoqzlfWQt5ySdCr4zWctaTWEzH3AhvWFWV1v66++6DAK4ynTwse2RMPOoTxEhfY6u5H2NInEBsaK/nu9PL+I34EYnr4On05DyidfVzfwPUF+tEyVtWuGqFWlYU5FW+aCwmxpVyl2VuGATWwoblYr3UH3Ujb7ol+Ad/175kXscqAUYY47dySLcA19MlRJ8ASZABw3hCyPPYxmIAMMowBfGsfCFPCoJPVeQmPNOgcYm4DxlWvoCNLS6PgPsHHxk+mnEccicQWu844jf/mDHIXNKrRrbAxn/XmpfHA0Q0fYt9CJWTGJr4Nc7v11crFu+hlSbfz9by8HW0jguL9bATWjumWUooHvLuIev6vNH98P75//A44V9//nFNZIQ3BHC20tPOR06BPI1rn8bCMfJuWlIp2o76FNTXlQTnOiMqoIfDfNSV8qpUziHfnRJhQQdBzl4FkCXytJHgU1fCAV826e0oRrpLVGSqi1AfpaODEsrQn6GMlFBQ8hPCHqKiPzaTS0Zgpz+uus0hPU6s6TX9Bh1kw0tZW4lQchPNw9dSnmvnFLvpDzqlZR1QcjvHNv1QySf5CKZR0EbF9jZIrkb+HMS0H5UF0nJvTIZMe1x69sOimBKf5AJkik2GRaZ2wgMVdhUTopfdHSOTBT1bG9VdtTctkfjBle4ghT3kLdvcX2EE+TkBQQde+bRa5MybW1dkY7bJnQuWYNrW1asCyi0v8PJ+vsidrN1Ev1y/WqgX28EIBjJpjCDDR5syiGystqitNsSewrI2VkzSb4LbsTxsmOyKMJbRJXw7DE36aPOPKZ+RigtOsJiS9S4FT7g8WhDSUX+OXpFUrGi/3GSQLKYBUrXJGX+IsAmCmusSibQfJ6ttfduQRzbK0vsQGRMC1crwDTQZHokZ9B5zZAuCSw+O4OWdxjacgb8WhlouzkDrv+RnIGYoLhf2A75yTyBIvXNE2ijsycoVcq6O/Mn4gn4DYZ+egKxgPuB2Fs3hE7SFfQOE8hiaQPDY+LK4bRZr3B77npBiVXLeEw/e+Ey3iQ5kcp0lio17a4PS1GKjoydiZBIsCAFtUI
nfzYC9O1shHI+HLGTrSUmVG1rdQudD61s5ly1oCol0KixwmKxBOJ0z120rkZ9O09h7Hiegu+vV/QfSlv7716cV/mEukuFpuwhkezPUGkP+lZpr4IzCqwKONWRqV+FN2pBwfivWkag966MQBf3IHoR3ntui3VNLA3vxlDfJ8A3Fv92RQqg68jfWMAuKGP6Nbfkee/Tgy354dnHlPFGM2rGe73xJP1h+ynignFtb1Y00PUXZH31Yk5FsNz79A6/jz3sPm9qnIoF7W8Jutq0gpekWvjkXcuplmSe2aVlnCeV3MSCTtNugNY7uxFz02PsRpMsxwUnudWjSyVK3RnrgVjd1FOX1XjQrz4ZZbTj6viN7aHCSbvkcGVjp8jFLNrPrhTIsy6jH6+jtx6OfMIhdRltbT7wlVM1f1alswIP/jBvLws8wK97dKN6MdMS1uPTFa07QDHvNsaeCaNXnETzM+cBdnFIHxSWI5L2MxVTPfqL6NgjGXr8KQQz689x1l+g8/11IJZtnuVYKUd+PXBMOdLb9AdZY/NNf9dWvfkB diff --git a/docs/images/InputSoftware_compatibility.png b/docs/images/InputSoftware_compatibility.png new file mode 100644 index 00000000..68930a9d Binary files /dev/null and b/docs/images/InputSoftware_compatibility.png differ diff --git a/docs/images/logo/nf-core-phaseimpute.svg b/docs/images/logo/nf-core-phaseimpute.svg new file mode 100644 index 00000000..759906ed --- /dev/null +++ b/docs/images/logo/nf-core-phaseimpute.svg @@ -0,0 +1,605 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/logo/nf-core-phaseimpute_hexagonal_logo.png b/docs/images/logo/nf-core-phaseimpute_hexagonal_logo.png new file mode 100644 index 00000000..842d9cf0 Binary files /dev/null and b/docs/images/logo/nf-core-phaseimpute_hexagonal_logo.png differ diff --git a/docs/images/logo/nf-core-phaseimpute_logo_dark.png b/docs/images/logo/nf-core-phaseimpute_logo_dark.png new file mode 100644 index 00000000..a85d7440 Binary files /dev/null and b/docs/images/logo/nf-core-phaseimpute_logo_dark.png differ diff --git a/docs/images/logo/nf-core-phaseimpute_logo_light.png b/docs/images/logo/nf-core-phaseimpute_logo_light.png new file mode 100644 index 00000000..12729a9d Binary files /dev/null and 
b/docs/images/logo/nf-core-phaseimpute_logo_light.png differ diff --git a/docs/images/logo/nf-core-phaseimpute_logo_only_dark.png b/docs/images/logo/nf-core-phaseimpute_logo_only_dark.png new file mode 100644 index 00000000..cf95dfd1 Binary files /dev/null and b/docs/images/logo/nf-core-phaseimpute_logo_only_dark.png differ diff --git a/docs/images/logo/nf-core-phaseimpute_logo_only_light.png b/docs/images/logo/nf-core-phaseimpute_logo_only_light.png new file mode 100644 index 00000000..1abda022 Binary files /dev/null and b/docs/images/logo/nf-core-phaseimpute_logo_only_light.png differ diff --git a/docs/images/metro/Impute.png b/docs/images/metro/Impute.png new file mode 100644 index 00000000..aa8cb984 Binary files /dev/null and b/docs/images/metro/Impute.png differ diff --git a/docs/images/metro/MetroMap.png b/docs/images/metro/MetroMap.png new file mode 100644 index 00000000..00ab68aa Binary files /dev/null and b/docs/images/metro/MetroMap.png differ diff --git a/docs/images/metro/MetroMap.svg b/docs/images/metro/MetroMap.svg new file mode 100644 index 00000000..1812bd71 --- /dev/null +++ b/docs/images/metro/MetroMap.svg @@ -0,0 +1,2223 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Steps + Processes + + + + panelprep + + + + + impute + + + + + simulate + + + + + validate + + + Optional + + + + Mandatory + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SampleLow-pass + Chunks + Panel + Sites + NormalizePanel + Panel + SampleHigh-depth + Sampletruth + SampleImputed + Statistics + Phase + Chunkcontigs + Extractsites + Imputewith tool + Statistics + ComputeGL + Concordance + Downsampleto depth + ExtractRegion + 1 + 2 + 3 + 4 + + + + + + + txt + + + + + + tsv + + + + + + + fastq + + + + + + bam + + + + + + + + fastq + + + + + + bam + + + + + + + + fastq + + + + + + fastq + + + + + + vcf + + + + + + + + fastq + + + + + + 
fastq + + + + + + vcf + + + + + + + + fastq + + + + + + fastq + + + + + + vcf + + + + + + + + fastq + + + + + + fastq + + + + + + vcf/bam + + + + + + + csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/metro/MetroMap.xml b/docs/images/metro/MetroMap.xml new file mode 100644 index 00000000..bc2e8b71 --- /dev/null +++ b/docs/images/metro/MetroMap.xml @@ -0,0 +1,7283 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/metro/MetroMap_animated.svg b/docs/images/metro/MetroMap_animated.svg new file mode 100644 index 00000000..0508b3db --- /dev/null +++ b/docs/images/metro/MetroMap_animated.svg @@ -0,0 +1,2467 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Steps + Processes + + + + panelprep + + + + + simulate + + + + + validate + + + Optional + + + + Mandatory + + + + + impute + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SampleLow-pass + Chunks + Panel + Sites + NormalizePanel + Panel + SampleHigh-depth + Sampletruth + SampleImputed + Statistics + Phase + Chunkcontigs + Extractsites + Imputewith tool + Statistics + ComputeGL + Concordance + Downsampleto depth + ExtractRegion + 1 + 2 + 3 + 4 + + + + + + + 
txt + + + + + + tsv + + + + + + + fastq + + + + + + bam + + + + + + + + fastq + + + + + + bam + + + + + + + + fastq + + + + + + fastq + + + + + + vcf + + + + + + + + fastq + + + + + + fastq + + + + + + vcf + + + + + + + + fastq + + + + + + fastq + + + + + + vcf + + + + + + + + fastq + + + + + + fastq + + + + + + vcf/bam + + + + + + + csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/metro/PanelPrep.png b/docs/images/metro/PanelPrep.png new file mode 100644 index 00000000..fd9d79cc Binary files /dev/null and b/docs/images/metro/PanelPrep.png differ diff --git a/docs/images/metro/Simulate.png b/docs/images/metro/Simulate.png new file mode 100644 index 00000000..716e961f Binary files /dev/null and b/docs/images/metro/Simulate.png differ diff --git a/docs/images/metro/Validate.png b/docs/images/metro/Validate.png new file mode 100644 index 00000000..017057ae Binary files /dev/null and b/docs/images/metro/Validate.png differ diff --git a/docs/images/metro/filtermetro.md b/docs/images/metro/filtermetro.md new file mode 100644 index 00000000..3f73bf00 --- /dev/null +++ b/docs/images/metro/filtermetro.md @@ -0,0 +1,72 @@ +# How to generate and filter metro map. 
+ +## What we want + +- Easily updatable diagram +- Expandable / filter at different depth +- Customizable +- Non used channel not visible (cleaning) +- Correct names +- Easy to read and identify, workflow, subworkflow, function, process, files +- Easy to use and configurable + +## Available solutions with pro and cons + +- `-with-dag` generate a mermaid diagram mostly complete + - Pros : Easy to use, already implemented, dependant of the workflow + - Cons : Difficult to read, messy (many not used channels), not exhaustive of the pipeline + +## Run pipeline with dag + +```bash +nextflow run main.nf -profile test_sim,singularity --outdir results -with-dag -preview +``` + +## Extract mermaid from html + +```bash +python docs/images/metro/metro.py -f results/pipeline_info/pipeline_dag_2024-05-25_23-27-14.html +``` + +````bash +#Get last html +htmlfile=$(find results/pipeline_info -name "pipeline_dag_*.html" -printf "%T@ %p\n" | sort -n | tail -1 | awk '{print $2}') +#Extract mermaid +content=$(sed -n '/
<pre class="mermaid">/,/<\/pre>/p' $htmlfile \
+    | sed 's/<pre class="mermaid">//g' \
+    | sed 's/<\/pre>//g')
+#Register into markdown
+mdfile="docs/images/metro/mermaid.md"
+touch $mdfile
+echo '```mermaid' > $mdfile
+echo "$content" >> $mdfile
+echo '```' >> $mdfile
+````
+
+## Filter
+
+### Extract all empty
+
+```bash
+grep -oP 'v\d+\[" "\]' $mdfile | sed 's/\[" "\]//g' > docs/images/metro/empty.txt
+```
+
+### Extract all relation
+
+```bash
+grep -oP 'v\d+ --> v\d+' $mdfile | sed 's/ --> /\t/g' > docs/images/metro/relationships.txt
+```
+
+### Structure
+
+- Flowchart
+  - Subgraph1
+    - Name
+    - Type
+    - Content
+      - Process1
+        - Name
+        - Type
+      - Process2
+        - Name
+        - Type
diff --git a/docs/images/metro/legend.png b/docs/images/metro/legend.png
new file mode 100644
index 00000000..d91a2dfd
Binary files /dev/null and b/docs/images/metro/legend.png differ
diff --git a/docs/images/metro/metro.py b/docs/images/metro/metro.py
new file mode 100644
index 00000000..94417269
--- /dev/null
+++ b/docs/images/metro/metro.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on 25/05/2024
+@author: LouisLeNezet
+Main script to generate and filter the metro maps
+"""
+
+import argparse
+import pandas as pd
+from lxml import html, etree
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-f", "--file", type=str)
+
+args = parser.parse_args()
+file_path = args.file
+
+with open(file_path) as f:
+    page = f.read()
+
+tree = html.fromstring(page)
+pre = tree.find_class("mermaid")
+
+mermaid = pre[0].text_content().split("\n")
+mermaid = [x.strip() for x in mermaid if x.strip() != ""]
+
+# Extract relationships from mermaid
+rels = [x for x in mermaid if "-->" in x]
+rels = pd.DataFrame(rels, columns=["Rel"])
+rels[["From", "To"]] = rels["Rel"].str.split(" --> ", expand=True)
+
+# Extract graph structure
+graph = [x for x in mermaid if "-->" not in x]
+graph = pd.DataFrame(graph, columns=["Nodes"])
+names = graph["Nodes"].str.findall("^v\\d+")
+graph["Names"] = [x[0] if len(x)==1 else x for x in names]
+
+# Extract nodes to delete
+to_del = []
+for pat in ['[" "]']:
+    to_del = to_del + [x.split(pat)[0] for x in graph["Nodes"] if pat in x]
+
+# Delete unwanted nodes
+for x in to_del:
+    all_to = rels.loc[(rels["From"] == x)]
+    if len(all_to) == 0:
+        rels = rels.loc[(rels["To"] != x)]
+        graph = graph.loc[(graph["Names"] != x)]
+    elif len(all_to) == 1:
+        print(x, "link to one")
+    else :
+        print(x, "link to more than one")
+
+new_graph="\n".join(graph["Nodes"])
+new_rels = "\n".join(rels["Rel"])
+
+new_mermaid="\n".join(["\n",new_graph, new_rels])
+
+with open("new.html", "wb") as f:
+    tree.find_class("mermaid")[0].text = new_mermaid
+    f.write(etree.tostring(tree))
diff --git a/docs/images/metro/new.html b/docs/images/metro/new.html
new file mode 100644
index 00000000..3383825e
--- /dev/null
+++ b/docs/images/metro/new.html
@@ -0,0 +1,355 @@
+
+  
+    
+  
+  
+    
+
+flowchart TB
+subgraph " "
+v1["Channel.of"]
+v2["Channel.of"]
+v11["channel.fromSamplesheet"]
+v19["channel.fromSamplesheet"]
+v20["channel.fromSamplesheet"]
+v23["channel.fromSamplesheet"]
+v24["Channel.of"]
+v25["Channel.of"]
+v44["qname"]
+v74["qname"]
+v129["regions"]
+v130["targets"]
+v131["samples"]
+v138["bed"]
+v149["regions"]
+v150["targets"]
+v151["samples"]
+v160["regions"]
+v161["targets"]
+v162["samples"]
+v177["regions"]
+v178["targets"]
+v179["samples"]
+v182["program_file"]
+v189["Channel.of"]
+v191["Channel.of"]
+v193["Channel.of"]
+v261["model"]
+v275[" "]
+v276[" "]
+v288["Channel.of"]
+v326["save_mpileup"]
+v332["Channel.of"]
+v357[" "]
+v358["min_val_gl"]
+v359["min_val_dp"]
+v383["program_file"]
+v393["Channel.of"]
+v396["Channel.fromPath"]
+v399["ch_workflow_summary"]
+v403["ch_methods_description"]
+end
+subgraph PIPELINE_INITIALISATION
+v3([SAMTOOLS_FAIDX])
+v8(( ))
+end
+subgraph " "
+v16["files_ext"]
+v26["ch_genotype"]
+v184["posfile"]
+v257["chunks_glimpse1"]
+v267["chunks_glimpse2"]
+v300["ch_impute_output"]
+v385["stats"]
+v415["multiqc_report"]
+end
+subgraph NFCORE_PHASEIMPUTE
+subgraph PHASEIMPUTE
+subgraph BAM_REGION
+v45([SAMTOOLS_VIEW])
+v53([SAMTOOLS_INDEX])
+end
+subgraph BAM_DOWNSAMPLE
+v64([SAMTOOLS_COVERAGE])
+v75([SAMTOOLS_VIEW])
+v83([SAMTOOLS_INDEX_1])
+v92([SAMTOOLS_MERGE])
+v97([SAMTOOLS_INDEX_2])
+v58(( ))
+v88(( ))
+end
+subgraph VCF_CHR_CHECK
+v108([VCFCHRBFR])
+v107(( ))
+end
+subgraph VCF_NORMALIZE_BCFTOOLS
+v123([BCFTOOLS_NORM])
+v125([BCFTOOLS_INDEX_1])
+v132([BCFTOOLS_DEL_MLT_ALL])
+v134([BCFTOOLS_INDEX_2])
+v139([BCFTOOLS_CONVERT])
+v128(( ))
+end
+subgraph VCF_SITES_EXTRACT_BCFTOOLS
+v152([BCFTOOLS_VIEW])
+v155([BCFTOOLS_INDEX])
+v163([BCFTOOLS_QUERY])
+v166([TABIX_BGZIP])
+v170([TABIX_TABIX])
+end
+subgraph PREPARE_POSFILE_TSV
+v180([BCFTOOLS_QUERY])
+v183([GAWK])
+end
+subgraph VCF_PHASE_PANEL
+subgraph VCF_PHASE_SHAPEIT5
+v201([BEDTOOLS_MAKEWINDOWS])
+v210([SHAPEIT5_PHASECOMMON])
+v213([VCF_BCFTOOLS_INDEX_1])
+v223([SHAPEIT5_LIGATE])
+v226([VCF_BCFTOOLS_INDEX_2])
+v190(( ))
+v192(( ))
+v194(( ))
+v206(( ))
+end
+end
+subgraph CONCAT_PANEL
+v240([BCFTOOLS_CONCAT])
+v243([BCFTOOLS_INDEX])
+end
+subgraph VCF_CHUNK_GLIMPSE
+v251([GLIMPSE_CHUNK])
+v262([GLIMPSE2_CHUNK])
+v255(( ))
+v265(( ))
+end
+subgraph BAM_IMPUTE_QUILT
+v277([QUILT_QUILT])
+v283([BCFTOOLS_INDEX_1])
+v290([BCFTOOLS_ANNOTATE])
+v293([BCFTOOLS_INDEX_2])
+v102(( ))
+v287(( ))
+end
+subgraph CONCAT_QUILT
+v304([BCFTOOLS_CONCAT])
+v307([BCFTOOLS_INDEX])
+v297(( ))
+end
+subgraph GL_TRUTH
+v327([BCFTOOLS_MPILEUP])
+v334([BCFTOOLS_ANNOTATE])
+v337([BCFTOOLS_INDEX])
+v12(( ))
+v331(( ))
+end
+subgraph VCF_CONCORDANCE_GLIMPSE2
+v360([GLIMPSE2_CONCORDANCE])
+v375([GUNZIP])
+v378([ADD_COLUMNS])
+v384([GAWK])
+v381(( ))
+end
+v410([MULTIQC])
+v6(( ))
+v407(( ))
+v408(( ))
+v409(( ))
+v414(( ))
+end
+end
+v1 --> v3
+v1 --> v8
+v2 --> v3
+v3 --> v6
+v3 --> v8
+v11 --> v12
+v12 --> v16
+v19 --> v8
+v19 --> v107
+v20 --> v12
+v23 --> v12
+v24 --> v58
+v25 --> v26
+v44 --> v45
+v8 --> v45
+v12 --> v45
+v45 --> v53
+v45 --> v6
+v45 --> v58
+v53 --> v6
+v53 --> v58
+v8 --> v64
+v58 --> v64
+v64 --> v6
+v64 --> v58
+v74 --> v75
+v8 --> v75
+v58 --> v75
+v75 --> v83
+v75 --> v6
+v75 --> v88
+v83 --> v6
+v83 --> v88
+v8 --> v92
+v88 --> v92
+v92 --> v97
+v92 --> v102
+v97 --> v6
+v97 --> v102
+v107 --> v108
+v108 --> v6
+v108 --> v8
+v8 --> v123
+v123 --> v125
+v123 --> v6
+v123 --> v128
+v125 --> v6
+v125 --> v128
+v129 --> v132
+v130 --> v132
+v131 --> v132
+v128 --> v132
+v132 --> v134
+v132 --> v6
+v132 --> v12
+v134 --> v6
+v134 --> v12
+v138 --> v139
+v8 --> v139
+v12 --> v139
+v139 --> v6
+v139 --> v102
+v149 --> v152
+v150 --> v152
+v151 --> v152
+v12 --> v152
+v152 --> v155
+v152 --> v6
+v152 --> v12
+v155 --> v6
+v155 --> v12
+v160 --> v163
+v161 --> v163
+v162 --> v163
+v12 --> v163
+v163 --> v166
+v163 --> v6
+v166 --> v170
+v166 --> v6
+v166 --> v12
+v170 --> v6
+v170 --> v12
+v177 --> v180
+v178 --> v180
+v179 --> v180
+v12 --> v180
+v180 --> v183
+v180 --> v6
+v182 --> v183
+v183 --> v184
+v183 --> v6
+v189 --> v190
+v191 --> v192
+v193 --> v194
+v12 --> v201
+v201 --> v6
+v201 --> v12
+v201 --> v206
+v12 --> v210
+v190 --> v210
+v192 --> v210
+v194 --> v210
+v210 --> v213
+v210 --> v6
+v210 --> v206
+v213 --> v6
+v213 --> v206
+v206 --> v223
+v223 --> v226
+v223 --> v6
+v223 --> v12
+v226 --> v6
+v226 --> v12
+v12 --> v240
+v240 --> v243
+v240 --> v6
+v240 --> v12
+v243 --> v6
+v243 --> v12
+v12 --> v251
+v251 --> v6
+v251 --> v102
+v251 --> v255
+v255 --> v257
+v261 --> v262
+v12 --> v262
+v262 --> v6
+v262 --> v265
+v265 --> v267
+v275 --> v277
+v276 --> v277
+v102 --> v277
+v277 --> v283
+v277 --> v6
+v277 --> v287
+v283 --> v6
+v283 --> v287
+v288 --> v287
+v287 --> v290
+v290 --> v293
+v290 --> v6
+v290 --> v297
+v293 --> v6
+v293 --> v297
+v297 --> v300
+v297 --> v304
+v304 --> v307
+v304 --> v6
+v304 --> v12
+v307 --> v6
+v307 --> v12
+v326 --> v327
+v8 --> v327
+v12 --> v327
+v327 --> v6
+v327 --> v331
+v332 --> v331
+v331 --> v334
+v334 --> v337
+v334 --> v6
+v334 --> v12
+v337 --> v6
+v337 --> v12
+v357 --> v360
+v358 --> v360
+v359 --> v360
+v12 --> v360
+v360 --> v375
+v360 --> v6
+v375 --> v378
+v375 --> v6
+v378 --> v6
+v378 --> v381
+v383 --> v384
+v381 --> v384
+v384 --> v385
+v384 --> v6
+v393 --> v6
+v396 --> v407
+v399 --> v6
+v403 --> v6
+v6 --> v410
+v407 --> v410
+v408 --> v410
+v409 --> v410
+v410 --> v414
+v414 --> v415
+ + + diff --git a/docs/images/metro/pipeline_dag_2024-05-28_12-01-55.html b/docs/images/metro/pipeline_dag_2024-05-28_12-01-55.html new file mode 100644 index 00000000..163303f0 --- /dev/null +++ b/docs/images/metro/pipeline_dag_2024-05-28_12-01-55.html @@ -0,0 +1,467 @@ + + + + + + +
+flowchart TB
+    subgraph " "
+    v1["Channel.of"]
+    v2["Channel.of"]
+    v11["channel.fromSamplesheet"]
+    v19["channel.fromSamplesheet"]
+    v20["channel.fromSamplesheet"]
+    v23["channel.fromSamplesheet"]
+    v24["Channel.of"]
+    v25["Channel.of"]
+    v44["qname"]
+    v74["qname"]
+    v129["regions"]
+    v130["targets"]
+    v131["samples"]
+    v138["bed"]
+    v149["regions"]
+    v150["targets"]
+    v151["samples"]
+    v160["regions"]
+    v161["targets"]
+    v162["samples"]
+    v177["regions"]
+    v178["targets"]
+    v179["samples"]
+    v182["program_file"]
+    v189["Channel.of"]
+    v191["Channel.of"]
+    v193["Channel.of"]
+    v261["model"]
+    v275[" "]
+    v276[" "]
+    v288["Channel.of"]
+    v326["save_mpileup"]
+    v332["Channel.of"]
+    v357[" "]
+    v358["min_val_gl"]
+    v359["min_val_dp"]
+    v383["program_file"]
+    v393["Channel.of"]
+    v396["Channel.fromPath"]
+    v399["ch_workflow_summary"]
+    v403["ch_methods_description"]
+    end
+    subgraph PIPELINE_INITIALISATION
+    v3([SAMTOOLS_FAIDX])
+    v8(( ))
+    end
+    subgraph " "
+    v4[" "]
+    v5[" "]
+    v16["files_ext"]
+    v26["ch_genotype"]
+    v39[" "]
+    v46[" "]
+    v47[" "]
+    v48[" "]
+    v49[" "]
+    v50[" "]
+    v54[" "]
+    v55[" "]
+    v76[" "]
+    v77[" "]
+    v78[" "]
+    v79[" "]
+    v80[" "]
+    v84[" "]
+    v85[" "]
+    v93[" "]
+    v94[" "]
+    v95[" "]
+    v96[" "]
+    v98[" "]
+    v99[" "]
+    v116[" "]
+    v126[" "]
+    v135[" "]
+    v140[" "]
+    v141[" "]
+    v142[" "]
+    v143[" "]
+    v144[" "]
+    v156[" "]
+    v167[" "]
+    v171[" "]
+    v184["posfile"]
+    v214[" "]
+    v227[" "]
+    v244[" "]
+    v257["chunks_glimpse1"]
+    v267["chunks_glimpse2"]
+    v278[" "]
+    v279[" "]
+    v280[" "]
+    v284[" "]
+    v294[" "]
+    v300["ch_impute_output"]
+    v308[" "]
+    v328[" "]
+    v338[" "]
+    v385["stats"]
+    v411[" "]
+    v412[" "]
+    v413[" "]
+    v415["multiqc_report"]
+    end
+    subgraph NFCORE_PHASEIMPUTE
+    subgraph PHASEIMPUTE
+    subgraph BAM_REGION
+    v45([SAMTOOLS_VIEW])
+    v53([SAMTOOLS_INDEX])
+    end
+    subgraph BAM_DOWNSAMPLE
+    v64([SAMTOOLS_COVERAGE])
+    v75([SAMTOOLS_VIEW])
+    v83([SAMTOOLS_INDEX_1])
+    v92([SAMTOOLS_MERGE])
+    v97([SAMTOOLS_INDEX_2])
+    v58(( ))
+    v88(( ))
+    end
+    subgraph VCF_CHR_CHECK
+    v108([VCFCHRBFR])
+    v107(( ))
+    end
+    subgraph VCF_NORMALIZE_BCFTOOLS
+    v123([BCFTOOLS_NORM])
+    v125([BCFTOOLS_INDEX_1])
+    v132([BCFTOOLS_DEL_MLT_ALL])
+    v134([BCFTOOLS_INDEX_2])
+    v139([BCFTOOLS_CONVERT])
+    v128(( ))
+    end
+    subgraph VCF_SITES_EXTRACT_BCFTOOLS
+    v152([BCFTOOLS_VIEW])
+    v155([BCFTOOLS_INDEX])
+    v163([BCFTOOLS_QUERY])
+    v166([TABIX_BGZIP])
+    v170([TABIX_TABIX])
+    end
+    subgraph PREPARE_POSFILE_TSV
+    v180([BCFTOOLS_QUERY])
+    v183([GAWK])
+    end
+    subgraph VCF_PHASE_PANEL
+    subgraph VCF_PHASE_SHAPEIT5
+    v201([BEDTOOLS_MAKEWINDOWS])
+    v210([SHAPEIT5_PHASECOMMON])
+    v213([VCF_BCFTOOLS_INDEX_1])
+    v223([SHAPEIT5_LIGATE])
+    v226([VCF_BCFTOOLS_INDEX_2])
+    v190(( ))
+    v192(( ))
+    v194(( ))
+    v206(( ))
+    end
+    end
+    subgraph CONCAT_PANEL
+    v240([BCFTOOLS_CONCAT])
+    v243([BCFTOOLS_INDEX])
+    end
+    subgraph VCF_CHUNK_GLIMPSE
+    v251([GLIMPSE_CHUNK])
+    v262([GLIMPSE2_CHUNK])
+    v255(( ))
+    v265(( ))
+    end
+    subgraph BAM_IMPUTE_QUILT
+    v277([QUILT_QUILT])
+    v283([BCFTOOLS_INDEX_1])
+    v290([BCFTOOLS_ANNOTATE])
+    v293([BCFTOOLS_INDEX_2])
+    v102(( ))
+    v287(( ))
+    end
+    subgraph CONCAT_QUILT
+    v304([BCFTOOLS_CONCAT])
+    v307([BCFTOOLS_INDEX])
+    v297(( ))
+    end
+    subgraph GL_TRUTH
+    v327([BCFTOOLS_MPILEUP])
+    v334([BCFTOOLS_ANNOTATE])
+    v337([BCFTOOLS_INDEX])
+    v12(( ))
+    v331(( ))
+    end
+    subgraph VCF_CONCORDANCE_GLIMPSE2
+    v360([GLIMPSE2_CONCORDANCE])
+    v375([GUNZIP])
+    v378([ADD_COLUMNS])
+    v384([GAWK])
+    v381(( ))
+    end
+    v410([MULTIQC])
+    v6(( ))
+    v407(( ))
+    v408(( ))
+    v409(( ))
+    v414(( ))
+    end
+    end
+    v1 --> v3
+    v1 --> v8
+    v2 --> v3
+    v3 --> v5
+    v3 --> v4
+    v3 --> v6
+    v3 --> v8
+    v11 --> v12
+    v12 --> v16
+    v19 --> v8
+    v19 --> v107
+    v20 --> v12
+    v23 --> v12
+    v24 --> v58
+    v25 --> v26
+    v12 --> v39
+    v44 --> v45
+    v8 --> v45
+    v12 --> v45
+    v45 --> v53
+    v45 --> v50
+    v45 --> v49
+    v45 --> v48
+    v45 --> v47
+    v45 --> v46
+    v45 --> v6
+    v45 --> v58
+    v53 --> v55
+    v53 --> v54
+    v53 --> v6
+    v53 --> v58
+    v8 --> v64
+    v58 --> v64
+    v64 --> v6
+    v64 --> v58
+    v74 --> v75
+    v8 --> v75
+    v58 --> v75
+    v75 --> v83
+    v75 --> v80
+    v75 --> v79
+    v75 --> v78
+    v75 --> v77
+    v75 --> v76
+    v75 --> v6
+    v75 --> v88
+    v83 --> v85
+    v83 --> v84
+    v83 --> v6
+    v83 --> v88
+    v8 --> v92
+    v88 --> v92
+    v92 --> v97
+    v92 --> v96
+    v92 --> v95
+    v92 --> v94
+    v92 --> v93
+    v92 --> v102
+    v97 --> v99
+    v97 --> v98
+    v97 --> v6
+    v97 --> v102
+    v107 --> v108
+    v108 --> v6
+    v108 --> v8
+    v8 --> v116
+    v8 --> v123
+    v123 --> v125
+    v123 --> v6
+    v123 --> v128
+    v125 --> v126
+    v125 --> v6
+    v125 --> v128
+    v129 --> v132
+    v130 --> v132
+    v131 --> v132
+    v128 --> v132
+    v132 --> v134
+    v132 --> v6
+    v132 --> v12
+    v134 --> v135
+    v134 --> v6
+    v134 --> v12
+    v138 --> v139
+    v8 --> v139
+    v12 --> v139
+    v139 --> v144
+    v139 --> v143
+    v139 --> v142
+    v139 --> v141
+    v139 --> v140
+    v139 --> v6
+    v139 --> v102
+    v149 --> v152
+    v150 --> v152
+    v151 --> v152
+    v12 --> v152
+    v152 --> v155
+    v152 --> v6
+    v152 --> v12
+    v155 --> v156
+    v155 --> v6
+    v155 --> v12
+    v160 --> v163
+    v161 --> v163
+    v162 --> v163
+    v12 --> v163
+    v163 --> v166
+    v163 --> v6
+    v166 --> v170
+    v166 --> v167
+    v166 --> v6
+    v166 --> v12
+    v170 --> v171
+    v170 --> v6
+    v170 --> v12
+    v177 --> v180
+    v178 --> v180
+    v179 --> v180
+    v12 --> v180
+    v180 --> v183
+    v180 --> v6
+    v182 --> v183
+    v183 --> v184
+    v183 --> v6
+    v189 --> v190
+    v191 --> v192
+    v193 --> v194
+    v12 --> v201
+    v201 --> v6
+    v201 --> v12
+    v201 --> v206
+    v12 --> v210
+    v190 --> v210
+    v192 --> v210
+    v194 --> v210
+    v210 --> v213
+    v210 --> v6
+    v210 --> v206
+    v213 --> v214
+    v213 --> v6
+    v213 --> v206
+    v206 --> v223
+    v223 --> v226
+    v223 --> v6
+    v223 --> v12
+    v226 --> v227
+    v226 --> v6
+    v226 --> v12
+    v12 --> v240
+    v240 --> v243
+    v240 --> v6
+    v240 --> v12
+    v243 --> v244
+    v243 --> v6
+    v243 --> v12
+    v12 --> v251
+    v251 --> v6
+    v251 --> v102
+    v251 --> v255
+    v255 --> v257
+    v261 --> v262
+    v12 --> v262
+    v262 --> v6
+    v262 --> v265
+    v265 --> v267
+    v275 --> v277
+    v276 --> v277
+    v102 --> v277
+    v277 --> v283
+    v277 --> v280
+    v277 --> v279
+    v277 --> v278
+    v277 --> v6
+    v277 --> v287
+    v283 --> v284
+    v283 --> v6
+    v283 --> v287
+    v288 --> v287
+    v287 --> v290
+    v290 --> v293
+    v290 --> v6
+    v290 --> v297
+    v293 --> v294
+    v293 --> v6
+    v293 --> v297
+    v297 --> v300
+    v297 --> v304
+    v304 --> v307
+    v304 --> v6
+    v304 --> v12
+    v307 --> v308
+    v307 --> v6
+    v307 --> v12
+    v326 --> v327
+    v8 --> v327
+    v12 --> v327
+    v327 --> v328
+    v327 --> v6
+    v327 --> v331
+    v332 --> v331
+    v331 --> v334
+    v334 --> v337
+    v334 --> v6
+    v334 --> v12
+    v337 --> v338
+    v337 --> v6
+    v337 --> v12
+    v357 --> v360
+    v358 --> v360
+    v359 --> v360
+    v12 --> v360
+    v360 --> v375
+    v360 --> v6
+    v375 --> v378
+    v375 --> v6
+    v378 --> v6
+    v378 --> v381
+    v383 --> v384
+    v381 --> v384
+    v384 --> v385
+    v384 --> v6
+    v393 --> v6
+    v396 --> v407
+    v399 --> v6
+    v403 --> v6
+    v6 --> v410
+    v407 --> v410
+    v408 --> v410
+    v409 --> v410
+    v410 --> v413
+    v410 --> v412
+    v410 --> v411
+    v410 --> v414
+    v414 --> v415
+
+
+ + + diff --git a/docs/images/nf-core-phaseimpute_logo.png b/docs/images/nf-core-phaseimpute_logo.png deleted file mode 100644 index e66b74f0..00000000 Binary files a/docs/images/nf-core-phaseimpute_logo.png and /dev/null differ diff --git a/docs/images/nf-core-phaseimpute_logo_dark.png b/docs/images/nf-core-phaseimpute_logo_dark.png new file mode 100644 index 00000000..b3bd5aa7 Binary files /dev/null and b/docs/images/nf-core-phaseimpute_logo_dark.png differ diff --git a/docs/images/nf-core-phaseimpute_logo_light.png b/docs/images/nf-core-phaseimpute_logo_light.png new file mode 100644 index 00000000..4f5070fe Binary files /dev/null and b/docs/images/nf-core-phaseimpute_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 1db00bdd..3d5470cf 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,43 +1,218 @@ # nf-core/phaseimpute: Output +## Introduction + This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. - +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. ## Pipeline overview -The pipeline is built using [Nextflow](https://www.nextflow.io/) -and processes data using the following steps: +## Panel preparation outputs `--steps panelprep` + +This step of the pipeline performs a QC of the reference panel data and produces the necessary files for imputation (`--steps impute`). + +It has the following optional modes: + +- `--normalize` - Normalize the reference panel with `bcftools norm` and remove multiallelic sites. It also allow to remove samples using `--remove_samples`. +- `--compute_freq` - Compute allele frequencies with `vcffixup`. +- `--phase` - Phase the reference panel with `SHAPEIT5`. 
+ +The pipeline will produce the following outputs: + +- [Normalize reference panel](#panel-directory) - Remove multiallelic sites from the reference panel and compute allele frequencies if needed. +- [Convert](#haplegend-directory) - Convert reference panel to `.hap` and `.legend` files. +- [Posfile](#sites-directory) - Produce a `.tsv` file with the list of positions to genotype for the different tools. +- [Chromosomes chunks](#chunks-directory) - Create chunks of the reference panel. +- [CSV](#csv-directory) - Obtained `.csv` files from this step. + +The directory structure from `--steps panelprep` is: + +```tree +├── panel +├── haplegend +├── sites +├── chunks +│ ├── glimpse1 +│ └── glimpse2 +├── csv +``` + +### Panel directory + +
+Output files + +- `prep_panel/panel/` + - `*.vcf.gz`: The reference panel VCF files after all the preprocessing steps are completed. + - `*.tbi`: The index file for the prepared reference panel. + +
+ +A directory containing the reference panel per chromosome after preprocessing. +The files will be normalized if the flag `--normalize` is used (with `_normalized` suffix). The files will have their allele frequency computed if the flaq `--compute_freq` is used (with `_fixup` suffix). +The files will be phased if the flag `--phase` is used (with `_phased` suffix). + +### Haplegend directory + +
+Output files + +- `prep_panel/haplegend/` + - `*.hap`: a `.hap` file for the reference panel containing the genotype. + - `*.legend*`: a `.legend` file for the reference panel containing the variants informations. + - `*.samples`: a `.samples` file for the reference panel containing the samples informations. + +
+ +[`bcftools convert`](https://samtools.github.io/bcftools/bcftools.html#convert) aids in the conversion of VCF files to `.hap` and `.legend` files. A `.samples` file is also generated. Once that you have generated the hap and legend files for your reference panel, you can skip the reference preparation steps and directly submit these files for imputation. The hap and legend files can be used as input files with the `--tools quilt` option. + +### Sites directory + +
+Output files + +- `prep_panel/sites/` + - `*.vcf.gz`: A VCF file with biallelic SNPs only. + - `*.csi`: Index file of the VCF file. + +
+ +[`bcftools query`](https://samtools.github.io/bcftools/bcftools.html#query) produces VCF (`*.vcf.gz`) files per chromosome. These QCed VCF files can be gathered into a CSV file and used with all the tools in `--steps impute` using the flag `--panel`. + +### Chunks directory + +
+Output files + +- `prep_panel/chunks/` + - `*.txt`: Text file containing the chunks obtained after running `GLIMPSE1_CHUNK`. + +
+ +[Glimpse1 chunk](https://odelaneau.github.io/GLIMPSE/glimpse1/) defines the chunks where imputation will be performed. For further reading and documentation see the [Glimpse1 documentation](https://odelaneau.github.io/GLIMPSE/glimpse1/commands.html). Once you have generated the chunks for your reference panel, you can skip the reference preparation steps and directly submit this file for imputation. + +### CSV directory + +
+Output files + +- `prep_panel/csv/` + - `chunks.csv`: A CSV file containing the list of chunks obtained for each chromosome and panel. + - `panel.csv`: A CSV file containing the final phased and prepared for each chromosome and input panel. + - `posfile.csv`: A CSV file containing the final list of panel positions, in VCF and TSV files, for each chromosome and input panel. + +
+ +## Imputation outputs `--steps impute` + +The results from `--steps impute` will have the following directory structure: + +```tree +├── batch +├── csv +├── glimpse1/glimpse2/quilt/stitch +│ ├── concat/ +│ └── samples/ +├── stats +``` + +
+Output files + +- `imputation/batch/all.batchi.id.txt`: List of samples names processed in the i^th^ batch. +- `imputation/csv/` + - `impute.csv`: A single CSV file containing the path to a VCF file and its index, of each imputed sample with their corresponding tool. +- `imputation/[glimpse1,glimpse2,quilt,stitch]/` + - `concat/all.batch*.vcf.gz`: The concatenated VCF files of all imputed samples by batches. + - `concat/all.batch*.vcf.gz.tbi`: The index file for the concatenated imputed VCF files of the samples. + - `samples/*.vcf.gz`: A VCF file of each imputed sample. + - `samples/*.vcf.gz.tbi`: The index file of the imputed VCF files. +- `imputation/*..bcftools_stats.txt`: The statistics of the imputed VCF target file produced by [`BCFTOOLS_STATS`](https://samtools.github.io/bcftools/bcftools.html#stats.) + +
+ +[`bcftools concat`](https://samtools.github.io/bcftools/bcftools.html#concat) will produce a single VCF file from a list of imputed VCF files in chunks. + +## Simulation outputs `--steps simulate` + +The results from `--steps simulate` will have the following directory structure: + +```tree +├── csv +├── samples +``` + +
+Output files + +- `simulation/` + - `csv`: + - `simulate.csv`: Samplesheet listing all downsampled target alignment files. + - `*.depth_*x.bam`: An alignment file from the target file downsampled at the desired depth. + - `*.bam.csi`: The corresponding index of the alignment file. + +
+ +## Validation outputs `--steps validate` + +The results from `--steps validate` will have the following directory structure: + +```tree +├── concat +├── samples +├── stats +``` + +
+Output files + +- `validation/` + - `concat/all.truth.vcf.gz`: The concatenated VCF file of all truth sample. + - `concat/all.truth.vcf.gz.tbi`: The index file of the concatenated truth VCF file of the samples. + - `samples/*.vcf.gz`: A VCF file of each truth sample. + - `samples/*.vcf.gz.tbi`: The index file of the truth VCF file. + - `stats/`: + - `*.truth.bcftools_stats.txt`: The statistics of the truth VCF target file produced by [`BCFTOOLS_STATS`](https://samtools.github.io/bcftools/bcftools.html#stats.) + - `*.P_T_SNP.txt`: Concordance metrics of the SNPs variants obtained with [`GLIMPSE2_CONCORDANCE`](https://odelaneau.github.io/GLIMPSE/docs/documentation/concordance/). + - `AllSamples.txt`: Aggregation of the above `GLIMPSE_CONCORDANCE` output across samples and tools. + +
+ +## Reports -* [FastQC](#fastqc) - read quality control -* [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline +Reports contain useful metrics and pipeline information for the different modes. -## FastQC +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline. +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution. -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. +### MultiQC -For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +
+Output files -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. To see how your reads look after trimming, look at the FastQC reports in the `trim_galore` directory. +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. -**Output directory: `results/fastqc`** +
-* `sample_fastqc.html` - * FastQC report, containing quality metrics for your untrimmed raw fastq files -* `zips/sample_fastqc.zip` - * zip file containing the FastQC report, tab-delimited data file and plot images +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. -## MultiQC +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . -[MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. +### Pipeline information -The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. +
+Output files -**Output directory: `results/multiqc`** +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. -* `Project_multiqc_report.html` - * MultiQC report - a standalone HTML file that can be viewed in your web browser -* `Project_multiqc_data/` - * Directory containing parsed statistics from the different tools used in the pipeline +
-For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info) +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index 1e0fb29d..2465497b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,331 +1,677 @@ # nf-core/phaseimpute: Usage -## Table of contents - -* [Table of contents](#table-of-contents) -* [Introduction](#introduction) -* [Running the pipeline](#running-the-pipeline) - * [Updating the pipeline](#updating-the-pipeline) - * [Reproducibility](#reproducibility) -* [Main arguments](#main-arguments) - * [`-profile`](#-profile) - * [`--reads`](#--reads) - * [`--single_end`](#--single_end) -* [Reference genomes](#reference-genomes) - * [`--genome` (using iGenomes)](#--genome-using-igenomes) - * [`--fasta`](#--fasta) - * [`--igenomes_ignore`](#--igenomes_ignore) -* [Job resources](#job-resources) - * [Automatic resubmission](#automatic-resubmission) - * [Custom resource requests](#custom-resource-requests) -* [AWS Batch specific parameters](#aws-batch-specific-parameters) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) - * [`--awscli`](#--awscli) -* [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`--email_on_fail`](#--email_on_fail) - * [`--max_multiqc_email_size`](#--max_multiqc_email_size) - * [`-name`](#-name) - * [`-resume`](#-resume) - * [`-c`](#-c) - * [`--custom_config_version`](#--custom_config_version) - * [`--custom_config_base`](#--custom_config_base) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * 
[`--plaintext_email`](#--plaintext_email) - * [`--monochrome_logs`](#--monochrome_logs) - * [`--multiqc_config`](#--multiqc_config) +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/phaseimpute/usage](https://nf-co.re/phaseimpute/usage) + +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ ## Introduction -Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. +The **nf-core/phaseimpute** pipeline is designed to perform genomic phasing and imputation techniques. Some key functionalities include chromosome checking, panel preparation, imputation, simulation, and concordance. + +## Samplesheet input -It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. ```bash -NXF_OPTS='-Xms1g -Xmx4g' +--input '[path to samplesheet file]' +``` + +### Structure + +The samplesheet can have as many columns as you desire. However, there is a strict requirement for at least 3 columns to match those defined in the table below. + +A final samplesheet file may look something like the one below. This is for 6 samples. 
+ +```console title="samplesheet.csv" +sample,file,index +SAMPLE1,AEG588A1.bam,AEG588A1.bai +SAMPLE2,AEG588A2.bam,AEG588A2.bai +SAMPLE3,AEG588A3.bam,AEG588A3.bai ``` - +| Column | Description | +| -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. Spaces in sample names are automatically converted to underscores (`_`). | +| `file` | Full path to an alignment or variant file. File has to have the extension ".bam", ".cram" or ".vcf", ".bcf" and optionally compressed with bgzip ".gz". All files in this column need to have the same extension. | +| `index` | Full path to index file. File has to be have the extension ".bai", ".crai", "csi", or "tbi". All files in this column need to have the same extension. | -## Running the pipeline +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -The typical command for running the pipeline is as follows: +## Samplesheet reference panel + +You will need to create a samplesheet with information about the reference panel you would like to use. Use the `--panel` parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. ```bash -nextflow run nf-core/phaseimpute --reads '*_R{1,2}.fastq.gz' -profile docker +--panel '[path to samplesheet file]' ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. +### Structure + +A final samplesheet file for the reference panel may look something like the one below. This is for 3 chromosomes. 
+ +```console title="samplesheet_reference.csv" +panel,chr,vcf,index +1000G,chr1,ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz, ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G,chr2,ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz, ALL.chr2.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +1000G,chr3,ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz, ALL.chr3.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz.tbi +``` + +| Column | Description | +| ------- | -------------------------------------------------------------------------------------------------------------- | +| `panel` | Name of the reference panel used. | +| `chr` | Name of the chromosome. Use the prefix 'chr' if the panel uses the prefix. | +| `vcf` | Full path to a VCF file for that chromosome. File has to be gzipped and have the extension ".vcf.gz". | +| `index` | Full path to the index for VCF file for that chromosome. File has to be gzipped and have the extension ".tbi". | + +An [example samplesheet](../assets/samplesheet_reference.csv) has been provided with the pipeline. + +## Samplesheet posfile + +You will need a samplesheet with information about the reference panel sites for using the `--steps [impute,validate]`. You can generate this samplesheet from `--steps panelprep`. Use the `--posfile` parameter to specify its location. It has to be a comma-separated file with at least 5 columns, and a header row as shown in the examples below. + +```bash +--posfile '[path to samplesheet file]' +``` + +### Structure + +A final samplesheet file for the posfile may look something like the one below. This is for 2 chromosomes. 
+ +```console title="posfile.csv" +panel,chr,vcf,index,hap,legend +1000GP.s.norel,chr21,1000GP.chr21.s.norel.sites.vcf.gz,1000GP.chr21.s.norel.sites.vcf.gz.csi,1000GP.s.norel_chr21.hap.gz,1000GP.s.norel_chr21.legend.gz +1000GP.s.norel,chr22,1000GP.chr22.s.norel.sites.vcf.gz,1000GP.chr22.s.norel.sites.vcf.gz.csi,1000GP.s.norel_chr22.hap.gz,1000GP.s.norel_chr22.legend.gz +``` + +| Column | Description | +| -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `panel` | Name of the reference panel used. | +| `chr` | Name of the chromosome. Use the prefix 'chr' if the panel uses the prefix. | +| `vcf` | Full path to a VCF containing the sites for that chromosome. File has to be gzipped and have the extension ".vcf.gz". (Required for validation step) | +| `index` | Full path to the index for the VCF file for that chromosome. File has to be gzipped and have the extension ".tbi". (Necessary for validation step) | +| `hap` | Full path to ".hap.gz" compressed file containing the reference panel haplotypes in ["haps" format](https://www.cog-genomics.org/plink/2.0/formats#haps). (Required by QUILT) | +| `legend` | Full path to ".legend.gz" compressed file containing the reference panel sites in ["legend" format](https://www.cog-genomics.org/plink/2.0/formats#legend). 
(Required by QUILT, GLIMPSE1 and STITCH) | + +The `legend` file should be a TSV with the following structure, similar to that from [`bcftools convert` documentation](https://samtools.github.io/bcftools/bcftools.html#convert) with the `--haplegendsample` command : File is space separated with a header ("id,position,a0,a1"), one row per SNP, with the following columns: + +- Column 1: chromosome:position_ref allele_alternate allele +- Column 2: physical position (sorted from smallest to largest) +- Column 3: reference base +- Column 4: alternate base + +```csv title="legend.tsv" +id position a0 a1 +chr21:16609287_C_T 16609287 C T +chr21:16609295_T_G 16609295 T G +chr21:16609345_A_T 16609345 A T +chr21:16609400_C_A 16609400 C A +chr21:16609437_G_A 16609437 G A +chr21:16609443_C_T 16609443 C T +chr21:16609476_A_G 16609476 A G +chr21:16609525_T_A 16609525 T A +``` + +## Reference genome + +Remember to use the same reference genome for all the files. You can specify the [reference genome](https://nf-co.re/docs/usage/reference_genomes) using: + +```bash +--genome GRCh37 +``` + +or you can specify a custom genome using: + +```bash +--fasta Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz +``` + +## Running the pipeline: quick example + +A quick running example only with the imputation step can be performed as follows: + +```bash +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps impute \ + --chunks chunks.csv \ + --posfile posfile_legend.csv \ + --outdir results \ + --genome GRCh38 \ + --panel panel.csv \ + --tools glimpse1 \ + -profile docker +``` + +The typical command for running the pre-processing of the panel and imputation of samples is shown below: + +```bash +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps panelprep,impute \ + --outdir results \ + --genome GRCh37 \ + -profile docker +``` + +This will launch the pipeline, preparing the reference panel and performing imputation, with the `docker` configuration profile. 
See below for more information about profiles. Note that the pipeline will create the following files in your working directory: ```bash -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -### Updating the pipeline +To facilitate multiple runs of the pipeline with consistent settings without specifying each parameter in the command line, you can use a parameter file. This allows for setting parameters once and reusing them across different executions. -When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: +You can provide pipeline settings in a `yaml` or `json` file, which can be specified using the `-params-file` option: ```bash -nextflow pull nf-core/phaseimpute +nextflow run nf-core/phaseimpute -profile docker -params-file params.yaml ``` -### Reproducibility +Example of a `params.yaml` file: -It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. 
+```yaml title="params.yaml" +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` -First, go to the [nf-core/phaseimpute releases page](https://github.com/nf-core/phaseimpute/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +> [!WARNING] +> Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +You can also generate `YAML` or `JSON` files easily using the [nf-core/launch](https://nf-co.re/launch) tool, which guides you creating the files that can be used directly with `-params-file`. -## Main arguments +### Running with the `tests` profile -### `-profile` +Multiple tests profiles are available to test the different functionalities of the pipeline on a small dataset. +The different tests profiles are: -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. +- `test`: A profile to evaluate the imputation step with the `glimpse1` tool. +- `test_glimpse2`: A profile to evaluate the imputation step with the `glimpse2` tool. +- `test_quilt`: A profile to evaluate the imputation step with the `quilt` tool. +- `test_stitch`: A profile to evaluate the imputation step with the `stitch` tool. +- `test_panelprep`: A profile to evaluate the panel preparation step. +- `test_sim`: A profile to evaluate the simulation step. +- `test_validate`: A profile to evaluate the validation step. 
+- `test_batch`: A profile to evaluate the imputation and validation steps of the pipeline while splitting the input into different batches.
+- `test_dog`: A profile to evaluate the panel preparation and imputation steps on dog samples using `glimpse1`, `glimpse2` and `quilt` tools.
+- `test_all`: A profile with a complete configuration to evaluate all the steps of the pipeline.
+- `test_all_fullchr`: A profile with a complete configuration to evaluate all the steps of the pipeline on the full size chromosomes 21 and 22.
+- `test_full`: A profile with a complete configuration to evaluate all the steps of the pipeline on the whole genome.
 
-Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Conda) - see below.
+Each test can be run with the following command:
 
-> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
+```bash
+nextflow run nf-core/phaseimpute -profile <test_name>,docker --outdir results
+```
 
-The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation).
+## Running the pipeline: detailed instructions
 
-Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
-They are loaded in sequence, so later profiles can overwrite earlier profiles.
+nf-core/phaseimpute can be started at different points in the analysis by setting the flag `--steps` and the available options `[simulate, panelprep, impute, validate, all]`. 
You can also run several steps simultaneously by listing the required processes as `--steps panelprep,impute` or you can choose to run all steps sequentially by using `--steps all`. + +## Start with simulation `--steps simulate` + +simulate_metro + +This step of the pipeline allows to create synthetic low-coverage input files by downsizing high density input data. A typical use case is to obtain low-coverage input data from a sequenced sample. This method is useful for comparing the imputation results to a truth dataset and evaluate the quality of the imputation. The truth file for each sample here, is obtained by calling the variants from the high density input data provided it can also be provided through `--input-truth` CSV samplesheet. You can skip these steps if you already have low-pass genome sequencing data. + +A sample command for this steps is: + +```bash +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps simulate \ + --depth 1 \ + --outdir results \ + --genome GRCh37 \ + -profile docker +``` + +The required flags for this mode are: + +- `--steps simulate`: The steps to run. +- `--input samplesheet.csv`: The samplesheet containing the input sample files in `bam` or `cram `format. +- `--depth`: The final depth of the file [default: 1]. +- `--genome` or `--fasta`: The reference genome of the samples. + +You can find an overview of the results produced by this step in the [Output](output.md). + +## Start with panel preparation `--steps panelprep` + +Panel preparation + +These steps pre-process the reference panel in order to be ready for imputation. There are a few quality control steps that are applied to reference panels. These include actions such as removing multiallelic SNPs and indels and removing certain samples from the reference panel (such as related samples). In addition, chunks are produced which are then used in the imputation steps. 
It is recommended that these steps are run once and the produced files are saved, to minimize the cost of reading the reference panel each time. Then, the output files from `--steps panelprep` can be used as input in the subsequent imputation steps, such as `--steps impute`. + +For starting from panel preparation, the required flags are `--steps panelprep` and `--panel samplesheet_reference.csv`. + +```bash +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --panel samplesheet_reference.csv \ + --steps panelprep --outdir results \ + --genome GRCh37 -profile docker +``` -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +The required flags for this mode are: -* `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/phaseimpute`](http://hub.docker.com/r/nfcore/phaseimpute/) -* `singularity` - * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - * Pulls software from DockerHub: [`nfcore/phaseimpute`](http://hub.docker.com/r/nfcore/phaseimpute/) -* `conda` - * Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker or Singularity. - * A generic configuration profile to be used with [Conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +- `--steps panelprep`: The steps to run. +- `--panel reference.csv`: The samplesheet containing the reference panel files in `vcf.gz` format. +- `--phase`: (optional) Whether the reference panel should be phased (true|false). +- `--normalize`: (optional) Whether the reference panel needs to be normalized or not (true|false). The default value is true. 
+- `--remove_samples`: (optional) A comma-separated list of samples to remove from the reference during the normalization process. +- `--compute_freq`: (optional) Whether the frequency (AC/AN field) for each variants needs to be computed or not (true/false). This can be the case if the frequency is absent from the reference panel or if individuals have been removed. - +The panel will be chunked using the `GLIMPSE_CHUNKS` process. The size of the chunks can be optimized according to your needs (e.g. cluster resources, specie chromosomes size, ...) using the following config. The 4mb size (default value) is empirically determined to be a good value in humans (i.e. enough parallelization but not too much). -### `--reads` +```config title="panel.config" +withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = "--window-size 4000000" +} + +withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { + ext.args = "--window-mb 4" +} +``` + +You can find an overview of the results produced by this steps in the [Output](output.md). + +## Start with imputation `--steps impute` + +Impute target + +For starting from the imputation steps, the required flags are: + +- `--steps impute` +- `--input input.csv`: The samplesheet containing the input sample files in `bam`, `cram` or `vcf`, `bcf` format. +- `--genome` or `--fasta`: The reference genome of the samples. +- `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These required files are produced by `--steps panelprep` and used as input in: + + - `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. These are produced by `--steps panelprep` using `GLIMPSE1`. + - `--posfile posfile.csv`: A samplesheet containing a `.legend.gz` file with the list of positions to genotype per chromosome. These are required by tools ( QUILT/STITCH/GLIMPSE1). 
It can also contain the `hap.gz` files (required by QUILT). The posfile can be generated with `--steps panelprep`.
+  - `--panel panel.csv`: A samplesheet containing the post-processed reference panel VCF (required by GLIMPSE1, GLIMPSE2). These files can be obtained with `--steps panelprep`.
+
+#### Summary table of required parameters in `--steps impute`
+
+|            | `--steps impute` | `--input` | `--genome` or `--fasta` | `--panel` | `--chunks` | `--posfile` |
+| ---------- | ---------------- | --------- | ----------------------- | --------- | ---------- | ----------- |
+| `GLIMPSE1` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ | ✅ ³ |
+| `GLIMPSE2` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ | ❌ |
+| `QUILT` | ✅ | ✅ ² | ✅ | ❌ | ✅ | ✅ ⁴ |
+| `STITCH` | ✅ | ✅ ² | ✅ | ❌ | ❌ | ✅ ³ |
+
+> ¹ Alignment files as well as variant calling format (i.e. BAM, CRAM, VCF or BCF)
+> ² Alignment files only (i.e. BAM or CRAM)
+> ³ `GLIMPSE1 and STITCH`: Should be a CSV with columns [panel id, chr, legend]
+> ⁴ `QUILT`: Should be a CSV with columns [panel id, chr, hap, legend]
+
+Here is a representation of how the input files will be processed depending on the input file type and the selected imputation tool.
+
+![InputSoftwareCompatibility](images/InputSoftware_compatibility.png)
+
+#### Argument `--batch_size`
+
+The `--batch_size` argument is used to specify the number of samples to be processed at once. This is useful when the number of samples is large and the memory is limited. The default value is 100 but it might need to be adapted to the size of each individual's data, the number of samples to be processed in parallel and the available memory.
+
+Imputation software algorithms are time-consuming, with computational load dependent on the number of individuals, region size, and panel size. [Some steps have fixed computational costs](https://doi.org/10.1038/s41588-023-01438-3), meaning they take a similar amount of time whether imputing 2 or 200 individuals. 
By grouping individuals into larger batches, these fixed-cost steps are shared among more samples, reducing per-individual computational overhead and improving overall efficiency. However, memory usage must also be managed carefully when processing a large number of individuals within a single batch. Therefore, it is crucial to select a `batch_size` that is large enough to minimize fixed costs per individual but not so large that memory usage becomes unsustainable. + +When the number of samples exceeds the batch size, the pipeline will split the samples into batches and process them sequentially. The files for each batch are stored in the `${outputdir}/imputation/batch` folder. -Use this to specify the location of your input FastQ files. For example: +[STITCH](#stitch) and [GLIMPSE1](#glimpse1) do not support a batch size smaller than the total number of samples. This limit is set to prevent batch effects in the imputation process, as these tools rely on the genetic information from the entire target file to perform imputation. This approach, however, enhances the accuracy of phasing and imputation, as target individuals may provide a more informative genetic context (e.g., when related individuals are present in the target). + +> [!NOTE] +> If you want to disable this option and run each sample separately you can set `--batch_size 1` + +To summarize: + +- If you have Variant Calling Format (VCF) files, join them into a single file and choose either GLIMPSE1 or GLIMPSE2. +- If you have alignment files (e.g., BAM or CRAM), all tools are available, and processing will occur in `batch_size`: + - GLIMPSE1 and STITCH may induce batch effects, so all samples need to be imputed together. + - GLIMPSE2 and QUILT can process samples in separate batches. + +## Imputation tools `--steps impute --tools [glimpse1, glimpse2, quilt, stitch]` + +You can choose different software to perform the imputation. 
In the following sections, the typical commands for running the pipeline with each software are included. Multiple tools can be selected by separating them with a comma (eg. `--tools glimpse1,quilt`). + +### QUILT + +[QUILT](https://github.com/rwdavies/QUILT) is an R and C++ program for rapid genotype imputation from low-coverage sequence using a large reference panel. The required inputs for this program are bam samples provided in the input samplesheet (`--input`) and a CSV file with the genomic chunks (`--chunks`). ```bash ---reads 'path/to/data/sample_*_{1,2}.fastq' +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --posfile posfile.csv \ + --chunks chunks.csv \ + --steps impute \ + --tools quilt \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` -Please note the following requirements: +The CSV file provided in `--posfile` has been described before and is produced by `--steps panelprep`. The `.hap` and `.legend` files in this CSV file are mandatory to use QUILT. + +```console title="posfile.csv" +panel,chr,hap,legend +1000GP,chr22,1000GP.s.norel_chr22.hap.gz,1000GP.s.norel_chr22.legend.gz +``` -1. The path must be enclosed in quotes -2. The path must have at least one `*` wildcard character -3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs. +The csv provided in `--chunks` has been described before in this document and is necessary to run this tool. -If left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz` +```console title="chunks.csv" +panel,chr,file +1000GP,chr1,chunks_chr1.txt +1000GP,chr2,chunks_chr2.txt +1000GP,chr3,chunks_chr3.txt +``` -### `--single_end` +The file column should contain a TXT/TSV file obtained from GLIMPSE1 with the following [structure](https://github.com/nf-core/test-datasets/blob/phaseimpute/hum_data/panel/chr22/1000GP.chr22_chunks.txt). -By default, the pipeline expects paired-end data. 
If you have single-end data, you need to specify `--single_end` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--reads`. For example: +If you do not have a CSV file with chunks, you can provide a reference panel to run the `--steps panelprep` which produces a CSV file with these chunks, which is then used as input for QUILT. You can choose to run both steps sequentially as `--steps panelprep,impute` or simply collect the files produced by `--steps panelprep`. ```bash ---single_end --reads '*.fastq' +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps panelprep,impute \ + --tools quilt \ + --panel samplesheet_reference.csv \ + --outdir results \ + --genome GRCh37 \ + -profile docker ``` -It is not possible to run a mixture of single-end and paired-end files in one run. +### STITCH -## Reference genomes +[STITCH](https://github.com/rwdavies/STITCH) is an R program for low coverage sequencing genotype imputation without using a reference panel. The required inputs for this program are bam samples provided in the input samplesheet (`--input`) and a `.legend.gz` file with the list of positions to genotype (`--posfile`). See [Posfile section](#samplesheet-posfile) for more information. -The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. +If you do not have a position file, you can run the `--steps panelprep` with a reference panel that will produces the TSV file. -### `--genome` (using iGenomes) +```bash +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --steps panelprep \ + --panel samplesheet_reference.csv \ + --outdir results \ + --genome GRCh37 \ + -profile docker +``` -There are 31 different species supported in the iGenomes references. 
To run the pipeline, you must specify which to use with the `--genome` flag.
+Otherwise, you can provide your own position file in the `--steps impute` with STITCH using the `--posfile` parameter.
 
-You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are:
+```bash
+nextflow run nf-core/phaseimpute \
+    --input samplesheet.csv \
+    --steps impute \
+    --posfile posfile.csv \
+    --tools stitch \
+    --outdir results \
+    --genome GRCh37 \
+    -profile docker
+```
 
-* Human
-  * `--genome GRCh37`
-* Mouse
-  * `--genome GRCm38`
-* _Drosophila_
-  * `--genome BDGP6`
-* _S. cerevisiae_
-  * `--genome 'R64-1-1'`
+The CSV file provided in `--posfile` must contain three columns [panel, chr, legend]. See [Posfile section](#samplesheet-posfile) for more information.
 
-> There are numerous others - check the config file for more.
+```console title="posfile.csv"
+panel,chr,legend
+1000GP,chr22,1000GP.s.norel_chr22.legend.gz
+```
 
-Note that you can use the same configuration setup to save sets of reference files for your own use, even if they are not part of the iGenomes resource. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for instructions on where to save such a file.
+STITCH only handles bi-allelic SNPs.
 
-The syntax for this reference configuration is as follows:
+If you do not have a reference panel and you would like to obtain the posfile you can use the following command:
 
-
+```bash
+bcftools view -G -m 2 -M 2 -v ${vcf}
+bcftools convert --haplegendsample ${vcf}
+```
 
-```nextflow
-params {
-  genomes {
-    'GRCh37' {
-      fasta = '<path to the genome fasta file>' // Used if no star index given
-    }
-    // Any number of additional genomes, key is used with --genome
-  }
-}
+### GLIMPSE1
+
+[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. Recommended for many samples at >0.5x coverage and small reference panels. 
Glimpse1 works with alignment (i.e. BAM or CRAM) as well as variant (i.e. VCF or BCF) files as input. This is an example command to run this tool from the `--steps impute`: + +```bash +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --panel samplesheet_reference.csv \ + --steps impute \ + --tool glimpse1 \ + --outdir results \ + --genome GRCh37 \ + -profile docker \ + --posfile posfile.csv + --chunks chunks.csv ``` - +The CSV file provided in `--posfile` must contain three columns [panel, chr, legend]. See [Posfile section](#samplesheet-posfile) for more information. -### `--fasta` +```console title="posfile.csv" +panel,chr,legend +1000GP,chr22,1000GP.s.norel_chr22.legend.gz +``` -If you prefer, you can specify the full path to your reference genome when you run the pipeline: +The CSV file provided in `--panel` must be prepared with `--steps panelprep` and must contain four columns [panel, chr, vcf, index]. + +### GLIMPSE2 + +[GLIMPSE2](https://github.com/odelaneau/GLIMPSE) is a set of tools for phasing and imputation for low-coverage sequencing datasets. This is an example command to run this tool from the `--steps impute`: ```bash ---fasta '[path to Fasta reference]' +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --panel samplesheet_reference.csv \ + --steps impute \ + --tool glimpse2 \ + --outdir results \ + --chunks chunks.csv \ + --genome GRCh37 \ + -profile docker ``` -### `--igenomes_ignore` +Make sure the CSV file with the input panel is the output from `--step panelprep` or has been previously prepared. + +## Start with validation `--steps validate` + +concordance_metro -Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. +This step compares a _truth_ VCF to an _imputed_ VCF in order to compute imputation accuracy. +This also needs the frequency of the alleles. 
They can be computed from the reference panel by running the `--steps panelprep` and using the `--panel` with the `--compute_freq` flag ; or by using `--posfile samplesheet.csv`. -## Job resources +```bash +nextflow run nf-core/phaseimpute \ + --input samplesheet.csv \ + --input_truth truth.csv \ + --posfile posfile.csv \ + --steps validate \ + --outdir results \ + --genome GRCh37 \ + -profile docker +``` -### Automatic resubmission +The required flags for this mode only are: -Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. +- `--steps validate`: The steps to run. +- `--input input.csv`: The samplesheet containing the input sample files in `vcf` or `bcf` format. +- `--input_truth input_truth.csv`: The samplesheet containing the truth VCF files in `vcf` format. + This can also accept `bam` or `cram` files as input but will need the additional `legend` file in the `--posfile` to call the variants. + The structure of the `input_truth.csv` is the same as the `input.csv` file. See [Samplesheet input](#samplesheet-input) for more information. +- `--posfile posfile.csv`: A samplesheet containing the panel sites informations in `vcf` format for each chromosome. -### Custom resource requests +The CSV file provided in `--posfile` must contain four columns [panel, chr, vcf, index]. See [Posfile section](#samplesheet-posfile) for more information. -Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files hosted at [`nf-core/configs`](https://github.com/nf-core/configs/tree/master/conf) for examples. 
+```console title="posfile.csv" +panel,chr,vcf,index +1000GP,chr22,1000GP.s.norel_chr22.sites.vcf.gz,1000GP.s.norel_chr22.sites.csi +``` -If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. +## Run all steps sequentially `--steps all` -If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack). +This mode runs all the previous steps. This requires several flags: -## AWS Batch specific parameters +- `--steps all`: The steps to run. +- `--input input.csv`: The samplesheet containing the input sample files in `bam` or `cram` format. +- `--depth`: The final depth of the input file [default: 1]. +- `--genome` or `--fasta`: The reference genome of the samples. +- `--tools [glimpse1, glimpse2, quilt, stitch]`: A selection of one or more of the available imputation tools. +- `--panel panel.csv`: The samplesheet containing the reference panel files in `vcf.gz` format. +- `--remove_samples`: (optional) A comma-separated list of samples to remove from the reference. +- `--input_truth input_truth.csv`: The samplesheet containing the truth VCF files in `vcf` format. + This can also accept `bam` or `cram` files as input but will need the additional `legend` file in the `--posfile` to call the variants. + The structure of the `input_truth.csv` is the same as the `input.csv` file. 
See [Samplesheet input](#samplesheet-input) for more information. -Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use [`-profile awsbatch`](https://github.com/nf-core/configs/blob/master/conf/awsbatch.config) and then specify all of the following parameters. +### Contig Name Validation and QC -### `--awsqueue` +The first step of the pipeline is to validate the consistency of contig names across all input files. Since the pipeline parallelizes the imputation process by contig, it needs to ensure that the contigs are consistently defined across several files. This step uses either the `--regions` samplesheet or the `.fai` file to identify the genomic regions to process. -The JobQueue that you intend to use on AWS Batch. +However, some contigs specified in these files may be absent from other key files, such as the `--panel`, `--posfile`, `--chunks`, `--map` (column `chr`), or `--fasta`. When this happens, the pipeline generates a warning to notify you of the missing contigs. It then narrows down the process to only the contigs that are **common across all required files**. -### `--awsregion` +Finally, the pipeline performs a detailed check with the `CHECKCHR` tool to verify that these contigs are present in every `--input` and `--input_truth` file, as well as in the individual reference panel files. This prevents inconsistencies in downstream steps. -The AWS region in which to run your job. Default is set to `eu-west-1` but can be adjusted to your needs. +### Updating the pipeline -### `--awscli` +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -The [AWS CLI](https://www.nextflow.io/docs/latest/awscloud.html#aws-cli-installation) path in your custom AMI. Default: `/home/ec2-user/miniconda/bin/aws`. +```bash +nextflow pull nf-core/phaseimpute +``` -Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. +### Reproducibility -## Other command line parameters +It is a good idea to specify the pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. - +First, go to the [nf-core/phaseimpute releases page](https://github.com/nf-core/phaseimpute/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -### `--outdir` +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. -The output directory where the results will be saved. +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -### `--email` +> [!TIP] +> If you wish to share such profiles (such as uploading them as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. 
-Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. +## Core Nextflow arguments -### `--email_on_fail` +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -This works exactly as with `--email`, except emails are only sent if the workflow is not successful. +### `-profile` -### `--max_multiqc_email_size` +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -### `-name` +> [!IMPORTANT] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to check if your system is supported, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). -This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always). +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! 
+They are loaded in sequence, so later profiles can overwrite earlier profiles. -**NB:** Single hyphen (core Nextflow option) +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. + +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. 
For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. -**NB:** Single hyphen (core Nextflow option) - ### `-c` -Specify the path to a specific config file (this is a core NextFlow command). +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. -**NB:** Single hyphen (core Nextflow option) +## Custom configuration -Note - you can use this to override pipeline defaults. +### Resource requests -### `--custom_config_version` +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most pipeline steps, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher resource requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`. +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. 
-```bash -## Download and use config file with following git commid id ---custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 -``` +### Custom Containers -### `--custom_config_base` +In some cases, you may wish to change the container or conda environment used by a pipeline step for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline specified version may be out of date. -If you're running offline, nextflow will not be able to fetch the institutional config files -from the internet. If you don't need them, then this is not a problem. If you do need them, -you should download the files from the repo and tell nextflow where to find them with the -`custom_config_base` option. For example: +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. -```bash -## Download and unzip the config files -cd /path/to/my/configs -wget https://github.com/nf-core/configs/archive/master.zip -unzip master.zip +### Custom Tool Arguments -## Run the pipeline -cd /path/to/my/data -nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/ +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. 
The different configuration for the different processes are organized as follow: + +```tree +├── conf +│ ├── steps +│ │ ├── chrcheck.config # Configuration for the chrcheck workflow +│ │ ├── imputation_glimpse1.config # Configuration for the impute step with GLIMPSE1 +│ │ ├── imputation_glimpse2.config # Configuration for the impute step with GLIMPSE2 +│ │ ├── imputation_quilt.config # Configuration for the impute step with QUILT +│ │ ├── imputation_stitch.config # Configuration for the impute step with STITCH +│ │ ├── initialisation.config # Configuration for the initialisation subworkflow +│ │ ├── panel_prep.config # Configuration for the panelprep step +│ │ ├── simulation.config # Configuration for the simulation step +│ │ └── validation.config # Configuration for the validation step +│ ├── base.config # Base configuration for the pipeline +│ └── modules.config # Configuration for the statistical and multiqc modules ``` -> Note that the nf-core/tools helper package has a `download` command to download all required pipeline -> files + singularity containers + institutional configs in one go for you, to make this process easier. +One of the parameters that you might want to modify could be specific to each imputation software. As an example, running the pipeline, you may encounter that to reduce the impact of individual reads (for example in QUILT), you might need to lower coverage. This can be achieved by including any modification to a Nextflow process as an external argument using `ext.args`. You would customize the run by providing: + +```groovy +process { + withName:'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:QUILT_QUILT' { + ext.args = "--downsampleToCov=1" + } +} +``` -### `--max_memory` +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. 
-Use to set a top-limit for the default memory requirement for each process. -Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` +### nf-core/configs -### `--max_time` +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -Use to set a top-limit for the default time requirement for each process. -Should be a string in the format integer-unit. eg. `--max_time '2.h'` +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. -### `--max_cpus` +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -Use to set a top-limit for the default CPU requirement for each process. -Should be a string in the format integer-unit. eg. `--max_cpus 1` +## Running in the background -### `--plaintext_email` +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. -Set to receive plain-text e-mails instead of HTML formatted. 
+The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. -### `--monochrome_logs` +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs). -Set to disable colourful command line output and live life in monochrome. +## Nextflow memory requirements -### `--multiqc_config` +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): + +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` -Specify a path to a custom MultiQC configuration file. +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. 
diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 370aabc4..00000000 --- a/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -# You can use this file to create a conda environment for this pipeline: -# conda env create -f environment.yml -name: nf-core-phaseimpute-1.0dev -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - conda-forge::python=3.7.3 - - conda-forge::markdown=3.1.1 - - conda-forge::pymdown-extensions=6.0 - - conda-forge::pygments=2.5.2 - # TODO nf-core: Add required software dependencies here - - bioconda::fastqc=0.11.8 - - bioconda::multiqc=1.7 diff --git a/main.nf b/main.nf index d9570860..b31db4b1 100644 --- a/main.nf +++ b/main.nf @@ -1,426 +1,159 @@ #!/usr/bin/env nextflow /* -======================================================================================== - nf-core/phaseimpute -======================================================================================== - nf-core/phaseimpute Analysis Pipeline. - #### Homepage / Documentation - https://github.com/nf-core/phaseimpute ----------------------------------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/phaseimpute +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/phaseimpute + Website: https://nf-co.re/phaseimpute + Slack : https://nfcore.slack.com/channels/phaseimpute */ -def helpMessage() { - // TODO nf-core: Add to this help message with new command line parameters - log.info nfcoreHeader() - log.info""" - - Usage: - - The typical command for running the pipeline is as follows: - - nextflow run nf-core/phaseimpute --reads '*_R{1,2}.fastq.gz' -profile docker - - Mandatory arguments: - --reads [file] Path to input data (must be surrounded with quotes) - -profile [str] Configuration profile to use. 
Can use multiple (comma separated) - Available: conda, docker, singularity, test, awsbatch, and more - - Options: - --genome [str] Name of iGenomes reference - --single_end [bool] Specifies that the input is single-end reads - - References If not specified in the configuration file or you wish to overwrite any of the references - --fasta [file] Path to fasta reference - - Other options: - --outdir [file] The output directory where the results will be saved - --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful - --max_multiqc_email_size [str] Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic - - AWSBatch options: - --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion [str] The AWS Region for your AWS Batch job to run on - --awscli [str] Path to the AWS CLI tool - """.stripIndent() -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -// Show help message -if (params.help) { - helpMessage() - exit 0 -} +include { PHASEIMPUTE } from './workflows/phaseimpute' +include { CHRCHECK as CHRCHECK_INPUT } from './workflows/chrcheck' +include { CHRCHECK as CHRCHECK_TRUTH } from './workflows/chrcheck' +include { CHRCHECK as CHRCHECK_PANEL } from './workflows/chrcheck' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { PIPELINE_COMPLETION } from 
'./subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_phaseimpute_pipeline' /* - * SET UP CONFIGURATION VARIABLES - */ - -// Check if genome exists in the config file -if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -// TODO nf-core: Add any reference files that are needed -// Configurable reference genomes // -// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY -// If you want to use the channel below in a process, define the following: -// input: -// file fasta from ch_fasta +// WORKFLOW: Run main analysis pipeline depending on type of input // -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -if (params.fasta) { ch_fasta = file(params.fasta, checkIfExists: true) } - -// Has the run name been specified by the user? -// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - custom_runName = workflow.runName -} - -if (workflow.profile.contains('awsbatch')) { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. 
- if (params.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." -} - -// Stage config files -ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() -ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) - -/* - * Create a channel for input read files - */ -if (params.readPaths) { - if (params.single_end) { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true) ] ] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { ch_read_files_fastqc; ch_read_files_trimming } - } else { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [ file(row[1][0], checkIfExists: true), file(row[1][1], checkIfExists: true) ] ] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { ch_read_files_fastqc; ch_read_files_trimming } +workflow NFCORE_PHASEIMPUTE { + + take: + ch_input // channel: samplesheet read in from --input + ch_input_truth // channel: samplesheet read in from --input-truth + ch_fasta // channel: reference genome FASTA file with index + ch_panel // channel: reference panel variants file + ch_regions // channel: regions to use [[chr, region], region] + ch_depth // channel: depth of coverage file [[depth], depth] + ch_map // channel: map file for imputation + ch_posfile // channel: samplesheet read in from --posfile + ch_chunks // channel: samplesheet read in from --chunks + chunk_model // parameter: chunk model + ch_versions // channel: versions of software used + + main: + + // + // Initialise input channels + // + + ch_input_impute = Channel.empty() + ch_input_simulate = Channel.empty() + ch_input_validate = Channel.empty() + + // Check input files for contigs names consistency + lst_chr = ch_regions.map { 
it[0].chr } + .unique() + .collect() + .toList() + + CHRCHECK_INPUT(ch_input.combine(lst_chr)) + ch_input = CHRCHECK_INPUT.out.output + ch_versions = ch_versions.mix(CHRCHECK_INPUT.out.versions) + + CHRCHECK_TRUTH(ch_input_truth.combine(lst_chr)) + ch_input_truth = CHRCHECK_TRUTH.out.output + + CHRCHECK_PANEL(ch_panel.map{ meta, file, index -> [meta, file, index, [meta.chr]]}) + ch_panel = CHRCHECK_PANEL.out.output + + if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { + ch_input_simulate = ch_input + } else if (params.steps.split(',').contains("impute")) { + ch_input_impute = ch_input + } else if (params.steps.split(',').contains("validate")) { + ch_input_validate = ch_input } -} else { - Channel - .fromFilePairs(params.reads, size: params.single_end ? 1 : 2) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --single_end on the command line." } - .into { ch_read_files_fastqc; ch_read_files_trimming } -} - -// Header log info -log.info nfcoreHeader() -def summary = [:] -if (workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName -// TODO nf-core: Report custom parameters here -summary['Reads'] = params.reads -summary['Fasta Ref'] = params.fasta -summary['Data Type'] = params.single_end ? 
'Single-End' : 'Paired-End' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -if (workflow.profile.contains('awsbatch')) { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue - summary['AWS CLI'] = params.awscli -} -summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.max_multiqc_email_size -} -log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") -log.info "-\033[2m--------------------------------------------------\033[0m-" - -// Check the hostnames against configured profiles -checkHostname() - -Channel.from(summary.collect{ [it.key, it.value] }) - .map { k,v -> "
$k
${v ?: 'N/A'}
" } - .reduce { a, b -> return [a, b].join("\n ") } - .map { x -> """ - id: 'nf-core-phaseimpute-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/phaseimpute Workflow Summary' - section_href: 'https://github.com/nf-core/phaseimpute' - plot_type: 'html' - data: | -
- $x -
- """.stripIndent() } - .set { ch_workflow_summary } - -/* - * Parse software version numbers - */ -process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: 'copy', - saveAs: { filename -> - if (filename.indexOf(".csv") > 0) filename - else null - } - - output: - file 'software_versions_mqc.yaml' into ch_software_versions_yaml - file "software_versions.csv" - - script: - // TODO nf-core: Get all tools to print their version number here - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version > v_fastqc.txt - multiqc --version > v_multiqc.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ -} - -/* - * STEP 1 - FastQC - */ -process fastqc { - tag "$name" - label 'process_medium' - publishDir "${params.outdir}/fastqc", mode: 'copy', - saveAs: { filename -> - filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" - } - - input: - set val(name), file(reads) from ch_read_files_fastqc - - output: - file "*_fastqc.{zip,html}" into ch_fastqc_results - script: - """ - fastqc --quiet --threads $task.cpus $reads - """ + // + // WORKFLOW: Run pipeline + // + PHASEIMPUTE ( + ch_input_impute, + ch_input_simulate, + ch_input_validate, + ch_input_truth, + ch_fasta, + ch_panel, + ch_regions, + ch_depth, + ch_map, + ch_posfile, + ch_chunks, + chunk_model, + ch_versions + ) + emit: + multiqc_report = PHASEIMPUTE.out.multiqc_report // channel: /path/to/multiqc_report.html } /* - * STEP 2 - MultiQC - */ -process multiqc { - publishDir "${params.outdir}/MultiQC", mode: 'copy' - - input: - file (multiqc_config) from ch_multiqc_config - file (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([]) - // TODO nf-core: Add in log files from your new processes for MultiQC to find! 
- file ('fastqc/*') from ch_fastqc_results.collect().ifEmpty([]) - file ('software_versions/*') from ch_software_versions_yaml.collect() - file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml") - - output: - file "*multiqc_report.html" into ch_multiqc_report - file "*_data" - file "multiqc_plots" - - script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : '' - // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time - """ - multiqc -f $rtitle $rfilename $custom_config_file . - """ -} - -/* - * STEP 3 - Output Description HTML - */ -process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: 'copy' - - input: - file output_docs from ch_output_docs - - output: - file "results_description.html" +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - script: - """ - markdown_to_html.py $output_docs -o results_description.html - """ +workflow { + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.validate_params, + params.monochrome_logs, + args, + params.outdir + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_PHASEIMPUTE ( + PIPELINE_INITIALISATION.out.input, + PIPELINE_INITIALISATION.out.input_truth, + PIPELINE_INITIALISATION.out.fasta, + PIPELINE_INITIALISATION.out.panel, + PIPELINE_INITIALISATION.out.regions, + PIPELINE_INITIALISATION.out.depth, + PIPELINE_INITIALISATION.out.gmap, + PIPELINE_INITIALISATION.out.posfile, + PIPELINE_INITIALISATION.out.chunks, + PIPELINE_INITIALISATION.out.chunk_model, + PIPELINE_INITIALISATION.out.versions + 
) + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_PHASEIMPUTE.out.multiqc_report + ) } /* - * Completion e-mail notification - */ -workflow.onComplete { - - // Set up the e-mail variables - def subject = "[nf-core/phaseimpute] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[nf-core/phaseimpute] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // TODO nf-core: If not using MultiQC, strip out this code 
(including params.max_multiqc_email_size) - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = ch_multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { - log.warn "[nf-core/phaseimpute] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/phaseimpute] Could not attach MultiQC report to summary email" - } - - // Check if we are only sending emails on failure - email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$baseDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new File("$baseDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/phaseimpute] Sent summary e-mail to $email_address (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, email_address ].execute() << 
email_txt - log.info "[nf-core/phaseimpute] Sent summary e-mail to $email_address (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" - log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" - log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" - } - - if (workflow.success) { - log.info "-${c_purple}[nf-core/phaseimpute]${c_green} Pipeline completed successfully${c_reset}-" - } else { - checkHostname() - log.info "-${c_purple}[nf-core/phaseimpute]${c_red} Pipeline completed with errors${c_reset}-" - } - -} - - -def nfcoreHeader() { - // Log colors ANSI codes - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_white = params.monochrome_logs ? '' : "\033[0;37m"; - c_yellow = params.monochrome_logs ? 
'' : "\033[0;33m"; - - return """ -${c_dim}--------------------------------------------------${c_reset}- - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/phaseimpute v${workflow.manifest.version}${c_reset} - -${c_dim}--------------------------------------------------${c_reset}- - """.stripIndent() -} - -def checkHostname() { - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m" - if (params.hostnames) { - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error "====================================================\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "============================================================" - } - } - } - } -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json new file mode 100644 index 00000000..25071320 --- /dev/null +++ b/modules.json @@ -0,0 +1,226 @@ +{ + "name": "nf-core/phaseimpute", + "homePage": "https://github.com/nf-core/phaseimpute", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bcftools/annotate": { + "branch": "master", + 
"git_sha": "cb08035150685b11d890d90c9534d4f16869eaec", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" + }, + "bcftools/concat": { + "branch": "master", + "git_sha": "d1e0ec7670fa77905a378627232566ce54c3c26d", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/concat/bcftools-concat.diff" + }, + "bcftools/convert": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bcftools/index": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["bam_impute_glimpse", "vcf_phase_shapeit5"] + }, + "bcftools/merge": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/merge/bcftools-merge.diff" + }, + "bcftools/mpileup": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" + }, + "bcftools/norm": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bcftools/pluginsplit": { + "branch": "master", + "git_sha": "14c910af1f9c20c65e5df9325a1e4d3939d524d1", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff" + }, + "bcftools/query": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bcftools/stats": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bcftools/view": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bedtools/makewindows": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["vcf_phase_shapeit5"] + }, + 
"gawk": { + "branch": "master", + "git_sha": "caab1314ca62679b629da4c79afa9a4cab2bb8ee", + "installed_by": ["modules"] + }, + "glimpse/chunk": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["bam_impute_glimpse"] + }, + "glimpse/ligate": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["bam_impute_glimpse"] + }, + "glimpse/phase": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["bam_impute_glimpse"] + }, + "glimpse2/chunk": { + "branch": "master", + "git_sha": "cc64e71652f67ce627064af51008fe0a00850987", + "installed_by": ["modules"], + "patch": "modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff" + }, + "glimpse2/concordance": { + "branch": "master", + "git_sha": "6aed50284f6b208fd8eff1ec1dae4b25bf03c432", + "installed_by": ["modules"] + }, + "glimpse2/ligate": { + "branch": "master", + "git_sha": "cc64e71652f67ce627064af51008fe0a00850987", + "installed_by": ["modules"] + }, + "glimpse2/phase": { + "branch": "master", + "git_sha": "cc64e71652f67ce627064af51008fe0a00850987", + "installed_by": ["modules"], + "patch": "modules/nf-core/glimpse2/phase/glimpse2-phase.diff" + }, + "gunzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", + "installed_by": ["modules"] + }, + "quilt/quilt": { + "branch": "master", + "git_sha": "82dcdacf8aa932e6683e15a8992ed1cd6fcb174f", + "installed_by": ["modules"], + "patch": "modules/nf-core/quilt/quilt/quilt-quilt.diff" + }, + "samtools/coverage": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" + }, + "samtools/depth": { + "branch": "master", + "git_sha": 
"b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/depth/samtools-depth.diff" + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/merge": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" + }, + "samtools/reheader": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/reheader/samtools-reheader.diff" + }, + "samtools/view": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/view/samtools-view.diff" + }, + "shapeit5/ligate": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["vcf_phase_shapeit5"] + }, + "shapeit5/phasecommon": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["vcf_phase_shapeit5"], + "patch": "modules/nf-core/shapeit5/phasecommon/shapeit5-phasecommon.diff" + }, + "stitch": { + "branch": "master", + "git_sha": "55e411128ca5036def5dae1aa28a09e3776f59ea", + "installed_by": ["modules"], + "patch": "modules/nf-core/stitch/stitch.diff" + }, + "tabix/bgzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "tabix/tabix": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "vcflib/vcffixup": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + } + } + }, + 
"subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "installed_by": ["subworkflows"] + }, + "utils_nfschema_plugin": { + "branch": "master", + "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "installed_by": ["subworkflows"] + } + } + } + } + } +} diff --git a/modules/local/add_columns/environment.yml b/modules/local/add_columns/environment.yml new file mode 100644 index 00000000..315f6dc6 --- /dev/null +++ b/modules/local/add_columns/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/local/add_columns/main.nf b/modules/local/add_columns/main.nf new file mode 100644 index 00000000..79f9a194 --- /dev/null +++ b/modules/local/add_columns/main.nf @@ -0,0 +1,42 @@ +process ADD_COLUMNS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path('*.txt'), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # Find the header line + HEADER_STR="#Genotype concordance by allele frequency bin (Variants: SNPs + indels)" + HEADER_LINE=\$(grep -n -m 1 "^\${HEADER_STR}" $input | cut -d: -f1 ) + HEADER_START=\$((HEADER_LINE + 1)) + + tail -n +\$HEADER_START $input | \\ + awk 'NR==1{\$(NF+1)="ID"} NR>1{\$(NF+1)="${meta.id}"}1' | \\ + awk 'NR==1{\$(NF+1)="Depth"} NR>1{\$(NF+1)="${meta.depth}"}1' | \\ + awk 'NR==1{\$(NF+1)="GPArray"} NR>1{\$(NF+1)="${meta.gparray}"}1' | \\ + awk 'NR==1{\$(NF+1)="Tools"} NR>1{\$(NF+1)="${meta.tools}"}1' | \\ + awk 'NR==1{\$(NF+1)="Panel"} NR>1{\$(NF+1)="${meta.panel}"}1' > \\ + ${prefix}.txt + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/local/add_columns/tests/main.nf.test b/modules/local/add_columns/tests/main.nf.test new file mode 100644 index 00000000..b289a930 --- /dev/null +++ b/modules/local/add_columns/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process ADD_COLUMNS" + script "../main.nf" + process "ADD_COLUMNS" + + tag "modules" + tag "modules_local" + tag "add_columns" + tag "gawk" + + test("Add columns to txt file") { + + when { + process { + """ + file = Channel.of("# Other content\\ntest\\ntest1\\n#Genotype concordance by allele frequency bin (Variants: SNPs + indels)\\nCol1\\tCol2\\nA\\t1\\nB\\t2") + .collectFile(name: 'sample.txt', newLine: true) + input[0] = Channel.of([[id:"MyId", depth:2, gparray:"SNPArray", tools:"Glimpse", panel:"1000GP"]]).combine(file) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() }, + { assert snapshot(path(process.out.txt[0][1]).readLines()).match("Content") } + ) + } + } + test("Add columns to txt file with missing fields") { + + when { + process { + """ + file = Channel.of("# Other content\\ntest\\ntest1\\n" + + "#Genotype concordance by allele frequency bin (Variants: SNPs + indels)\\n" + + "Col1\\tCol2\\nA\\t1\\nB\\t2") + .collectFile(name: 'sample.txt', newLine: true) + input[0] = Channel.of([[id:"MyId", tools:"Glimpse"]]).combine(file) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(path(process.out.txt[0][1]).readLines()).match("ContentWithNA") } + ) + } + } +} diff --git a/modules/local/add_columns/tests/main.nf.test.snap b/modules/local/add_columns/tests/main.nf.test.snap new file mode 100644 index 00000000..5cf5457e --- /dev/null +++ b/modules/local/add_columns/tests/main.nf.test.snap @@ -0,0 +1,106 @@ +{ + "Content": { + "content": [ + [ + "Col1 Col2 ID Depth GPArray Tools Panel", + "A 1 MyId 2 SNPArray Glimpse 1000GP", + "B 2 MyId 2 SNPArray Glimpse 1000GP" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T14:43:39.482844695" + }, + "ContentWithNA": { + "content": [ + [ + "Col1 Col2 ID Depth GPArray Tools Panel", + "A 1 MyId null null Glimpse null", + "B 2 MyId null null Glimpse null" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T14:44:39.395698015" + }, + "Add columns to txt file": { + "content": [ + { + "0": [ + [ + { + "id": "MyId", + "depth": 2, + "gparray": "SNPArray", + "tools": "Glimpse", + "panel": "1000GP" + }, + "MyId.txt:md5,da3aa04353303c5566d1bea2dbc8f883" + ] + ], + "1": [ + "versions.yml:md5,ab0afe509bddeef28fcf8d00db1cec81" + ], + "txt": [ + [ + { + "id": "MyId", + "depth": 2, + "gparray": "SNPArray", + "tools": "Glimpse", + "panel": "1000GP" + }, + "MyId.txt:md5,da3aa04353303c5566d1bea2dbc8f883" + ] 
+ ], + "versions": [ + "versions.yml:md5,ab0afe509bddeef28fcf8d00db1cec81" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-11-06T13:32:32.608346737" + }, + "Add columns to txt file with missing fields": { + "content": [ + { + "0": [ + [ + { + "id": "MyId", + "tools": "Glimpse" + }, + "MyId.txt:md5,87ae2dd6e6451eee25c04e23f7219fa5" + ] + ], + "1": [ + "versions.yml:md5,ab0afe509bddeef28fcf8d00db1cec81" + ], + "txt": [ + [ + { + "id": "MyId", + "tools": "Glimpse" + }, + "MyId.txt:md5,87ae2dd6e6451eee25c04e23f7219fa5" + ] + ], + "versions": [ + "versions.yml:md5,ab0afe509bddeef28fcf8d00db1cec81" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-11-06T13:32:39.650698807" + } +} \ No newline at end of file diff --git a/modules/local/bam_chr_extract/environment.yml b/modules/local/bam_chr_extract/environment.yml new file mode 100644 index 00000000..6178a297 --- /dev/null +++ b/modules/local/bam_chr_extract/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/local/bam_chr_extract/main.nf b/modules/local/bam_chr_extract/main.nf new file mode 100644 index 00000000..b82f59b8 --- /dev/null +++ b/modules/local/bam_chr_extract/main.nf @@ -0,0 +1,48 @@ +process BAM_CHR_EXTRACT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.txt"), emit: chr + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + head \\ + $input \| \\ + grep '^@SQ' | cut -d\$'\t' -f2 | sed -e 's/^SN://g' \\ + > ${prefix}.txt + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$( samtools --version |& sed '1!d; s/^.*samtools //' ) + grep: \$( grep --version |& grep -o -E '[0-9]+\\.[0-9]+' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$( samtools --version |& sed '1!d; s/^.*samtools //' ) + grep: \$( grep --help |& grep -o -E '[0-9]+\\.[0-9]+\\.[0-9]+' ) + END_VERSIONS + """ +} diff --git a/modules/local/bam_chr_extract/meta.yml b/modules/local/bam_chr_extract/meta.yml new file mode 100644 index 00000000..f645129c --- /dev/null +++ b/modules/local/bam_chr_extract/meta.yml @@ -0,0 +1,45 @@ +name: bam_chr_extract +description: Extract all contigs name into txt file +keywords: + - samtools + - bam + - cram + - head + - contig +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM, CRAM or SAM file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - chr: + type: file + description: List of contigs in the VCF file + pattern: "*{txt}" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/local/bam_chr_extract/tests/main.nf.test b/modules/local/bam_chr_extract/tests/main.nf.test new file mode 100644 index 00000000..9c182d15 --- /dev/null +++ b/modules/local/bam_chr_extract/tests/main.nf.test @@ -0,0 +1,41 @@ +nextflow_process { + + name "Test Process BAM_CHR_EXTRACT" + script "../main.nf" + process "BAM_CHR_EXTRACT" + + tag "modules" + tag "modules_local" + tag "bam_chr_extract" + + test("Extract chr from bam") { + when { + process { + """ + input[0] = Channel.fromList([ + [ + [ id:'test_single_end_bam' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ], + [ + [id: 'test2_paired_end_sorted_bam'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ], + [ + [id: 'test_paired_end_sorted_cram'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.chr.collect{path(it[1]).readLines()}).match("chr") } + ) + } + } +} diff --git a/modules/local/bam_chr_extract/tests/main.nf.test.snap b/modules/local/bam_chr_extract/tests/main.nf.test.snap new file mode 100644 index 00000000..a0e0bf23 --- /dev/null +++ 
b/modules/local/bam_chr_extract/tests/main.nf.test.snap @@ -0,0 +1,83 @@ +{ + "Extract chr from bam": { + "content": [ + { + "0": [ + [ + { + "id": "test2_paired_end_sorted_bam" + }, + "test2_paired_end_sorted_bam.txt:md5,5ae68a67b70976ee95342a7451cb5af1" + ], + [ + { + "id": "test_paired_end_sorted_cram" + }, + "test_paired_end_sorted_cram.txt:md5,5ae68a67b70976ee95342a7451cb5af1" + ], + [ + { + "id": "test_single_end_bam" + }, + "test_single_end_bam.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ] + ], + "1": [ + "versions.yml:md5,696c749ab73e8b0a4fcce6ff19999e9d", + "versions.yml:md5,696c749ab73e8b0a4fcce6ff19999e9d", + "versions.yml:md5,696c749ab73e8b0a4fcce6ff19999e9d" + ], + "chr": [ + [ + { + "id": "test2_paired_end_sorted_bam" + }, + "test2_paired_end_sorted_bam.txt:md5,5ae68a67b70976ee95342a7451cb5af1" + ], + [ + { + "id": "test_paired_end_sorted_cram" + }, + "test_paired_end_sorted_cram.txt:md5,5ae68a67b70976ee95342a7451cb5af1" + ], + [ + { + "id": "test_single_end_bam" + }, + "test_single_end_bam.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ] + ], + "versions": [ + "versions.yml:md5,696c749ab73e8b0a4fcce6ff19999e9d", + "versions.yml:md5,696c749ab73e8b0a4fcce6ff19999e9d", + "versions.yml:md5,696c749ab73e8b0a4fcce6ff19999e9d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T14:57:17.861877534" + }, + "chr": { + "content": [ + [ + [ + "chr22" + ], + [ + "chr22" + ], + [ + "MT192765.1" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T14:57:44.989492548" + } +} \ No newline at end of file diff --git a/modules/local/list_to_file/environment.yml b/modules/local/list_to_file/environment.yml new file mode 100644 index 00000000..315f6dc6 --- /dev/null +++ b/modules/local/list_to_file/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/local/list_to_file/main.nf 
b/modules/local/list_to_file/main.nf new file mode 100644 index 00000000..dcb48d99 --- /dev/null +++ b/modules/local/list_to_file/main.nf @@ -0,0 +1,46 @@ +process LIST_TO_FILE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input, arity: '0..*'), val(id) + + output: + tuple val(meta), path('*.id.txt'), path('*.noid.txt'), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # Take all files of the input and list them in a file + # and add as second column the id + awk 'BEGIN { + split("${input}", f, " "); + ids = "${id}"; + gsub(/[\\[\\]]/, "", ids); + split(ids, i, ", "); + for (j in f) print f[j], i[j] + }' > ${prefix}.id.txt + + # Take all files of the input and list them in a file + # without the id + + awk 'BEGIN { + split("${input}", f, " "); + for (j in f) print f[j] + }' > ${prefix}.noid.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/local/list_to_file/tests/main.nf.test b/modules/local/list_to_file/tests/main.nf.test new file mode 100644 index 00000000..918d3d6b --- /dev/null +++ b/modules/local/list_to_file/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process LIST_TO_FILE" + script "../main.nf" + process "LIST_TO_FILE" + + tag "modules" + tag "modules_local" + tag "list_to_file" + tag "gawk" + + test("Aggregate all files into a list") { + + when { + process { + """ + input[0] = Channel.of([ + [id: "all"], + [file("file1.txt"), file("file2.txt")], + ["A", "B"] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + 
{ assert snapshot(process.out).match() }, + { assert snapshot(path(process.out.txt[0][1]).readLines()).match("Content_withid") }, + { assert snapshot(path(process.out.txt[0][2]).readLines()).match("Content_noid") } + ) + } + } +} diff --git a/modules/local/list_to_file/tests/main.nf.test.snap b/modules/local/list_to_file/tests/main.nf.test.snap new file mode 100644 index 00000000..5e8195f9 --- /dev/null +++ b/modules/local/list_to_file/tests/main.nf.test.snap @@ -0,0 +1,63 @@ +{ + "Aggregate all files into a list": { + "content": [ + { + "0": [ + [ + { + "id": "all" + }, + "all.id.txt:md5,6ffc4e703a69bb652dc666a8dcae1863", + "all.noid.txt:md5,045a6e673d95a6e8152a1d06959db93a" + ] + ], + "1": [ + "versions.yml:md5,ab51934043f45af87e4348a704066b0f" + ], + "txt": [ + [ + { + "id": "all" + }, + "all.id.txt:md5,6ffc4e703a69bb652dc666a8dcae1863", + "all.noid.txt:md5,045a6e673d95a6e8152a1d06959db93a" + ] + ], + "versions": [ + "versions.yml:md5,ab51934043f45af87e4348a704066b0f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-11-06T13:55:57.105169612" + }, + "Content_withid": { + "content": [ + [ + "file1.txt A", + "file2.txt B" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-16T15:51:19.289182489" + }, + "Content_noid": { + "content": [ + [ + "file1.txt", + "file2.txt" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-16T16:15:18.915373799" + } +} \ No newline at end of file diff --git a/modules/local/vcf_chr_extract/environment.yml b/modules/local/vcf_chr_extract/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/local/vcf_chr_extract/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/local/vcf_chr_extract/main.nf b/modules/local/vcf_chr_extract/main.nf new file mode 100644 index 00000000..56d796d1 --- 
/dev/null +++ b/modules/local/vcf_chr_extract/main.nf @@ -0,0 +1,49 @@ +process VCF_CHR_EXTRACT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.txt"), emit: chr + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bcftools \\ + query \\ + -f '%CHROM\\n' \\ + $input \\ + \| uniq \\ + > ${prefix}.txt + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + grep: \$( grep --version |& grep -o -E '[0-9]+\\.[0-9]+' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + uniq: \$( uniq --version |& grep -o -E '[0-9]+\\.[0-9]+' ) + END_VERSIONS + """ +} diff --git a/modules/local/vcf_chr_extract/meta.yml b/modules/local/vcf_chr_extract/meta.yml new file mode 100644 index 00000000..8286f2fc --- /dev/null +++ b/modules/local/vcf_chr_extract/meta.yml @@ -0,0 +1,41 @@ +name: vcf_chr_extract +description: Extract all contigs name into txt file +keywords: + - bcftools + - vcf + - head + - contig +tools: + - head: + description: Extract header from variant calling file. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#head + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - chr: + type: file + description: List of contigs in the VCF file + pattern: "*{txt}" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/local/vcf_chr_extract/tests/main.nf.test b/modules/local/vcf_chr_extract/tests/main.nf.test new file mode 100644 index 00000000..f8a61005 --- /dev/null +++ b/modules/local/vcf_chr_extract/tests/main.nf.test @@ -0,0 +1,41 @@ +nextflow_process { + + name "Test Process VCF_CHR_EXTRACT" + script "../main.nf" + process "VCF_CHR_EXTRACT" + + tag "modules" + tag "modules_local" + tag "vcf_chr_extract" + + test("Extract chr from vcf") { + when { + process { + """ + input[0] = Channel.fromList([ + [ + [id:'test_bcf'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.bcf', checkIfExists: true) + ], + [ + [id: 'test2_vcf'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true) + ], + [ + [id: 'test2_haplotc_vcf_gz'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz', checkIfExists: true) + ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.chr.collect{path(it[1]).readLines()}).match("chr") } + ) + } + } +} diff --git a/modules/local/vcf_chr_extract/tests/main.nf.test.snap b/modules/local/vcf_chr_extract/tests/main.nf.test.snap new file mode 100644 index 00000000..cfd9a524 --- /dev/null +++ b/modules/local/vcf_chr_extract/tests/main.nf.test.snap @@ -0,0 
+1,83 @@ +{ + "Extract chr from vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test2_haplotc_vcf_gz" + }, + "test2_haplotc_vcf_gz.txt:md5,e99d7d1051eee43ceab5563c2d09fcee" + ], + [ + { + "id": "test2_vcf" + }, + "test2_vcf.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ], + [ + { + "id": "test_bcf" + }, + "test_bcf.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ] + ], + "1": [ + "versions.yml:md5,ef0347837e562ce29da2cf994c87ebbe", + "versions.yml:md5,ef0347837e562ce29da2cf994c87ebbe", + "versions.yml:md5,ef0347837e562ce29da2cf994c87ebbe" + ], + "chr": [ + [ + { + "id": "test2_haplotc_vcf_gz" + }, + "test2_haplotc_vcf_gz.txt:md5,e99d7d1051eee43ceab5563c2d09fcee" + ], + [ + { + "id": "test2_vcf" + }, + "test2_vcf.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ], + [ + { + "id": "test_bcf" + }, + "test_bcf.txt:md5,3a9ea6d336e113a74d7fdca5e7b623fc" + ] + ], + "versions": [ + "versions.yml:md5,ef0347837e562ce29da2cf994c87ebbe", + "versions.yml:md5,ef0347837e562ce29da2cf994c87ebbe", + "versions.yml:md5,ef0347837e562ce29da2cf994c87ebbe" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T14:52:17.414553172" + }, + "chr": { + "content": [ + [ + [ + "chr21" + ], + [ + "MT192765.1" + ], + [ + "MT192765.1" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T14:55:53.822382037" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/bcftools-annotate.diff b/modules/nf-core/bcftools/annotate/bcftools-annotate.diff new file mode 100644 index 00000000..c9614e6c --- /dev/null +++ b/modules/nf-core/bcftools/annotate/bcftools-annotate.diff @@ -0,0 +1,42 @@ +Changes in module 'nf-core/bcftools/annotate' +'modules/nf-core/bcftools/annotate/environment.yml' is unchanged +Changes in 'bcftools/annotate/main.nf': +--- modules/nf-core/bcftools/annotate/main.nf ++++ modules/nf-core/bcftools/annotate/main.nf +@@ -8,8 +8,7 @@ + 'biocontainers/bcftools:1.20--h8b25389_0' 
}" + + input: +- tuple val(meta), path(input), path(index), path(annotations), path(annotations_index) +- path(header_lines) ++ tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(header_lines), path(rename_chr) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf +@@ -25,6 +24,7 @@ + def prefix = task.ext.prefix ?: "${meta.id}" + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def annotations_file = annotations ? "--annotations ${annotations}" : '' ++ def rename_chr_cmd = rename_chr ? "--rename-chrs ${rename_chr}" : '' + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : +@@ -41,6 +41,7 @@ + $args \\ + $annotations_file \\ + $header_file \\ ++ $rename_chr_cmd \\ + --output ${prefix}.${extension} \\ + --threads $task.cpus \\ + $input + +'modules/nf-core/bcftools/annotate/meta.yml' is unchanged +'modules/nf-core/bcftools/annotate/tests/bcf.config' is unchanged +'modules/nf-core/bcftools/annotate/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/annotate/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/annotate/tests/tags.yml' is unchanged +'modules/nf-core/bcftools/annotate/tests/vcf.config' is unchanged +'modules/nf-core/bcftools/annotate/tests/vcf_gz_index.config' is unchanged +'modules/nf-core/bcftools/annotate/tests/vcf_gz_index_csi.config' is unchanged +'modules/nf-core/bcftools/annotate/tests/vcf_gz_index_tbi.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda 
+dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 00000000..b6850bba --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,79 @@ +process BCFTOOLS_ANNOTATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(header_lines), path(rename_chr) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def rename_chr_cmd = rename_chr ? "--rename-chrs ${rename_chr}" : '' + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index_command = !index ? "bcftools index $input" : '' + + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + $index_command + + bcftools \\ + annotate \\ + $args \\ + $annotations_file \\ + $header_file \\ + $rename_chr_cmd \\ + --output ${prefix}.${extension} \\ + --threads $task.cpus \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index_extension = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index_extension.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index_extension}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 00000000..5bfccd2b --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,79 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. 
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + - index: + type: file + description: Index of the query VCF or BCF file + - annotations: + type: file + description: Bgzip-compressed file with annotations + - annotations_index: + type: file + description: Index of the annotations file + - - header_lines: + type: file + description: Contains lines to append to the output VCF header +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/annotate/tests/bcf.config b/modules/nf-core/bcftools/annotate/tests/bcf.config new file mode 100644 index 00000000..79d26779 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = "-x ID,INFO/DP,FORMAT/DP --output-type u" + ext.prefix = { "${meta.id}_ann" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test b/modules/nf-core/bcftools/annotate/tests/main.nf.test new file mode 100644 index 00000000..3a5c4933 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test @@ -0,0 +1,327 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ANNOTATE" + script "../main.nf" + process "BCFTOOLS_ANNOTATE" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/annotate" + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_output") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [] - vcf_output") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.versions + ).match() } + ) + } + + } + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + test("sarscov2 - [vcf, [], annotation, annotation_tbi], header - bcf_output") { + + config "./bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = Channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.versions + ).match("bcf") } + ) + } + + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - stub") { + + config "./vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ 
+ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()}, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap new file mode 100644 index 00000000..bac2224a --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap @@ -0,0 +1,388 @@ +{ + "bcf": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-12T16:39:33.331888" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:07:59.658031137" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + 
"csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:09:05.096883418" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:08:10.581301219" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:08:43.975017625" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi" + ] + ], + [ + + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:08:21.354059092" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:07:37.788393317" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:07:48.500746325" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:09:16.094918834" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + 
"single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea53f98610d42597cf384ff1fa3eb204" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-15T10:08:54.366358502" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/tags.yml b/modules/nf-core/bcftools/annotate/tests/tags.yml new file mode 100644 index 00000000..f97a1afc --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/annotate: + - "modules/nf-core/bcftools/annotate/**" diff --git a/modules/nf-core/bcftools/annotate/tests/vcf.config b/modules/nf-core/bcftools/annotate/tests/vcf.config new file mode 100644 index 00000000..611868d5 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf.config @@ -0,0 +1,4 @@ +process { + ext.args = "-x ID,INFO/DP,FORMAT/DP --output-type z" + ext.prefix = { "${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/vcf_gz_index.config b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index.config new file mode 100644 index 00000000..2fd9a225 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.args = "--output-type z --write-index --no-version" + ext.prefix = { "${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..512c1dfb --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_csi.config @@ -0,0 
+1,4 @@ +process { + ext.args = "--output-type z --write-index=csi --no-version" + ext.prefix = { "${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..7feb5ebb --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.args = "--output-type z --write-index=tbi --no-version" + ext.prefix = { "${meta.id}_vcf" } +} diff --git a/modules/nf-core/bcftools/concat/bcftools-concat.diff b/modules/nf-core/bcftools/concat/bcftools-concat.diff new file mode 100644 index 00000000..079cd30f --- /dev/null +++ b/modules/nf-core/bcftools/concat/bcftools-concat.diff @@ -0,0 +1,19 @@ +Changes in module 'nf-core/bcftools/concat' +--- modules/nf-core/bcftools/concat/main.nf ++++ modules/nf-core/bcftools/concat/main.nf +@@ -23,11 +23,12 @@ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ ++ ls -1v ${vcfs} > order_files.txt + bcftools concat \\ + --output ${prefix}.vcf.gz \\ + $args \\ + --threads $task.cpus \\ +- ${vcfs} ++ -f order_files.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/modules/nf-core/bcftools/concat/environment.yml b/modules/nf-core/bcftools/concat/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/concat/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf new file mode 100644 index 00000000..17e3cf4a --- /dev/null +++ b/modules/nf-core/bcftools/concat/main.nf @@ -0,0 +1,59 @@ +process BCFTOOLS_CONCAT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcfs), path(tbi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true + tuple val(meta), path("${prefix}.vcf.gz.csi"), emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def tbi_names = tbi.findAll { file -> !(file instanceof List) }.collect { file -> file.name } + def create_input_index = vcfs.collect { vcf -> tbi_names.contains(vcf.name + ".tbi") ? "" : "tabix ${vcf}" }.join("\n ") + """ + ${create_input_index} + ls -1v ${vcfs} > order_files.txt + bcftools concat \\ + --output ${prefix}.vcf.gz \\ + $args \\ + --threads $task.cpus \\ + -f order_files.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_index = index.matches("csi|tbi") ? 
"touch ${prefix}.vcf.gz.${index}" : "" + """ + echo "" | gzip > ${prefix}.vcf.gz + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml new file mode 100644 index 00000000..d2565b28 --- /dev/null +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -0,0 +1,83 @@ +name: bcftools_concat +description: Concatenate VCF files +keywords: + - variant calling + - concat + - bcftools + - VCF +tools: + - concat: + description: | + Concatenate VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbi: + type: list + description: | + List containing 2 or more index files (optional) + e.g. [ 'file1.tbi', 'file2.tbi' ] +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - ${prefix}.vcf.gz: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.tbi" + - ${prefix}.vcf.gz.tbi: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*.csi" + - ${prefix}.vcf.gz.csi: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@nvnieuwk" +maintainers: + - "@abhi18av" + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test b/modules/nf-core/bcftools/concat/tests/main.nf.test new file mode 100644 index 00000000..cb4642b2 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test @@ -0,0 +1,316 @@ +nextflow_process { + + name "Test Process BCFTOOLS_CONCAT" + script "../main.nf" + process "BCFTOOLS_CONCAT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/concat" + + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]]") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + + test("homo_sapiens - [[vcf1, vcf2], []]") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test.snap b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap new file mode 100644 index 00000000..09e87cd3 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap @@ -0,0 +1,395 @@ +{ + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:11.178539482" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]]": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + + ], + 
"vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:08.765639958" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:21.607274757" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:27.332133878" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": 
"2024-09-26T11:03:36.575719606" + }, + "homo_sapiens - [[vcf1, vcf2], []]": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:54.069826178" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:02.45346063" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + [ + + ], + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:44.618596639" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test3" + }, + 
"test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:19.745768656" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/nextflow.config b/modules/nf-core/bcftools/concat/tests/nextflow.config new file mode 100644 index 00000000..f3e1e98c --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "--no-version" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/tags.yml b/modules/nf-core/bcftools/concat/tests/tags.yml new file mode 100644 index 00000000..21710d4e --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/concat: + - "modules/nf-core/bcftools/concat/**" diff --git a/modules/nf-core/bcftools/concat/tests/vcf_gz_index.config b/modules/nf-core/bcftools/concat/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" 
+} diff --git a/modules/nf-core/bcftools/concat/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/convert/environment.yml b/modules/nf-core/bcftools/convert/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/convert/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/convert/main.nf b/modules/nf-core/bcftools/convert/main.nf new file mode 100644 index 00000000..6a22567e --- /dev/null +++ b/modules/nf-core/bcftools/convert/main.nf @@ -0,0 +1,73 @@ +process BCFTOOLS_CONVERT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + path(bed) + + output: + tuple val(meta), path("*.vcf.gz"), optional:true , emit: vcf_gz + tuple val(meta), path("*.vcf") , optional:true , emit: vcf + tuple val(meta), path("*.bcf.gz"), optional:true , emit: bcf_gz + tuple val(meta), path("*.bcf") , optional:true , emit: bcf + tuple val(meta), path("*.hap.gz"), optional:true , emit: hap + tuple val(meta), path("*.legend.gz"), optional:true , emit: legend + tuple val(meta), path("*.samples"), optional:true , emit: samples + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def regions = bed ? "--regions-file $bed" : "" + def reference = fasta ? "--fasta-ref $fasta" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "--output ${prefix}.bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "--output ${prefix}.bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "--output ${prefix}.vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "--output ${prefix}.vcf" : + args.contains("--haplegendsample") || args.contains("-h") ? "" : + "--output ${prefix}.vcf.gz" + + """ + bcftools convert \\ + $args \\ + $regions \\ + $extension \\ + --threads $task.cpus \\ + $reference \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? 
"bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/convert/meta.yml b/modules/nf-core/bcftools/convert/meta.yml new file mode 100644 index 00000000..4f2a9f3f --- /dev/null +++ b/modules/nf-core/bcftools/convert/meta.yml @@ -0,0 +1,140 @@ +name: "bcftools_convert" +description: Converts certain output formats to VCF +keywords: + - bcftools + - convert + - vcf + - gvcf +tools: + - "bcftools": + description: "BCFtools is a set of utilities that manipulate variant calls in + the Variant Call Format (VCF) and its binary counterpart BCF. All commands work + transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.\ + \ Most commands accept VCF, bgzipped VCF and BCF with filetype detected automatically + even when streaming from a pipe. Indexed VCF and BCF will work in all situations. + Un-indexed VCF and BCF and streams will work in most, but not all situations." + homepage: "https://samtools.github.io/bcftools/bcftools.html" + documentation: "https://samtools.github.io/bcftools/bcftools.html#convert" + tool_dev_url: "https://github.com/samtools/bcftools" + doi: "10.1093/gigascience/giab008" + licence: ["GPL"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + The input format. 
Each format needs a seperate parameter to be specified in the `args`: + - GEN/SAMPLE file: `--gensample2vcf` + - gVCF file: `--gvcf2vcf` + - HAP/SAMPLE file: `--hapsample2vcf` + - HAP/LEGEND/SAMPLE file: `--haplegendsample2vcf` + - TSV file: `--tsv2vcf` + pattern: "*.{gen,sample,g.vcf,hap,legend}{.gz,}" + - input_index: + type: file + description: (Optional) The index for the input files, if needed + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: (Optional) The reference fasta, only needed for gVCF conversion + pattern: "*.{fa,fasta}" + - - bed: + type: file + description: (Optional) The BED file containing the regions for the VCF file + pattern: "*.bed" +output: + - vcf_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: VCF merged output file (bgzipped) => when `--output-type z` is + used + pattern: "*.vcf.gz" + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf": + type: file + description: VCF merged output file => when `--output-type v` is used + pattern: "*.vcf" + - bcf_gz: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bcf.gz": + type: file + description: BCF merged output file (bgzipped) => when `--output-type b` is + used + pattern: "*.bcf.gz" + - bcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bcf": + type: file + description: BCF merged output file => when `--output-type u` is used + pattern: "*.bcf" + - hap: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.hap.gz": + type: file + description: hap format used by IMPUTE2 and SHAPEIT + pattern: "*.hap.gz" + - legend: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.legend.gz": + type: file + description: legend format used by IMPUTE2 and SHAPEIT + pattern: "*.legend.gz" + - samples: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.samples": + type: file + description: samples format used by IMPUTE2 and SHAPEIT + pattern: "*.samples" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nvnieuwk" + - "@ramprasadn" + - "@atrigila" +maintainers: + - "@nvnieuwk" + - "@ramprasadn" + - "@atrigila" diff --git a/modules/nf-core/bcftools/index/environment.yml b/modules/nf-core/bcftools/index/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/index/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/index/main.nf b/modules/nf-core/bcftools/index/main.nf new file mode 100644 index 00000000..408e584c --- /dev/null +++ b/modules/nf-core/bcftools/index/main.nf @@ -0,0 +1,51 @@ +process BCFTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.csi"), optional:true, emit: csi + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + bcftools \\ + index \\ + $args \\ + --threads $task.cpus \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--tbi") || args.contains("-t") ? "tbi" : + "csi" + """ + touch ${vcf}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/index/meta.yml b/modules/nf-core/bcftools/index/meta.yml new file mode 100644 index 00000000..6897d1bd --- /dev/null +++ b/modules/nf-core/bcftools/index/meta.yml @@ -0,0 +1,63 @@ +name: bcftools_index +description: Index VCF tools +keywords: + - vcf + - index + - bcftools + - csi + - tbi +tools: + - bcftools: + description: BCFtools is a set of utilities that manipulate variant calls in the + Variant Call Format (VCF) and its binary counterpart BCF. All commands work + transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most + commands accept VCF, bgzipped VCF and BCF with filetype detected automatically + even when streaming from a pipe. Indexed VCF and BCF will work in all situations. + Un-indexed VCF and BCF and streams will work in most, but not all situations. 
+ homepage: https://samtools.github.io/bcftools/ + documentation: https://samtools.github.io/bcftools/howtos/index.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/gigascience/giab008" + licence: ["MIT", "GPL-3.0-or-later"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file (optionally GZIPPED) + pattern: "*.{vcf,vcf.gz}" +output: + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index file + pattern: "*.csi" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index file for larger files (activated with + -t parameter) + pattern: "*.tbi" + - versions: + - versions.yml: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/bcftools/index/tests/main.nf.test b/modules/nf-core/bcftools/index/tests/main.nf.test new file mode 100644 index 00000000..9b374853 --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process BCFTOOLS_INDEX" + script "../main.nf" + process "BCFTOOLS_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/index" + + test("sarscov2 - vcf - csi") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions).match() + } + ) + } + + } + + test("sarscov2 - vcf - tbi") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions).match() + } + ) + } + + } + + test("sarscov2 - vcf - csi - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - vcf - tbi - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/index/tests/main.nf.test.snap b/modules/nf-core/bcftools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..b6f7b700 --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/main.nf.test.snap @@ -0,0 +1,120 @@ +{ + "sarscov2 - vcf - csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,b4ea0f633dba7f5992fbf41b518f98e9" + ], + "csi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + 
"versions": [ + "versions.yml:md5,b4ea0f633dba7f5992fbf41b518f98e9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:43:09.347303629" + }, + "sarscov2 - vcf - tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,b4ea0f633dba7f5992fbf41b518f98e9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:42:46.38669168" + }, + "sarscov2 - vcf - tbi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,b4ea0f633dba7f5992fbf41b518f98e9" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b4ea0f633dba7f5992fbf41b518f98e9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:43:32.494612317" + }, + "sarscov2 - vcf - csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi" + ] + ], + [ + "versions.yml:md5,b4ea0f633dba7f5992fbf41b518f98e9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:42:33.652109509" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/index/tests/nextflow.config b/modules/nf-core/bcftools/index/tests/nextflow.config new file mode 100644 index 00000000..db83f7e5 --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--tbi' +} diff --git a/modules/nf-core/bcftools/index/tests/tags.yml b/modules/nf-core/bcftools/index/tests/tags.yml new file mode 100644 index 00000000..b4c349be --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/index: + - "modules/nf-core/bcftools/index/**" diff --git 
a/modules/nf-core/bcftools/merge/bcftools-merge.diff b/modules/nf-core/bcftools/merge/bcftools-merge.diff new file mode 100644 index 00000000..8da3841c --- /dev/null +++ b/modules/nf-core/bcftools/merge/bcftools-merge.diff @@ -0,0 +1,48 @@ +Changes in module 'nf-core/bcftools/merge' +'modules/nf-core/bcftools/merge/environment.yml' is unchanged +Changes in 'bcftools/merge/main.nf': +--- modules/nf-core/bcftools/merge/main.nf ++++ modules/nf-core/bcftools/merge/main.nf +@@ -8,14 +8,13 @@ + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: +- tuple val(meta), path(vcfs), path(tbis) +- tuple val(meta2), path(fasta) +- tuple val(meta3), path(fai) +- tuple val(meta4), path(bed) ++ tuple val(meta), path(vcfs), path(tbis), path(bed) ++ tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf +- tuple val(meta), path("*.{csi,tbi}") , emit: index, optional: true ++ tuple val(meta), path("*.tbi") , emit: tbi, optional: true ++ tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: +@@ -31,7 +30,7 @@ + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : +- "vcf" ++ "vcf.gz" + + """ + bcftools merge \\ + +'modules/nf-core/bcftools/merge/meta.yml' is unchanged +'modules/nf-core/bcftools/merge/tests/bcf.config' is unchanged +'modules/nf-core/bcftools/merge/tests/bcf_gz.config' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/merge/tests/nextflow.config' is unchanged +'modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config' is unchanged +'modules/nf-core/bcftools/merge/tests/tags.yml' is unchanged +'modules/nf-core/bcftools/merge/tests/vcf.config' is unchanged +'modules/nf-core/bcftools/merge/tests/vcf_gz.config' is unchanged +'modules/nf-core/bcftools/merge/tests/vcf_gz_index.config' is unchanged +'modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config' is unchanged +'modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/merge/environment.yml b/modules/nf-core/bcftools/merge/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/merge/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf new file mode 100644 index 00000000..c922b9ca --- /dev/null +++ b/modules/nf-core/bcftools/merge/main.nf @@ -0,0 +1,73 @@ +process BCFTOOLS_MERGE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcfs), path(tbis), path(bed) + tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def input = (vcfs.collect().size() > 1) ? vcfs.sort{ it.name } : vcfs + def regions = bed ? "--regions-file $bed" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + + """ + bcftools merge \\ + $args \\ + $regions \\ + --threads $task.cpus \\ + --output ${prefix}.${extension} \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? 
"echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/merge/meta.yml b/modules/nf-core/bcftools/merge/meta.yml new file mode 100644 index 00000000..2cf09a1d --- /dev/null +++ b/modules/nf-core/bcftools/merge/meta.yml @@ -0,0 +1,96 @@ +name: bcftools_merge +description: Merge VCF files +keywords: + - variant calling + - merge + - VCF +tools: + - merge: + description: | + Merge VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: file + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbis: + type: file + description: | + List containing the tbi index files corresponding to the vcfs input files + e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: "(Optional) The fasta reference file (only necessary for the `--gvcf + FILE` parameter)" + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: "(Optional) The fasta reference file index (only necessary for + the `--gvcf FILE` parameter)" + pattern: "*.fai" + - - meta4: + type: map + description: | + Groovy Map containing bed information + e.g. 
[ id:'genome' ] + - bed: + type: file + description: "(Optional) The bed regions to merge on" + pattern: "*.bed" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{bcf,vcf}{,.gz}": + type: file + description: merged output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{csi,tbi}": + type: file + description: index of merged output + pattern: "*.{csi,tbi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/merge/tests/bcf.config b/modules/nf-core/bcftools/merge/tests/bcf.config new file mode 100644 index 00000000..4467d07d --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/bcf.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type u --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/bcf_gz.config b/modules/nf-core/bcftools/merge/tests/bcf_gz.config new file mode 100644 index 00000000..280de8db --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/bcf_gz.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type b --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/main.nf.test b/modules/nf-core/bcftools/merge/tests/main.nf.test new file mode 100644 index 00000000..3995fc1a --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/main.nf.test @@ -0,0 +1,853 @@ +nextflow_process { + + name "Test Process BCFTOOLS_MERGE" + script "../main.nf" + process "BCFTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/merge" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + config "./nextflow.config" + 
+ when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf output") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).md5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output") { + + config "./vcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - bcf output") { + + config "./bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf") }, + { assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output") { + + config "./bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf.gz") }, + { assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + file(process.out.index.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index") { + + config "./vcf_gz_index_csi.config" + + when { + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + file(process.out.index.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("tbi") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + file(process.out.index.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + 
test("sarscov2 - [vcf, tbi], [], [], bed") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).md5, + process.out.versions, + ).match() } + ) + } + + } + + test("homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output") { + + config "./nextflow.gvcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ] + input[2] = [ + [ id:'test' ], // meta map + [ 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + ] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - one sample") { + + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).md5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf output - stub") { + + options "-stub" + config "./vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - stub") { + + options "-stub" + config "./vcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot(process.out).match() } + ) + } + + } + + 
test("sarscov2 - [vcf, tbi], [], [], [] - bcf output - stub") { + + options "-stub" + config "./bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output - stub") { + + options "-stub" + config "./bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf.gz") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index - stub") { + + options "-stub" + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ 
+ [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index - stub") { + + options "-stub" + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index - stub") { + + options "-stub" + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] 
= [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("tbi") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], bed - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output - stub") { + + options "-stub" + config "./nextflow.gvcf.config" + 
when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ] + input[2] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + ] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - one sample - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git 
a/modules/nf-core/bcftools/merge/tests/main.nf.test.snap b/modules/nf-core/bcftools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..b3b62556 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/main.nf.test.snap @@ -0,0 +1,607 @@ +{ + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + "test.vcf.gz.tbi", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T12:34:16.977726522" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:17:34.532910365" + }, + "sarscov2 - [vcf, tbi], [], [], bed": { + "content": [ + "febdcfb851dcfc83d8248520830aef10", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:29:48.630057872" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + 
], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:17:59.983157569" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf output": { + "content": [ + "57bb84274f336465d0a0946b532093b0", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:29:05.528412678" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T11:58:46.619657457" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:18:12.848227353" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T11:23:15.794389239" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + "test.vcf.gz.csi", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T11:57:16.850641473" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T11:56:27.949031071" + }, + "sarscov2 - [vcf, tbi], [], [], bed - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:18:19.273064822" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf output": { + "content": [ + "test.bcf", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T11:56:38.567500859" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf output - stub": { + "content": [ + { + "0": [ + 
[ + { + "id": "test" + }, + "test.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:17:47.356328326" + }, + "sarscov2 - [vcf, tbi], [], [], [] - one sample - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T15:13:18.708495878" + }, + "homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:56:45.706125286" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + "test.vcf.gz.csi", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T12:33:43.639646108" + }, + "homo_sapiens - [vcf, 
tbi], fasta, fai, bed - vcf.gz output": { + "content": [ + "645b7f7f9131bfe350a9ec3cf82c17fe", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:55:44.299812124" + }, + "sarscov2 - [vcf, tbi], [], [], [] - one sample": { + "content": [ + "2a374cf02f0c32cf607646167e7f153b", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T15:37:38.491844702" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:17:28.188178904" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ], + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T13:18:06.430943593" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + [ + 
"versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T12:31:03.893007442" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output": { + "content": [ + "test.bcf.gz", + [ + "versions.yml:md5,d2c0a30d9a4cc6df89a464ae82e0c38a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-05T11:56:56.416361069" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/merge/tests/nextflow.config b/modules/nf-core/bcftools/merge/tests/nextflow.config new file mode 100644 index 00000000..c3f0b715 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BCFTOOLS_MERGE { + ext.args = '--force-samples --force-single --no-version' + } +} diff --git a/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config b/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config new file mode 100644 index 00000000..8c457b71 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config @@ -0,0 +1,5 @@ +process { + withName: BCFTOOLS_MERGE { + ext.args = { "--force-samples --no-version --output-type z --gvcf $fasta" } + } +} diff --git a/modules/nf-core/bcftools/merge/tests/tags.yml b/modules/nf-core/bcftools/merge/tests/tags.yml new file mode 100644 index 00000000..1464d0c1 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/merge: + - "modules/nf-core/bcftools/merge/**" diff --git a/modules/nf-core/bcftools/merge/tests/vcf.config b/modules/nf-core/bcftools/merge/tests/vcf.config new file mode 100644 index 00000000..759222e5 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type v --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz.config b/modules/nf-core/bcftools/merge/tests/vcf_gz.config new file mode 100644 index 00000000..8b6ad8b4 --- 
/dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type z --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config b/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config new file mode 100644 index 00000000..9f1e9b1d --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..8308ee1a --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..9be4075b --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff b/modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff new file mode 100644 index 00000000..a24a121f --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff @@ -0,0 +1,30 @@ +Changes in module 'nf-core/bcftools/mpileup' +'modules/nf-core/bcftools/mpileup/environment.yml' is unchanged +Changes in 'bcftools/mpileup/main.nf': +--- modules/nf-core/bcftools/mpileup/main.nf ++++ modules/nf-core/bcftools/mpileup/main.nf +@@ -9,7 +9,7 @@ + + input: + tuple val(meta), path(bam), path(intervals) +- tuple val(meta2), path(fasta) ++ tuple val(meta2), path(fasta), path(fai) + val save_mpileup + + output: +@@ -40,7 +40,7 @@ + $bam \\ + $intervals \\ + $mpileup \\ +- | bcftools call --output-type v $args2 \\ ++ | bcftools 
call --output-type v $args2 $intervals \\ + | bcftools reheader --samples sample_name.list \\ + | bcftools view --output-file ${prefix}.vcf.gz --output-type z $args3 + + +'modules/nf-core/bcftools/mpileup/meta.yml' is unchanged +'modules/nf-core/bcftools/mpileup/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/mpileup/tests/nextflow.config' is unchanged +'modules/nf-core/bcftools/mpileup/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/mpileup/environment.yml b/modules/nf-core/bcftools/mpileup/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf new file mode 100644 index 00000000..0bddc95f --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/main.nf @@ -0,0 +1,72 @@ +process BCFTOOLS_MPILEUP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(bam), path(intervals) + tuple val(meta2), path(fasta), path(fai) + val save_mpileup + + output: + tuple val(meta), path("*vcf.gz") , emit: vcf + tuple val(meta), path("*vcf.gz.tbi") , emit: tbi + tuple val(meta), path("*stats.txt") , emit: stats + tuple val(meta), path("*.mpileup.gz"), emit: mpileup, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def mpileup = save_mpileup ? "| tee ${prefix}.mpileup" : "" + def bgzip_mpileup = save_mpileup ? "bgzip ${prefix}.mpileup" : "" + def intervals = intervals ? "-T ${intervals}" : "" + """ + echo "${meta.id}" > sample_name.list + + bcftools \\ + mpileup \\ + --fasta-ref $fasta \\ + $args \\ + $bam \\ + $intervals \\ + $mpileup \\ + | bcftools call --output-type v $args2 $intervals \\ + | bcftools reheader --samples sample_name.list \\ + | bcftools view --output-file ${prefix}.vcf.gz --output-type z $args3 + + $bgzip_mpileup + + tabix -p vcf -f ${prefix}.vcf.gz + + bcftools stats ${prefix}.vcf.gz > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bcftools_stats.txt + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.mpileup.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/mpileup/meta.yml 
b/modules/nf-core/bcftools/mpileup/meta.yml new file mode 100644 index 00000000..febcb33f --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/meta.yml @@ -0,0 +1,93 @@ +name: bcftools_mpileup +description: Compresses VCF files +keywords: + - variant calling + - mpileup + - VCF +tools: + - mpileup: + description: | + Generates genotype likelihoods at each genomic position with coverage. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" + - intervals: + type: file + description: Input intervals file. A file (commonly '.bed') containing regions + to subset + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - - save_mpileup: + type: boolean + description: Save mpileup file generated by bcftools mpileup +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*vcf.gz": + type: file + description: VCF gzipped output file + pattern: "*.{vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*vcf.gz.tbi": + type: file + description: tabix index file + pattern: "*.{vcf.gz.tbi}" + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*stats.txt": + type: file + description: Text output file containing stats + pattern: "*{stats.txt}" + - mpileup: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mpileup.gz": + type: file + description: mpileup gzipped output for all positions + pattern: "{*.mpileup.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test b/modules/nf-core/bcftools/mpileup/tests/main.nf.test new file mode 100644 index 00000000..665a349f --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/main.nf.test @@ -0,0 +1,208 @@ +nextflow_process { + + name "Test Process BCFTOOLS_MPILEUP" + script "../main.nf" + process "BCFTOOLS_MPILEUP" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/mpileup" + + config "./nextflow.config" + + test("sarscov2 - [bam, []], fasta, false") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_fasta_false.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_fasta_false.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_fasta_false.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_fasta_false_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], 
fasta, false stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_fasta_false_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_fasta_false_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_fasta_false_stub.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_fasta_false_stub_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, true") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_true_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_true_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_true_stub.bcftools_stats.txt") }, + { assert snapshot(file(process.out.mpileup[0][1]).name).match("bam_bed_fasta_true_stub.mpileup.gz") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_true_stub_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, true stub") { + + options "-stub" + + when { + process { 
+ """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_true.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_true.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_true.bcftools_stats.txt") }, + { assert snapshot(file(process.out.mpileup[0][1]).name).match("bam_bed_fasta_true.mpileup.gz") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_true_versions") } + ) + } + + } + + test("sarscov2 - [bam, bed], fasta, false") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_false.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_false.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_false.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_false_versions") } + ) + } + + } + + test("sarscov2 - [bam, bed], fasta, false stub") { + + options "-stub" + + when { + 
process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_false_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_false_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_false_stub.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_false_stub_versions") } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap new file mode 100644 index 00000000..a7726898 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap @@ -0,0 +1,274 @@ +{ + "bam_bed_fasta_true.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:45:32.654601222" + }, + "bam_bed_fasta_false_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:46:19.532461322" + }, + "bam_fasta_false_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:44:44.944919263" + }, + "bam_bed_fasta_false_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-04-22T18:37:57.844573" + }, + "bam_bed_fasta_true_stub.mpileup.gz": { + "content": [ + "test.mpileup.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.462382" + }, + "bam_bed_fasta_true.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:45:32.596363535" + }, + "bam_bed_fasta_true_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:45:10.034842649" + }, + "bam_fasta_false_versions": { + "content": [ + [ + "versions.yml:md5,6af9a67cd12c721ccc9702c17bc2f3a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:14:23.424052671" + }, + "bam_fasta_false_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.746204" + }, + "bam_bed_fasta_false_versions": { + "content": [ + [ + "versions.yml:md5,6af9a67cd12c721ccc9702c17bc2f3a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:14:44.101963218" + }, + "bam_bed_fasta_false.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:45:57.39416797" + }, + "bam_bed_fasta_true_versions": { + "content": [ + [ + "versions.yml:md5,6af9a67cd12c721ccc9702c17bc2f3a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:14:38.630394619" + }, + "bam_fasta_false.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:44:21.337711533" + }, + "bam_fasta_false.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.123726" + }, + "bam_bed_fasta_false.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.761517" + }, + "bam_bed_fasta_false_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:46:19.593445488" + }, + "bam_bed_fasta_false.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:45:57.444615176" + }, + "bam_fasta_false_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:44:44.888373837" + }, + "bam_bed_fasta_true_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.453121" + }, + "bam_fasta_false.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:44:21.401424919" + }, + "bam_fasta_false_stub_versions": { + "content": [ + [ + "versions.yml:md5,6af9a67cd12c721ccc9702c17bc2f3a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:14:28.530439931" + }, + "bam_bed_fasta_true.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.18304" + }, + "bam_bed_fasta_true_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:45:10.101920455" + }, + "bam_bed_fasta_false_stub_versions": { + "content": [ + [ + 
"versions.yml:md5,6af9a67cd12c721ccc9702c17bc2f3a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:14:49.139377519" + }, + "bam_bed_fasta_true.mpileup.gz": { + "content": [ + "test.mpileup.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.192888" + }, + "bam_bed_fasta_true_stub_versions": { + "content": [ + [ + "versions.yml:md5,6af9a67cd12c721ccc9702c17bc2f3a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:14:33.646218607" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/mpileup/tests/nextflow.config b/modules/nf-core/bcftools/mpileup/tests/nextflow.config new file mode 100644 index 00000000..a7ba19fe --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args2 = '--no-version --ploidy 1 --multiallelic-caller' + ext.args3 = '--no-version' +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/mpileup/tests/tags.yml b/modules/nf-core/bcftools/mpileup/tests/tags.yml new file mode 100644 index 00000000..07b91f98 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/mpileup: + - "modules/nf-core/bcftools/mpileup/**" diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 00000000..bd7a2501 --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,70 @@ +process BCFTOOLS_NORM { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? 
"echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 00000000..b6edeb4a --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,85 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed + BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test new file mode 100644 index 00000000..dbc41502 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test @@ -0,0 +1,563 @@ +nextflow_process { + + name "Test Process BCFTOOLS_NORM" + script "../main.nf" + process "BCFTOOLS_NORM" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/norm" + + test("sarscov2 - [ vcf, [] ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + 
process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] 
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output") { + + config "./nextflow.bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") { + + config "./nextflow.bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ 
+ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") { + + config "./nextflow.vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") { + + config "./nextflow.bcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") { + + config "./nextflow.bcf_gz.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap new file mode 100644 index 00000000..3be52116 --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap @@ -0,0 +1,758 @@ +{ + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:42.639095032" + }, + "sarscov2 - [ vcf, [] ], fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:05.448449893" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:12.741719961" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": { + "content": [ + { + "0": [ + 
[ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:39:22.875147941" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T08:15:23.38765384" + }, + "sarscov2 - [ vcf, [] ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:36:21.519977754" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + 
"vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:27.8230994" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:53.942403192" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:56:05.3799488" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:28.356741947" + }, + "sarscov2 - [ vcf, tbi ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:36:58.39445154" + }, + "sarscov2 - [ vcf, tbi ], fasta -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:16.259516142" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:39:10.503208929" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T07:52:58.381931979" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:59.121377258" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:56:16.404380471" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:09.808834237" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:42.141945244" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config new file mode 100644 index 00000000..b79af868 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type b --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config new file mode 100644 index 00000000..f36f397c --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type u --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config new file mode 100644 index 00000000..510803b4 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config new file mode 100644 index 00000000..10bf93e3 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config @@ -0,0 +1,4 
@@ +process { + ext.args = '-m -any --output-type v --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config new file mode 100644 index 00000000..b31dd2de --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type z ---no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/tags.yml b/modules/nf-core/bcftools/norm/tests/tags.yml new file mode 100644 index 00000000..f6f5e356 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/norm: + - "modules/nf-core/bcftools/norm/**" diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff 
b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff new file mode 100644 index 00000000..5e3092d8 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/bcftools-pluginsplit.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/bcftools/pluginsplit' +'modules/nf-core/bcftools/pluginsplit/environment.yml' is unchanged +Changes in 'bcftools/pluginsplit/main.nf': +--- modules/nf-core/bcftools/pluginsplit/main.nf ++++ modules/nf-core/bcftools/pluginsplit/main.nf +@@ -8,8 +8,7 @@ + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: +- tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*") +- path(samples) ++ tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*"), path(samples) + path(groups) + path(regions) + path(targets) + +'modules/nf-core/bcftools/pluginsplit/meta.yml' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/nextflow.config' is unchanged +'modules/nf-core/bcftools/pluginsplit/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/pluginsplit/environment.yml b/modules/nf-core/bcftools/pluginsplit/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/pluginsplit/main.nf b/modules/nf-core/bcftools/pluginsplit/main.nf new file mode 100644 index 00000000..d1977fbc --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/main.nf @@ -0,0 +1,78 @@ +process BCFTOOLS_PLUGINSPLIT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf, stageAs: "input/*"), path(tbi, stageAs: "input/*"), path(samples) + path(groups) + path(regions) + path(targets) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def samples_arg = samples ? "--samples-file ${samples}" : "" + def groups_arg = groups ? "--groups-file ${groups}" : "" + def regions_arg = regions ? "--regions-file ${regions}" : "" + def targets_arg = targets ? "--targets-file ${targets}" : "" + + """ + bcftools plugin split \\ + ${args} \\ + ${vcf} \\ + ${samples_arg} \\ + ${groups_arg} \\ + ${regions_arg} \\ + ${targets_arg} \\ + --output . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def determination_file = samples ?: targets + def create_cmd = extension.matches("vcf|bcf") ? 
"touch " : "echo '' | gzip > " + """ + cut -f 3 ${determination_file} | sed -e 's/\$/.${extension}/' > files.txt + while IFS= read -r filename; + do ${create_cmd} "./\$filename"; + if [ -n "${index}" ]; then + index_file=\$(sed -e 's/\$/.${index}/' <<< \$filename); + touch ./\$index_file; + fi; + done < files.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/pluginsplit/meta.yml b/modules/nf-core/bcftools/pluginsplit/meta.yml new file mode 100644 index 00000000..41f76658 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/meta.yml @@ -0,0 +1,96 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "bcftools_pluginsplit" +description: Split VCF by sample, creating single- or multi-sample VCFs. +keywords: + - split + - vcf + - genomics +tools: + - pluginsplit: + description: | + Split VCF by sample, creating single- or multi-sample VCFs. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF file to split + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + type: file + description: OPTIONAL - The index of the input VCF/BCF + pattern: "*.tbi" + - - samples: + type: file + description: | + A tab-separated file determining which samples should be in which output file + column 1: The sample name(s) in the input file + column 2: The sample name(s) to use in the output file (use `-` to keep the original name) + column 3: The name of the output file + Either this or a groups file should be given + pattern: "*" + - - groups: + type: file + description: | + A tab-separated file determining which samples should be in which output file(s) + column 1: The sample name(s) in the input file + column 2: The sample name(s) to use in the output file (use `-` to keep the original name) + column 3: The name of the output file(s) + Either this or a samples file should be given + pattern: "*" + - - regions: + type: file + description: A BED file containing regions to use + pattern: "*.bed" + - - targets: + type: file + description: A BED file containing regions to use (but streams rather than index-jumps) + pattern: "*.bed" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: The resulting VCF files from the split + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: TBI file + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI file + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test new file mode 100644 index 00000000..e7ae574e --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test @@ -0,0 +1,216 @@ +nextflow_process { + + name "Test Process BCFTOOLS_PLUGINSPLIT" + script "../main.nf" + process "BCFTOOLS_PLUGINSPLIT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/pluginsplit" + + test("homo_sapiens - [ vcf, tbi ], samples, [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.match).match() } + + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect{ it[1].collect { file(it).name } }, + process.out.tbi.collect{ it[1].collect { file(it).name } }, + ).match() } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], [], [], [], - error no sample") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("No samples to split: input/dbsnp_146.hg38.vcf.gz") } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub") { + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour") + .collectFile(name:"samples.txt", newLine:true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap new file mode 100644 index 00000000..b915b7cf --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap @@ -0,0 +1,261 @@ +{ + "homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "tumour.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + 
"versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "tumour.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:56:54.383979416" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi": { + "content": [ + [ + [ + "normal.vcf.gz", + "tumour.vcf.gz" + ] + ], + [ + [ + "normal.vcf.gz.tbi", + "tumour.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:56:44.796391578" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets": { + "content": null, + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:56:36.709842966" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "40001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "40001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + 
"versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:57:11.163588435" + }, + "homo_sapiens - [ vcf, tbi ], samples, [], [], []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,69cfc4bf92bf3e2847081a2026a4d3bb", + "tumour.vcf:md5,08fa5c8d5561c2a8d7c300cb0eea1042" + ] + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "normal.vcf:md5,69cfc4bf92bf3e2847081a2026a4d3bb", + "tumour.vcf:md5,08fa5c8d5561c2a8d7c300cb0eea1042" + ] + ] + ], + "versions": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:56:27.978161766" + }, + "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "15000.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", + "40001.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T14:57:02.456908152" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config b/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config new file mode 100644 index 00000000..9b9a4783 --- /dev/null +++ 
b/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'BCFTOOLS_PLUGINSPLIT' { + ext.args = '--write-index=tbi --output-type z' + } +} diff --git a/modules/nf-core/bcftools/pluginsplit/tests/tags.yml b/modules/nf-core/bcftools/pluginsplit/tests/tags.yml new file mode 100644 index 00000000..2f29ef18 --- /dev/null +++ b/modules/nf-core/bcftools/pluginsplit/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/pluginsplit: + - "modules/nf-core/bcftools/pluginsplit/**" diff --git a/modules/nf-core/bcftools/query/environment.yml b/modules/nf-core/bcftools/query/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/query/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..58019f4d --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,56 @@ +process BCFTOOLS_QUERY { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? 
"--samples-file ${samples}" : "" + """ + bcftools query \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $args \\ + $vcf \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..279b3205 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,67 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined + format. +keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be qeuried. + pattern: "*.{vcf.gz, vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + - - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
+ - - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + - - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: BCFTools query output file + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..39e67b35 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..3ead1f2c --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:44.916249758" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.txt", + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:49.932359271" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": 
[ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + [ + "versions.yml:md5,3d93ea9cd5d314743254618b49e4bd16" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:39.930697926" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..da81c2a0 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/tags.yml b/modules/nf-core/bcftools/query/tests/tags.yml new file mode 100644 index 00000000..fb9455cb --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/query: + - "modules/nf-core/bcftools/query/**" diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 00000000..93357b41 --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 00000000..20e5da77 --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) + + output: + tuple val(meta), path("*stats.txt"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? "--exons ${exons}" : "" + """ + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $reference_fasta \\ + $exons_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml new file mode 100644 index 00000000..655a61c5 --- /dev/null +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -0,0 +1,105 @@ +name: bcftools_stats +description: Generates stats from VCF files +keywords: + - variant calling + - stats + - VCF +tools: + - stats: + description: | + Parses VCF or BCF and produces text file stats which is suitable for + machine processing and can be plotted using 
plot-vcfstats. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited) + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + - - meta6: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. + e.g. 
'reference.fa' +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*stats.txt": + type: file + description: Text output file containing stats + pattern: "*_{stats.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test b/modules/nf-core/bcftools/stats/tests/main.nf.test new file mode 100644 index 00000000..be618b0b --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test @@ -0,0 +1,182 @@ +nextflow_process { + + name "Test Process BCFTOOLS_STATS" + script "../main.nf" + process "BCFTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/stats" + + test("sarscov2 - vcf_gz") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - regions") { + + when { + process { + """ + input[0] = [ [ id:'regions_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + input[1] = [ [id:'regions_test'], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("regions_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - targets") { + + when { + process { + """ + input[0] = [ [ id:'targets_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [id:'targets_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("targets_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - exons") { + + when { + process { + """ + input[0] = [ [ id:'exon_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [id: "exon_test"], + file(params.modules_testdata_base_path + 'delete_me/bcftools/stats/exons.tsv.gz', checkIfExists: true) ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("exon_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - reference") { + + when { + process { + """ + input[0] = [ [ id:'ref_test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [id: 'ref_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("ref_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + + test("sarscov2 - vcf_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test.snap b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..cd8cff6d --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap @@ -0,0 +1,180 @@ +{ + "sarscov2 - vcf_gz - reference": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --fasta-ref genome.fasta test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:35.506777837" + }, + "sarscov2 - vcf_gz - exons": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using 
plot-vcfstats.", + "# The command line was:\tbcftools stats --exons exons.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:30.57486244" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:27.637515559" + }, + "sarscov2 - vcf_gz - targets": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --targets-file test2.targets.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:25.732997442" + }, + "regions_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:32.559884458" + }, + "targets_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:37.512009805" + }, + "sarscov2 - vcf_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ], + "stats": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:57:09.614976125" + }, 
+ "exon_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:42.347397266" + }, + "ref_versions": { + "content": [ + [ + "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:47.26823622" + }, + "sarscov2 - vcf_gz": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:16:27.670416598" + }, + "sarscov2 - vcf_gz - regions": { + "content": [ + [ + "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --regions-file test3.vcf.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T18:14:20.759094062" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/tags.yml b/modules/nf-core/bcftools/stats/tests/tags.yml new file mode 100644 index 00000000..53c12d92 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/stats: + - "modules/nf-core/bcftools/stats/**" diff --git a/modules/nf-core/bcftools/view/environment.yml b/modules/nf-core/bcftools/view/environment.yml new file mode 100644 index 00000000..5c00b116 --- /dev/null +++ b/modules/nf-core/bcftools/view/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git 
a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf new file mode 100644 index 00000000..7fe4303c --- /dev/null +++ b/modules/nf-core/bcftools/view/main.nf @@ -0,0 +1,76 @@ +process BCFTOOLS_VIEW { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(index) + path(regions) + path(targets) + path(samples) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + """ + bcftools view \\ + --output ${prefix}.${extension} \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml new file mode 100644 index 00000000..aa7785f1 --- /dev/null +++ b/modules/nf-core/bcftools/view/meta.yml @@ -0,0 +1,88 @@ +name: bcftools_view +description: View, subset and filter VCF or BCF files by position and filtering expression. + Convert between VCF and BCF +keywords: + - variant calling + - view + - bcftools + - VCF +tools: + - view: + description: | + View, subset and filter VCF or BCF files by position and filtering expression. 
Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be inspected. + e.g. 'file.vcf' + - index: + type: file + description: | + The tab index for the VCF file to be inspected. + e.g. 'file.tbi' + - - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + e.g. 'file.vcf' + - - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + e.g. 'file.vcf' + - - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test new file mode 100644 index 00000000..1e60c50d --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test @@ -0,0 +1,298 @@ +nextflow_process { + + name "Test Process BCFTOOLS_VIEW" + script "../main.nf" + process "BCFTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/view" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test.snap b/modules/nf-core/bcftools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..fec22e36 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test.snap @@ -0,0 +1,333 @@ +{ + "sarscov2 - [vcf, tbi], [], [], [] - 
vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:14:38.717458272" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:13:44.760671384" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + 
"out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T16:06:21.669668533" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:14:53.026083914" + }, + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,1bcbd0eff25d316ba915d06463aab17b" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:14.663512924" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.vcf", + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:19.723448323" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": 
false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T08:24:36.358469315" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:09.588867653" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:13:33.834986869" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/nextflow.config b/modules/nf-core/bcftools/view/tests/nextflow.config new file mode 100644 index 00000000..932e3ba6 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--no-version --output-type v' +} diff --git a/modules/nf-core/bcftools/view/tests/tags.yml b/modules/nf-core/bcftools/view/tests/tags.yml new file mode 100644 index 00000000..43b1f0aa --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/view: + - "modules/nf-core/bcftools/view/**" diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } 
+ ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bedtools/makewindows/environment.yml b/modules/nf-core/bedtools/makewindows/environment.yml new file mode 100644 index 00000000..5683bc05 --- /dev/null +++ b/modules/nf-core/bedtools/makewindows/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/makewindows/main.nf b/modules/nf-core/bedtools/makewindows/main.nf new file mode 100644 index 00000000..36d6cac2 --- /dev/null +++ b/modules/nf-core/bedtools/makewindows/main.nf @@ -0,0 +1,49 @@ +process BEDTOOLS_MAKEWINDOWS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(regions) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def arg_input = regions.extension in ["bed", "tab"] ? "-b ${regions}" : "-g ${regions}" + if ("${regions}" == "${prefix}.bed") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + bedtools \\ + makewindows \\ + ${arg_input} \\ + ${args} \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if ("${regions}" == "${prefix}.bed") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/makewindows/meta.yml b/modules/nf-core/bedtools/makewindows/meta.yml new file mode 100644 index 00000000..df047f66 --- /dev/null +++ b/modules/nf-core/bedtools/makewindows/meta.yml @@ -0,0 +1,48 @@ +name: bedtools_makewindows +description: Makes adjacent or sliding windows across a genome or BED file. +keywords: + - bed + - windows + - fai + - chunking +tools: + - bedtools: + description: A set of tools for genomic analysis tasks, specifically enabling + genome arithmetic (merge, count, complement) on various file types. 
+ homepage: https://bedtools.readthedocs.io + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/makewindows.html + doi: "10.1093/bioinformatics/btq033" + licence: ["MIT"] + identifier: biotools:bedtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - regions: + type: file + description: BED file OR Genome details file () + pattern: "*.{bed,tab,fai}" +output: + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: BED file containing the windows + pattern: "*.bed" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevbrick" + - "@nvnieuwk" +maintainers: + - "@kevbrick" + - "@nvnieuwk" diff --git a/modules/nf-core/bedtools/makewindows/tests/main.nf.test b/modules/nf-core/bedtools/makewindows/tests/main.nf.test new file mode 100644 index 00000000..b27e59b6 --- /dev/null +++ b/modules/nf-core/bedtools/makewindows/tests/main.nf.test @@ -0,0 +1,58 @@ + +nextflow_process { + + name "Test Process BEDTOOLS_MAKEWINDOWS" + script "../main.nf" + process "BEDTOOLS_MAKEWINDOWS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/makewindows" + + test("test-bedtools-makewindows-bed") { + + when { + process { + """ + input[0] = [ + [ id:'test2'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-bedtools-makewindows-fai") { + + when { + process { + """ + input[0] = [ + [ id:'test2'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( 
+ { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/bedtools/makewindows/tests/main.nf.test.snap b/modules/nf-core/bedtools/makewindows/tests/main.nf.test.snap new file mode 100644 index 00000000..22cfbc17 --- /dev/null +++ b/modules/nf-core/bedtools/makewindows/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test-bedtools-makewindows-fai": { + "content": [ + { + "0": [ + [ + { + "id": "test2" + }, + "test2.bed:md5,622d1f62786fe4239b76c53168f21c54" + ] + ], + "1": [ + "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + ], + "bed": [ + [ + { + "id": "test2" + }, + "test2.bed:md5,622d1f62786fe4239b76c53168f21c54" + ] + ], + "versions": [ + "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T14:03:31.430455" + }, + "test-bedtools-makewindows-bed": { + "content": [ + { + "0": [ + [ + { + "id": "test2" + }, + "test2.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "1": [ + "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + ], + "bed": [ + [ + { + "id": "test2" + }, + "test2.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "versions": [ + "versions.yml:md5,f797078cc8b8bac7e6906685d4867be5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T14:03:27.118372" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/makewindows/tests/nextflow.config b/modules/nf-core/bedtools/makewindows/tests/nextflow.config new file mode 100644 index 00000000..fa16733f --- /dev/null +++ b/modules/nf-core/bedtools/makewindows/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BEDTOOLS_MAKEWINDOWS { + ext.args = '-w 50 ' + } +} diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml new file mode 100644 index 00000000..315f6dc6 --- /dev/null +++ b/modules/nf-core/gawk/environment.yml @@ -0,0 +1,5 @@ 
+channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 00000000..b9df2b8c --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,65 @@ +process GAWK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input, arity: '0..*') + path(program_file) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files + + program = program_file ? "-f ${program_file}" : "${args2}" + lst_gz = input.collect{ it.getExtension().endsWith("gz") } + unzip = lst_gz.contains(false) ? "" : "find ${input} -exec zcat {} \\; | \\" + input_cmd = unzip ? "" : "${input}" + output_cmd = suffix.endsWith("gz") ? "| gzip" : "" + + input.collect{ + assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ } + + """ + ${unzip} + awk \\ + ${args} \\ + ${program} \\ + ${input_cmd} \\ + ${output_cmd} \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch" + + """ + ${create_cmd} ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 00000000..2da41405 --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,57 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. +keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on + this file on the `ext.args2` or in the program file. + If the files have a `.gz` extension, they will be unzipped using `zcat`. + pattern: "*" + - - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't + wish to use a file, you can use `ext.args2` to specify the logic. 
+ pattern: "*" +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${suffix}: + type: file + description: The output file - specify the name of this file using `ext.prefix` + and the extension using `ext.suffix` + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test new file mode 100644 index 00000000..b3cde8bf --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test @@ -0,0 +1,165 @@ +nextflow_process { + + name "Test Process GAWK" + script "../main.nf" + process "GAWK" + + tag "modules" + tag "modules_nfcore" + tag "gawk" + + config "./nextflow.config" + + test("Convert fasta to bed") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Convert fasta to bed with program file") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = Channel.of('BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}').collectFile(name:"program.txt") + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Extract first column from multiple files") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + 
} + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)] + ] + input[1] = Channel.of('BEGIN {FS=" "}; {print \$1}').collectFile(name:"program.txt") + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Unzip files before processing") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true)] + ] + input[1] = Channel.of('/^#CHROM/ { print \$1, \$10 }').collectFile(name:"column_header.txt") + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Compress after processing") { + when { + params { + gawk_suffix = "txt.gz" + gawk_args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Input and output files are similar") { + when { + params { + gawk_suffix = "txt" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'hello' ], // meta map + [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)] + ] + input[1] = Channel.of('BEGIN {FS=" "}; {print 
\$1}').collectFile(name:"program.txt") + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("Input and output names are the same, set prefix in module configuration to disambiguate!") } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap new file mode 100644 index 00000000..1b3c2f71 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test.snap @@ -0,0 +1,167 @@ +{ + "Compress after processing": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T17:11:20.054143406" + }, + "Convert fasta to bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T13:14:02.347809811" + }, + "Convert fasta to bed with program file": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T13:14:11.894616209" + }, + "Extract first column from multiple files": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,566c51674bd643227bb2d83e0963376d" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,566c51674bd643227bb2d83e0963376d" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T22:04:47.729300129" + }, + "Unzip files before processing": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,1e31ebd4a060aab5433bbbd9ab24e403" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,1e31ebd4a060aab5433bbbd9ab24e403" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T22:08:19.533527657" + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config new file mode 100644 index 00000000..895709a7 --- /dev/null +++ b/modules/nf-core/gawk/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: GAWK { + ext.suffix = params.gawk_suffix + ext.args2 = params.gawk_args2 + } +} diff --git a/modules/nf-core/gawk/tests/tags.yml b/modules/nf-core/gawk/tests/tags.yml new file mode 100644 index 00000000..72e4531d --- /dev/null +++ b/modules/nf-core/gawk/tests/tags.yml @@ -0,0 +1,2 @@ +gawk: + - "modules/nf-core/gawk/**" diff --git a/modules/nf-core/glimpse/chunk/environment.yml b/modules/nf-core/glimpse/chunk/environment.yml new file mode 100644 index 00000000..6247794b --- /dev/null +++ b/modules/nf-core/glimpse/chunk/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - 
bioconda +dependencies: + - bioconda::glimpse-bio=1.1.1 diff --git a/modules/nf-core/glimpse/chunk/main.nf b/modules/nf-core/glimpse/chunk/main.nf new file mode 100644 index 00000000..94779846 --- /dev/null +++ b/modules/nf-core/glimpse/chunk/main.nf @@ -0,0 +1,49 @@ +process GLIMPSE_CHUNK { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/glimpse-bio:1.1.1--h2ce4488_2': + 'biocontainers/glimpse-bio:1.1.1--hce55b13_1' }" + + input: + tuple val(meta), path(input), path(input_index), val(region) + + output: + tuple val(meta), path("*.txt"), emit: chunk_chr + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + + """ + GLIMPSE_chunk \\ + $args \\ + --input $input \\ + --region $region \\ + --thread $task.cpus \\ + --output ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse: "\$(GLIMPSE_chunk --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]')" + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse: "\$(GLIMPSE_chunk --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]')" + END_VERSIONS + """ +} diff --git a/modules/nf-core/glimpse/chunk/meta.yml b/modules/nf-core/glimpse/chunk/meta.yml new file mode 100644 index 00000000..e8ff4019 --- /dev/null +++ b/modules/nf-core/glimpse/chunk/meta.yml @@ -0,0 +1,56 @@ +name: "glimpse_chunk" +description: Defines chunks where to run imputation +keywords: + - chunk + - imputation + - low coverage +tools: + - "glimpse": + description: "GLIMPSE is a phasing and imputation method for large-scale low-coverage + 
sequencing studies." + homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + Target dataset in VCF/BCF format defined at all variable positions. + The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file for the input VCF/BCF file. + - region: + type: string + description: | + Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). + For chrX, please treat PAR and non-PAR regions as different choromosome in order to avoid mixing ploidy. +output: + - chunk_chr: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Tab delimited output txt file containing buffer and imputation regions. 
+ pattern: "*.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/glimpse/chunk/tests/main.nf.test b/modules/nf-core/glimpse/chunk/tests/main.nf.test new file mode 100644 index 00000000..4c278af1 --- /dev/null +++ b/modules/nf-core/glimpse/chunk/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process GLIMPSE_CHUNK" + script "../main.nf" + process "GLIMPSE_CHUNK" + tag "glimpse" + tag "glimpse/chunk" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + config "modules/nf-core/glimpse/chunk/tests/nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'input' ], // meta map + file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true), + "chr21" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/glimpse/chunk/tests/main.nf.test.snap b/modules/nf-core/glimpse/chunk/tests/main.nf.test.snap new file mode 100644 index 00000000..0490a8e4 --- /dev/null +++ b/modules/nf-core/glimpse/chunk/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "input" + }, + "input.txt:md5,9e5562b3f94857b8189b59849ce65cfb" + ] + ], + "1": [ + "versions.yml:md5,a523ef8d6391ddeff47bfd30b606d628" + ], + "chunk_chr": [ + [ + { + "id": "input" + }, + "input.txt:md5,9e5562b3f94857b8189b59849ce65cfb" + ] + ], + "versions": [ + "versions.yml:md5,a523ef8d6391ddeff47bfd30b606d628" + ] + } + ], + "timestamp": "2023-10-16T15:55:52.457257547" + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse/chunk/tests/nextflow.config 
b/modules/nf-core/glimpse/chunk/tests/nextflow.config new file mode 100644 index 00000000..c945152e --- /dev/null +++ b/modules/nf-core/glimpse/chunk/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + withName: GLIMPSE_CHUNK { + ext.args = [ + "--window-size 2000000", + "--buffer-size 200000" + ].join(' ') + ext.prefix = { "${meta.id}" } + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse/chunk/tests/tags.yml b/modules/nf-core/glimpse/chunk/tests/tags.yml new file mode 100644 index 00000000..bd846dfd --- /dev/null +++ b/modules/nf-core/glimpse/chunk/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse/chunk: + - modules/nf-core/glimpse/chunk/** diff --git a/modules/nf-core/glimpse/ligate/environment.yml b/modules/nf-core/glimpse/ligate/environment.yml new file mode 100644 index 00000000..6247794b --- /dev/null +++ b/modules/nf-core/glimpse/ligate/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::glimpse-bio=1.1.1 diff --git a/modules/nf-core/glimpse/ligate/main.nf b/modules/nf-core/glimpse/ligate/main.nf new file mode 100644 index 00000000..65425fd5 --- /dev/null +++ b/modules/nf-core/glimpse/ligate/main.nf @@ -0,0 +1,51 @@ +process GLIMPSE_LIGATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/glimpse-bio:1.1.1--hce55b13_1': + 'biocontainers/glimpse-bio:1.1.1--hce55b13_1' }" + + input: + tuple val(meta), path(input_list), path(input_index) + + output: + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: merged_variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + """ + printf "%s\\n" $input_list | tr -d '[],' > all_files.txt + + GLIMPSE_ligate \\ + $args \\ + --input all_files.txt \\ + --thread $task.cpus \\ + --output ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse: "\$(GLIMPSE_ligate --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]')" + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + def args = task.ext.args ?: "" + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse: "\$(GLIMPSE_ligate --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]')" + END_VERSIONS + """ +} diff --git a/modules/nf-core/glimpse/ligate/meta.yml b/modules/nf-core/glimpse/ligate/meta.yml new file mode 100644 index 00000000..c1ed67ee --- /dev/null +++ b/modules/nf-core/glimpse/ligate/meta.yml @@ -0,0 +1,54 @@ +name: "glimpse_ligate" +description: Concatenates imputation chunks in a single VCF/BCF file ligating phased + information. +keywords: + - ligate + - low-coverage + - glimpse + - imputation +tools: + - "glimpse": + description: "GLIMPSE is a phasing and imputation method for large-scale low-coverage + sequencing studies." 
+ homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_list: + type: file + description: VCF/BCF file containing genotype probabilities (GP field). + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file of the input VCF/BCF file containing genotype likelihoods. + pattern: "*.{vcf.gz.csi,bcf.gz.csi}" +output: + - merged_variants: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,bcf,vcf.gz,bcf.gz}": + type: file + description: | + Output VCF/BCF file for the merged regions. + Phased information (HS field) is updated accordingly for the full region. 
+ pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/glimpse/ligate/tests/main.nf.test b/modules/nf-core/glimpse/ligate/tests/main.nf.test new file mode 100644 index 00000000..7289fc91 --- /dev/null +++ b/modules/nf-core/glimpse/ligate/tests/main.nf.test @@ -0,0 +1,76 @@ +nextflow_process { + + name "Test Process GLIMPSE_LIGATE" + script "../main.nf" + process "GLIMPSE_LIGATE" + + tag "modules_nfcore" + tag "modules" + tag "glimpse" + tag "glimpse/ligate" + tag "glimpse/phase" + tag "bcftools/index" + + test("test_glimpse_ligate") { + setup { + run("GLIMPSE_PHASE") { + script "../../phase/main.nf" + process { + """ + ch_sample = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + region = Channel.fromList([ + ["chr21:16600000-16750000","chr21:16650000-16700000"], + ["chr21:16650000-16800000","chr21:16700000-16750000"] + ]) + input_vcf = Channel.of([ + [ id:'input'], // meta map + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) + ]) + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + ch_map = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), + ]) + + input[0] = input_vcf + | combine(ch_sample) + | combine(region) + | combine(ref_panel) + | combine(ch_map) + """ + } + } + run("BCFTOOLS_INDEX") { + script "../../../bcftools/index/main.nf" + process { + """ + input[0] = GLIMPSE_PHASE.out.phased_variants + """ + } 
+ } + } + + when { + process { + """ + input[0] = GLIMPSE_PHASE.out.phased_variants + | groupTuple() + | join (BCFTOOLS_INDEX.out.csi.groupTuple()) + """ + } + } + + then { + def lines = path(process.out.merged_variants.get(0).get(1)).linesGzip.last() + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(lines).match("ligate") } + ) + } + + } +} diff --git a/modules/nf-core/glimpse/ligate/tests/main.nf.test.snap b/modules/nf-core/glimpse/ligate/tests/main.nf.test.snap new file mode 100644 index 00000000..8eec1328 --- /dev/null +++ b/modules/nf-core/glimpse/ligate/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,0cc9dfe9c9c1087666c418aa3379cf85" + ] + ], + "timestamp": "2023-10-17T11:56:25.087453677" + }, + "ligate": { + "content": [ + "chr21\t16799989\t21:16799989:T:C\tT\tC\t.\t.\tRAF=0.000468897;AF=0;INFO=1\tGT:DS:GP:HS\t0/0:0:1,0,0:0" + ], + "timestamp": "2023-10-17T11:56:25.116120487" + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse/ligate/tests/tags.yml b/modules/nf-core/glimpse/ligate/tests/tags.yml new file mode 100644 index 00000000..f15d8121 --- /dev/null +++ b/modules/nf-core/glimpse/ligate/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse/ligate: + - modules/nf-core/glimpse/ligate/** diff --git a/modules/nf-core/glimpse/phase/environment.yml b/modules/nf-core/glimpse/phase/environment.yml new file mode 100644 index 00000000..6247794b --- /dev/null +++ b/modules/nf-core/glimpse/phase/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::glimpse-bio=1.1.1 diff --git a/modules/nf-core/glimpse/phase/main.nf b/modules/nf-core/glimpse/phase/main.nf new file mode 100644 index 00000000..41004e60 --- /dev/null +++ b/modules/nf-core/glimpse/phase/main.nf @@ -0,0 +1,58 @@ +process GLIMPSE_PHASE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + 
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/glimpse-bio:1.1.1--hce55b13_1': + 'biocontainers/glimpse-bio:1.1.1--hce55b13_1' }" + + input: + tuple val(meta) , path(input), path(input_index), path(samples_file), val(input_region), val(output_region), path(reference), path(reference_index), path(map) + + output: + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_${input_region.replace(":","_")}" + def suffix = task.ext.suffix ?: "vcf.gz" + + def map_command = map ? "--map $map" :"" + def samples_file_command = samples_file ? "--samples-file $samples_file" :"" + + """ + GLIMPSE_phase \\ + $args \\ + --input $input \\ + --reference $reference \\ + $map_command \\ + $samples_file_command \\ + --input-region $input_region \\ + --output-region $output_region \\ + --thread $task.cpus \\ + --output ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse: "\$(GLIMPSE_phase --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]')" + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_${input_region.replace(":","_")}" + def suffix = task.ext.suffix ?: "vcf.gz" + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse: "\$(GLIMPSE_phase --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]')" + END_VERSIONS + """ +} diff --git a/modules/nf-core/glimpse/phase/meta.yml b/modules/nf-core/glimpse/phase/meta.yml new file mode 100644 index 00000000..e905b758 --- /dev/null +++ b/modules/nf-core/glimpse/phase/meta.yml @@ -0,0 +1,84 @@ +name: "glimpse_phase" +description: main GLIMPSE algorithm, performs phasing and imputation 
refining genotype + likelihoods +keywords: + - phase + - imputation + - low-coverage + - glimpse +tools: + - "glimpse": + description: "GLIMPSE is a phasing and imputation method for large-scale low-coverage + sequencing studies." + homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Input VCF/BCF file containing genotype likelihoods. + pattern: "*.{vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file of the input VCF/BCF file containing genotype likelihoods. + pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - samples_file: + type: file + description: | + File with sample names and ploidy information. + One sample per line with a mandatory second column indicating ploidy (1 or 2). + Sample names that are not present are assumed to have ploidy 2 (diploids). + GLIMPSE does NOT handle the use of sex (M/F) instead of ploidy. + pattern: "*.{txt,tsv}" + - input_region: + type: string + description: Target region used for imputation, including left and right buffers + (e.g. chr20:1000000-2000000). + pattern: "chrXX:leftBufferPosition-rightBufferPosition" + - output_region: + type: string + description: Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000). + pattern: "chrXX:leftBufferPosition-rightBufferPosition" + - reference: + type: file + description: Reference panel of haplotypes in VCF/BCF format. + pattern: "*.{vcf.gz,bcf.gz}" + - reference_index: + type: file + description: Index file of the Reference panel file. + pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - map: + type: file + description: File containing the genetic map. 
+ pattern: "*.gmap" +output: + - phased_variants: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,bcf,vcf.gz,bcf.gz}": + type: file + description: | + Output VCF/BCF file containing genotype probabilities (GP field), + imputed dosages (DS field), best guess genotypes (GT field), + sampled haplotypes in the last (max 16) main iterations (HS field) and info-score. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/glimpse/phase/tests/main.nf.test b/modules/nf-core/glimpse/phase/tests/main.nf.test new file mode 100644 index 00000000..5c92cb1f --- /dev/null +++ b/modules/nf-core/glimpse/phase/tests/main.nf.test @@ -0,0 +1,67 @@ +nextflow_process { + + name "Test Process GLIMPSE_PHASE" + script "../main.nf" + process "GLIMPSE_PHASE" + tag "glimpse" + tag "glimpse/phase" + tag "modules_nfcore" + tag "modules" + + test("test_glimpse_phase") { + + when { + process { + """ + ch_sample = Channel.of([sample:'present']) + | combine(Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt')) + | concat(Channel.of([[sample: 'absent'], []])) + region = Channel.fromList([ + ["chr21:16600000-16750000","chr21:16650000-16700000"], + ["chr21:16650000-16800000","chr21:16700000-16750000"] + ]) + input_vcf = Channel.of([ + [ id:'input'], // meta map + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) + ]) + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + 
"delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + ch_map = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), + ]) + + input[0] = input_vcf + | combine(ch_sample) + | combine(region) + | map { meta, vcf, index, metaS, sample, regionI, regionO -> + [[id: meta.id + "_" + metaS.sample, region : regionI], vcf, index, sample, regionI, regionO] + } + | combine(ref_panel) + | combine(ch_map) + """ + } + } + + then { + String targetFileName = "input_present_chr21_16650000-16800000.vcf.gz" + File selectedFile = process.out.phased_variants.stream() + .filter(vector -> vector.size() > 1) + .map(vector -> new File(vector.get(1).toString())) + .filter(file -> file.getName().equals(targetFileName)) + .findFirst() + .orElse(null) + String selectedFilename = selectedFile != null ? selectedFile.getPath() : null + def lines = path(selectedFilename).linesGzip.last() + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.phased_variants.size() == 4}, + { assert snapshot(lines).match("imputed") } + ) + } + + } +} diff --git a/modules/nf-core/glimpse/phase/tests/main.nf.test.snap b/modules/nf-core/glimpse/phase/tests/main.nf.test.snap new file mode 100644 index 00000000..d61cf86e --- /dev/null +++ b/modules/nf-core/glimpse/phase/tests/main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,b24f49b2f5989a1f7da32c195334e96b", + "versions.yml:md5,b24f49b2f5989a1f7da32c195334e96b", + "versions.yml:md5,b24f49b2f5989a1f7da32c195334e96b", + "versions.yml:md5,b24f49b2f5989a1f7da32c195334e96b" + ] + ], + "timestamp": "2023-10-17T15:27:55.512415434" + }, + "imputed": { + "content": [ + "chr21\t16799989\t21:16799989:T:C\tT\tC\t.\t.\tRAF=0.000468897;AF=0;INFO=1;BUF=1\tGT:DS:GP:HS\t0/0:0:1,0,0:0" + ], + "timestamp": "2023-10-17T15:27:55.99820664" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/glimpse/phase/tests/tags.yml b/modules/nf-core/glimpse/phase/tests/tags.yml new file mode 100644 index 00000000..61c28281 --- /dev/null +++ b/modules/nf-core/glimpse/phase/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse/phase: + - modules/nf-core/glimpse/phase/** diff --git a/modules/nf-core/glimpse2/chunk/environment.yml b/modules/nf-core/glimpse2/chunk/environment.yml new file mode 100644 index 00000000..75b86239 --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::glimpse-bio=2.0.1 diff --git a/modules/nf-core/glimpse2/chunk/main.nf b/modules/nf-core/glimpse2/chunk/main.nf new file mode 100644 index 00000000..4c72990d --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/main.nf @@ -0,0 +1,62 @@ +process GLIMPSE2_CHUNK { + tag "$meta.id" + label 'process_low' + + beforeScript """ + if cat /proc/cpuinfo | grep avx2 -q + then + echo "Feature AVX2 present on host" + else + echo "Feature AVX2 not present on host" + exit 1 + fi + """ + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/glimpse-bio:2.0.1--h46b9e50_1': + 'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }" + + input: + tuple val(meta), path(input), path(input_index), val(region), path(map) + val(model) + + output: + tuple val(meta), path("*.txt"), emit: chunk_chr + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + def map_cmd = map ? 
"--map ${map}":"" + + """ + GLIMPSE2_chunk \\ + $args \\ + $map_cmd \\ + --${model} \\ + --input $input \\ + --region $region \\ + --threads $task.cpus \\ + --output ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse2: "\$(GLIMPSE2_chunk --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse2: "\$(GLIMPSE2_chunk --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ +} diff --git a/modules/nf-core/glimpse2/chunk/meta.yml b/modules/nf-core/glimpse2/chunk/meta.yml new file mode 100644 index 00000000..bcb089d5 --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/meta.yml @@ -0,0 +1,73 @@ +name: "glimpse2_chunk" +description: Defines chunks where to run imputation +keywords: + - chunk + - low-coverage + - imputation + - glimpse +tools: + - "glimpse2": + description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage + sequencing studies." + homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: ["MIT"] + identifier: biotools:glimpse2 +requirements: + - AVX2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + Target dataset in VCF/BCF format defined at all variable positions. + The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file of the input VCF/BCF file containing genotype likelihoods. 
+ pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - region: + type: string + description: | + Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). + For chrX, please treat PAR and non-PAR regions as different choromosome in order to avoid mixing ploidy. + - map: + type: file + description: File containing the genetic map. + pattern: "*.gmap" + - - model: + type: string + description: | + Algorithm model to use: + "recursive": Recursive algorithm + "sequential": Sequential algorithm (Recommended) + "uniform-number-variants": Experimental. Uniform the number of variants in the sequential algorithm + pattern: "{recursive,sequential,uniform-number-variants}" +output: + - chunk_chr: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Tab delimited output txt file containing buffer and imputation + regions. + pattern: "*.{txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/glimpse2/chunk/tests/main.nf.test b/modules/nf-core/glimpse2/chunk/tests/main.nf.test new file mode 100644 index 00000000..406aa7b8 --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process GLIMPSE2_CHUNK" + script "../main.nf" + process "GLIMPSE2_CHUNK" + tag "glimpse2" + tag "glimpse2/chunk" + tag "modules_nfcore" + tag "modules" + + test("Should run without map") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true), + "chr21", [] + ] + 
input[1]= "recursive" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run with map") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true), + "chr21", + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ] + input[1]= "recursive" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/glimpse2/chunk/tests/main.nf.test.snap b/modules/nf-core/glimpse2/chunk/tests/main.nf.test.snap new file mode 100644 index 00000000..f61ebdcc --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "Should run without map": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,aae05c3099aff601005282744baf8db8" + ] + ], + "1": [ + "versions.yml:md5,f5aa9b92845efdd03350ca7cab08ff6f" + ], + "chunk_chr": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,aae05c3099aff601005282744baf8db8" + ] + ], + "versions": [ + "versions.yml:md5,f5aa9b92845efdd03350ca7cab08ff6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T14:51:29.494098" + }, + "Should run with map": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,1f7a58d7891e82fa5e9669abdbba5690" + ] + ], + "1": [ + "versions.yml:md5,f5aa9b92845efdd03350ca7cab08ff6f" + ], + "chunk_chr": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,1f7a58d7891e82fa5e9669abdbba5690" + ] + ], + 
"versions": [ + "versions.yml:md5,f5aa9b92845efdd03350ca7cab08ff6f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T14:51:38.545206" + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/chunk/tests/nextflow.config b/modules/nf-core/glimpse2/chunk/tests/nextflow.config new file mode 100644 index 00000000..e5721995 --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: GLIMPSE2_CHUNK { + ext.prefix = { "${meta.id}" } + } + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/chunk/tests/tags.yml b/modules/nf-core/glimpse2/chunk/tests/tags.yml new file mode 100644 index 00000000..69cc8b67 --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse2/chunk: + - modules/nf-core/glimpse2/chunk/** diff --git a/modules/nf-core/glimpse2/concordance/environment.yml b/modules/nf-core/glimpse2/concordance/environment.yml new file mode 100644 index 00000000..75b86239 --- /dev/null +++ b/modules/nf-core/glimpse2/concordance/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::glimpse-bio=2.0.1 diff --git a/modules/nf-core/glimpse2/concordance/main.nf b/modules/nf-core/glimpse2/concordance/main.nf new file mode 100644 index 00000000..f96f5c38 --- /dev/null +++ b/modules/nf-core/glimpse2/concordance/main.nf @@ -0,0 +1,83 @@ +process GLIMPSE2_CONCORDANCE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/glimpse-bio:2.0.1--h46b9e50_1': + 'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }" + + input: + tuple val(meta), path(estimate), path(estimate_index), path(truth), path(truth_index), path(freq), path(freq_index), path(samples), val(region) + tuple val(meta2), path(groups), val(bins), val(ac_bins), val(allele_counts), val(min_val_gl), val(min_val_dp) + + output: + tuple val(meta), path("*.error.cal.txt.gz") , emit: errors_cal + tuple val(meta), path("*.error.grp.txt.gz") , emit: errors_grp + tuple val(meta), path("*.error.spl.txt.gz") , emit: errors_spl + tuple val(meta), path("*.rsquare.grp.txt.gz"), emit: rsquare_grp + tuple val(meta), path("*.rsquare.spl.txt.gz"), emit: rsquare_spl + tuple val(meta), path("*_r2_sites.txt.gz") , emit: rsquare_per_site, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samples_cmd = samples ? "--samples ${samples}" : "" + def groups_cmd = groups ? "--groups ${groups}" : "" + def bins_cmd = bins ? "--bins ${bins}" : "" + def ac_bins_cmd = ac_bins ? "--ac-bins ${ac_bins}" : "" + def ale_ct_cmd = allele_counts ? "--allele-counts ${allele_counts}" : "" + def min_val_gl_cmd = min_val_gl ? "--min-val-gl ${min_val_gl}" : "" + def min_val_dp_cmd = min_val_dp ? "--min-val-dp ${min_val_dp}" : "" + def region_str = region instanceof List ? region.join('\\n') : region + + if (((groups ? 1:0) + (bins ? 1:0) + (ac_bins ? 1:0) + (allele_counts ? 
1:0)) != 1) error "One and only one argument should be selected between groups, bins, ac_bins, allele_counts" + + if (args.contains("--gt-val")) { + assert !(min_val_gl || min_val_dp) : "If --gt-val is set, --min-val-gl nor --min-val-dp must be set" + } + + """ + printf '$region_str' > regions.txt + sed 's/\$/ $freq $truth $estimate/' regions.txt > input.txt + GLIMPSE2_concordance \\ + $args \\ + $samples_cmd \\ + $groups_cmd \\ + $bins_cmd \\ + $ac_bins_cmd \\ + $ale_ct_cmd \\ + $min_val_gl_cmd \\ + $min_val_dp_cmd \\ + --input input.txt \\ + --thread $task.cpus \\ + --output ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse2: "\$(GLIMPSE2_concordance --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + def rsquare_per_site_cmd = args.contains("--out-r2-per-site") ? "touch ${prefix}_r2_sites.txt.gz" : "" + """ + touch ${prefix}.error.cal.txt.gz + touch ${prefix}.error.grp.txt.gz + touch ${prefix}.error.spl.txt.gz + touch ${prefix}.rsquare.grp.txt.gz + touch ${prefix}.rsquare.spl.txt.gz + ${rsquare_per_site_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse: "\$(GLIMPSE_concordance --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]')" + END_VERSIONS + """ +} diff --git a/modules/nf-core/glimpse2/concordance/meta.yml b/modules/nf-core/glimpse2/concordance/meta.yml new file mode 100644 index 00000000..a9e27c90 --- /dev/null +++ b/modules/nf-core/glimpse2/concordance/meta.yml @@ -0,0 +1,168 @@ +name: "glimpse2_concordance" +description: Program to compute the genotyping error rate at the sample or marker + level. +keywords: + - concordance + - low-coverage + - glimpse + - imputation +tools: + - "glimpse2": + description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage + sequencing studies." 
+ homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: ["MIT"] + identifier: biotools:glimpse2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - estimate: + type: file + description: Imputed dataset file obtain after phasing. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - estimate_index: + type: file + description: Index file for the imputed dataset file. + - truth: + type: file + description: Validation dataset called at the same positions as the imputed + file. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - truth_index: + type: file + description: Index file for the truth file. + - freq: + type: file + description: File containing allele frequencies at each site. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - freq_index: + type: file + description: Index file for the allele frequencies file. + - samples: + type: file + description: List of samples to process, one sample ID per line. + pattern: "*.{txt,tsv}" + - region: + type: string + description: Target region used for imputation, including left and right buffers + (e.g. chr20:1000000-2000000). Can also be a list of such regions. + pattern: "chrXX:leftBufferPosition-rightBufferPosition" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - groups: + type: file + description: Alternative to frequency bins, group bins are user defined, provided + in a file. + pattern: "*.{txt,tsv}" + - bins: + type: string + description: | + Allele frequency bins used for rsquared computations. + By default they should as MAF bins [0-0.5], while + they should take the full range [0-1] if --use-ref-alt is used. + pattern: "0 0.01 0.05 ... 
0.5" + - ac_bins: + type: string + description: User-defined allele count bins used for rsquared computations. + pattern: "1 2 5 10 20 ... 100000" + - allele_counts: + type: string + description: | + Default allele count bins used for rsquared computations. + AN field must be defined in the frequency file. + - min_val_gl: + type: float + description: | + Minimum genotype likelihood probability P(G|R) in validation data. + Set to zero to have no filter of if using –gt-validation + - min_val_dp: + type: integer + description: | + Minimum coverage in validation data. + If FORMAT/DP is missing and –min_val_dp > 0, the program exits with an error. + Set to zero to have no filter of if using –gt-validation +output: + - errors_cal: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.error.cal.txt.gz": + type: file + description: Calibration correlation errors between imputed dosages (in MAF + bins) and highly-confident genotype. + pattern: "*.errors.cal.txt.gz" + - errors_grp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.error.grp.txt.gz": + type: file + description: Groups correlation errors between imputed dosages (in MAF bins) + and highly-confident genotype. + pattern: "*.errors.grp.txt.gz" + - errors_spl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.error.spl.txt.gz": + type: file + description: Samples correlation errors between imputed dosages (in MAF bins) + and highly-confident genotype. + pattern: "*.errors.spl.txt.gz" + - rsquare_grp: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.rsquare.grp.txt.gz": + type: file + description: Groups r-squared correlation between imputed dosages (in MAF bins) + and highly-confident genotype. 
+ pattern: "*.rsquare.grp.txt.gz" + - rsquare_spl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.rsquare.spl.txt.gz": + type: file + description: Samples r-squared correlation between imputed dosages (in MAF bins) + and highly-confident genotype. + pattern: "*.rsquare.spl.txt.gz" + - rsquare_per_site: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_r2_sites.txt.gz": + type: file + description: Variant r-squared correlation between imputed dosages (in MAF bins) + and highly-confident genotype. + pattern: "_r2_sites.txt.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions. + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/glimpse2/concordance/tests/main.nf.test b/modules/nf-core/glimpse2/concordance/tests/main.nf.test new file mode 100644 index 00000000..d1c68bc2 --- /dev/null +++ b/modules/nf-core/glimpse2/concordance/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process GLIMPSE2_CONCORDANCE" + script "../main.nf" + process "GLIMPSE2_CONCORDANCE" + tag "glimpse2" + tag "glimpse2/concordance" + tag "glimpse2/phase" + tag "bcftools/index" + tag "modules_nfcore" + tag "modules" + + test("test glimpse2 concordance") { + config "./nextflow.config" + + when { + params { + glimpse2_concordance_args = "--gt-val --af-tag AF" + } + process { + """ + target = Channel.of([ + [id: "input"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true) + ]) + truth = Channel.of([ + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi",checkIfExists:true) + ]) + allele_freq = Channel.of([ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi",checkIfExists:true) + ]) + list_inputs = target + .combine( truth ) + .combine( allele_freq ) + .combine( Channel.of([[]]) ) + .combine( Channel.of(["chr22"]) ) + + input[0] = list_inputs + input[1] = Channel.of([[id:"params"], [],"0 0.01 0.05 0.1 0.2 0.5", [], [], [], []]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.errors_cal.collect{ file(it[1]).name }, + process.out.errors_grp.collect{ file(it[1]).name }, + process.out.errors_spl.collect{ file(it[1]).name }, + process.out.rsquare_grp.collect{ file(it[1]).name }, + process.out.rsquare_spl.collect{ file(it[1]).name }, + process.out.versions + ).match() } + ) + } + + } + + test("test list of region and rsquare per site") { + config "./nextflow.config" + + when { + params { + glimpse2_concordance_args = "--gt-val --af-tag AF --out-r2-per-site" + } + process { + """ + target = Channel.of([ + [id: "input"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz.csi",checkIfExists:true) + ]) + truth = Channel.of([ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz.csi",checkIfExists:true) + ]) + allele_freq = Channel.of([ + 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz",checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz.csi",checkIfExists:true) + ]) + list_inputs = target + .combine( truth ) + .combine( allele_freq ) + .combine( Channel.of([[]]) ) + .combine( Channel.of([["chr22", "chr22"]]) ) + .view() + + input[0] = list_inputs + input[1] = Channel.of([[id:"params"], [],"0 0.01 0.05 0.1 0.2 0.5", [], [], [], []]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.errors_cal.collect{ file(it[1]).name }, + process.out.errors_grp.collect{ file(it[1]).name }, + process.out.errors_spl.collect{ file(it[1]).name }, + process.out.rsquare_grp.collect{ file(it[1]).name }, + process.out.rsquare_spl.collect{ file(it[1]).name }, + process.out.rsquare_per_site.collect{ file(it[1]).name }, + process.out.versions + ).match() } + ) + } + + } +} diff --git a/modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap b/modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap new file mode 100644 index 00000000..5e81d44e --- /dev/null +++ b/modules/nf-core/glimpse2/concordance/tests/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "test glimpse2 concordance": { + "content": [ + [ + "input.error.cal.txt.gz" + ], + [ + "input.error.grp.txt.gz" + ], + [ + "input.error.spl.txt.gz" + ], + [ + "input.rsquare.grp.txt.gz" + ], + [ + "input.rsquare.spl.txt.gz" + ], + [ + "versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-03T16:22:35.440086384" + }, + "test list of region and rsquare per site": { + "content": [ + [ + "input.error.cal.txt.gz" + ], + [ + "input.error.grp.txt.gz" + ], + [ + "input.error.spl.txt.gz" + ], + [ + "input.rsquare.grp.txt.gz" + ], + [ + "input.rsquare.spl.txt.gz" + ], + [ + "input_r2_sites.txt.gz" + ], + [ + 
"versions.yml:md5,ba729289bab6b9fbb8c36a620c86bb82" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-03T16:26:35.009071185" + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/concordance/tests/nextflow.config b/modules/nf-core/glimpse2/concordance/tests/nextflow.config new file mode 100644 index 00000000..f9242b4b --- /dev/null +++ b/modules/nf-core/glimpse2/concordance/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: GLIMPSE2_CONCORDANCE { + ext.args = { params.glimpse2_concordance_args} + ext.prefix = { "${meta.id}" } + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/concordance/tests/tags.yml b/modules/nf-core/glimpse2/concordance/tests/tags.yml new file mode 100644 index 00000000..76ef847a --- /dev/null +++ b/modules/nf-core/glimpse2/concordance/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse2/concordance: + - modules/nf-core/glimpse2/concordance/** diff --git a/modules/nf-core/glimpse2/ligate/environment.yml b/modules/nf-core/glimpse2/ligate/environment.yml new file mode 100644 index 00000000..75b86239 --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::glimpse-bio=2.0.1 diff --git a/modules/nf-core/glimpse2/ligate/main.nf b/modules/nf-core/glimpse2/ligate/main.nf new file mode 100644 index 00000000..a1ca54b0 --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/main.nf @@ -0,0 +1,51 @@ +process GLIMPSE2_LIGATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/glimpse-bio:2.0.1--h46b9e50_1': + 'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }" + + input: + tuple val(meta), path(input_list), path(input_index) + + output: + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: merged_variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + """ + printf "%s\\n" $input_list | tr -d '[],' | sort -V > all_files.txt + + GLIMPSE2_ligate \\ + $args \\ + --input all_files.txt \\ + --thread $task.cpus \\ + --output ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse2: "\$(GLIMPSE2_ligate --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse2: "\$(GLIMPSE2_ligate --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ +} diff --git a/modules/nf-core/glimpse2/ligate/meta.yml b/modules/nf-core/glimpse2/ligate/meta.yml new file mode 100644 index 00000000..14fc8dac --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/meta.yml @@ -0,0 +1,53 @@ +name: "glimpse2_ligate" +description: | + Ligatation of multiple phased BCF/VCF files into a single whole chromosome file. + GLIMPSE2 is run in chunks that are ligated into chromosome-wide files maintaining the phasing. +keywords: + - ligate + - low-coverage + - glimpse + - imputation +tools: + - "glimpse2": + description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage + sequencing studies." 
+ homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: ["MIT"] + identifier: biotools:glimpse2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_list: + type: file + description: VCF/BCF file containing genotype probabilities (GP field). + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file of the input VCF/BCF file containing genotype likelihoods. + pattern: "*.{csi,tbi}" +output: + - merged_variants: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,bcf,vcf.gz,bcf.gz}": + type: file + description: Output ligated (phased) file in VCF/BCF format. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/glimpse2/ligate/tests/main.nf.test b/modules/nf-core/glimpse2/ligate/tests/main.nf.test new file mode 100644 index 00000000..6706d586 --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/tests/main.nf.test @@ -0,0 +1,76 @@ +nextflow_process { + + name "Test Process GLIMPSE2_LIGATE" + script "../main.nf" + + process "GLIMPSE2_LIGATE" + + tag "modules_nfcore" + tag "modules" + tag "glimpse2" + tag "glimpse2/ligate" + tag "bcftools/index" + tag "glimpse2/phase" + + test("Should run glimpse ligate") { + setup { + run("GLIMPSE2_PHASE") { + script "../../phase/main.nf" + process { + """ + input_vcf = Channel.of([ + [ id:'input' ], // meta map + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + 
"delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), + [], [], + "chr21:16600000-16800000", + "chr21:16650000-16750000" + ]) + + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_vcf + .combine(ref_panel) + .combine(map_file) + input[1] = Channel.of([[],[],[]]) + """ + } + } + run("BCFTOOLS_INDEX") { + script "../../../bcftools/index/main.nf" + process { + """ + input[0] = GLIMPSE2_PHASE.out.phased_variants + """ + } + } + } + + when { + process { + """ + input[0] = GLIMPSE2_PHASE.out.phased_variants + | groupTuple() + | join (BCFTOOLS_INDEX.out.csi.groupTuple()) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert file(process.out.merged_variants[0][1]).name == "input.vcf.gz" } + ) + } + + } +} diff --git a/modules/nf-core/glimpse2/ligate/tests/main.nf.test.snap b/modules/nf-core/glimpse2/ligate/tests/main.nf.test.snap new file mode 100644 index 00000000..a1b0b8c8 --- /dev/null +++ b/modules/nf-core/glimpse2/ligate/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,44addcaef4965ff6409a8293c5bcad84" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T15:52:19.469961519" + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/ligate/tests/tags.yml b/modules/nf-core/glimpse2/ligate/tests/tags.yml new file mode 100644 index 00000000..1613896f --- /dev/null +++ 
b/modules/nf-core/glimpse2/ligate/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse2/ligate: + - modules/nf-core/glimpse2/ligate/** diff --git a/modules/nf-core/glimpse2/phase/environment.yml b/modules/nf-core/glimpse2/phase/environment.yml new file mode 100644 index 00000000..75b86239 --- /dev/null +++ b/modules/nf-core/glimpse2/phase/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::glimpse-bio=2.0.1 diff --git a/modules/nf-core/glimpse2/phase/main.nf b/modules/nf-core/glimpse2/phase/main.nf new file mode 100644 index 00000000..8d5cc254 --- /dev/null +++ b/modules/nf-core/glimpse2/phase/main.nf @@ -0,0 +1,113 @@ +process GLIMPSE2_PHASE { + tag "$meta.id" + label 'process_medium' + + beforeScript """ + if cat /proc/cpuinfo | grep avx2 -q + then + echo "Feature AVX2 present on host" + else + echo "Feature AVX2 not present on host" + exit 1 + fi + """ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/glimpse-bio:2.0.1--h46b9e50_1': + 'biocontainers/glimpse-bio:2.0.1--h46b9e50_1' }" + + input: + tuple val(meta) , path(input, arity: '1..*'), path(input_index), path(bamlist), path(samples_file), val(input_region), val(output_region), path(reference), path(reference_index), path(map) + tuple val(meta2), path(fasta_reference), path(fasta_reference_index) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bgen}"), emit: phased_variants + tuple val(meta), path("*.txt.gz") , emit: stats_coverage, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def region = input_region ? "${output_region.replace(":","_")}" : "${reference}" + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}_${region}" + def suffix = task.ext.suffix ?: "bcf" + + def map_command = map ? 
"--map $map" : "" + def samples_file_command = samples_file ? "--samples-file $samples_file" : "" + def fasta_command = fasta_reference ? "--fasta $fasta_reference" : "" + def input_region_cmd = input_region ? "--input-region $input_region" : "" + def output_region_cmd = output_region ? "--output-region $output_region": "" + + def input_type = input.collect{ + it.toString().endsWithAny("cram", "bam") ? "bam" : + it.toString().endsWithAny("vcf", "bcf", "vcf.gz") ? "gl" : + it.getExtension() + }.unique() + + if (input_type.size() > 1 | !(input_type.contains("gl") | input_type.contains("bam"))) { + error "Input files must be of the same type and either .bam/.cram or .vcf/.vcf.gz/.bcf format. Found: ${input_type}" + } else { + input_type = input_type[0] + } + if (input_type == "gl" & input.size() > 1) { + error "Only one input .vcf/.vcf.gz/.bcf file can be provided" + } + def input_list = input.size() > 1 + + """ + if [ -n "$bamlist" ] ; + then + input_command="--bam-list $bamlist" + elif $input_list ; + then + ls -1 | grep '\\.cram\$\\|\\.bam\$' > all_bam.txt + input_command="--bam-list all_bam.txt" + else + if [ "$input_type" == "bam" ]; + then + input_command="--bam-file $input" + elif [ "$input_type" == "gl" ]; + then + input_command="--input-gl $input" + else + echo "Input file type not recognised" + echo "$input_type" + exit 1 + fi + fi + + GLIMPSE2_phase \\ + $args \\ + \$input_command \\ + --reference $reference \\ + $map_command \\ + $fasta_command \\ + $samples_file_command \\ + $input_region_cmd \\ + $output_region_cmd \\ + --thread $task.cpus \\ + --output ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse2: "\$(GLIMPSE2_phase --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ + + stub: + def region = input_region ? 
"${output_region.replace(":","_")}" : "${reference}" + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}_${region}" + def suffix = task.ext.suffix ?: "bcf" + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + glimpse2: "\$(GLIMPSE2_phase --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ +} diff --git a/modules/nf-core/glimpse2/phase/meta.yml b/modules/nf-core/glimpse2/phase/meta.yml new file mode 100644 index 00000000..dd720fd4 --- /dev/null +++ b/modules/nf-core/glimpse2/phase/meta.yml @@ -0,0 +1,122 @@ +name: "glimpse2_phase" +description: Tool for imputation and phasing from vcf file or directly from bam files. +keywords: + - phasing + - low-coverage + - imputation + - glimpse +tools: + - "glimpse2": + description: "GLIMPSE2 is a phasing and imputation method for large-scale low-coverage + sequencing studies." + homepage: "https://odelaneau.github.io/GLIMPSE" + documentation: "https://odelaneau.github.io/GLIMPSE/commands.html" + tool_dev_url: "https://github.com/odelaneau/GLIMPSE" + doi: "10.1038/s41588-020-00756-0" + licence: ["MIT"] + identifier: biotools:glimpse2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - input: + type: file + description: | + Either one or multiple BAM/CRAM files in an array containing low-coverage sequencing reads or one VCF/BCF file containing the genotype likelihoods. + When using BAM/CRAM the name of the file is used as samples name. + pattern: "*.{bam,cram,vcf,vcf.gz,bcf,bcf.gz}" + - input_index: + type: file + description: Index file of the input BAM/CRAM/VCF/BCF file. + pattern: "*.{bam.bai,cram.crai,vcf.gz.csi,bcf.gz.csi}" + - bamlist: + type: file + description: | + File containing the list of BAM/CRAM files to be phased. + One file per line and a second column can be added to indicate the sample name. 
+ pattern: "*.{txt,tsv}" + - samples_file: + type: file + description: | + File with sample names and ploidy information. + One sample per line with a mandatory second column indicating ploidy (1 or 2). + Sample names that are not present are assumed to have ploidy 2 (diploids). + GLIMPSE does NOT handle the use of sex (M/F) instead of ploidy. + pattern: "*.{txt,tsv}" + - input_region: + type: string + description: | + Target region used for imputation, including left and right buffers (e.g. chr20:1000000-2000000). + Optional if reference panel is in bin format. + pattern: "chrXX:leftBufferPosition-rightBufferPosition" + - output_region: + type: string + description: | + Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000). + Optional if reference panel is in bin format. + pattern: "chrXX:leftBufferPosition-rightBufferPosition" + - reference: + type: file + description: Reference panel of haplotypes in VCF/BCF format. + pattern: "*.{vcf.gz,bcf.gz}" + - reference_index: + type: file + description: Index file of the Reference panel file. + pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - map: + type: file + description: | + File containing the genetic map. + Optional if reference panel is in bin format. + pattern: "*.gmap" + - - meta2: + type: map + description: | + Groovy Map containing genomic map information + e.g. `[ map:'GRCh38' ]` + - fasta_reference: + type: file + description: | + Faidx-indexed reference sequence file in the appropriate genome build. + Necessary for CRAM files. + pattern: "*.fasta" + - fasta_reference_index: + type: file + description: | + Faidx index of the reference sequence file in the appropriate genome build. + Necessary for CRAM files. + pattern: "*.fai" +output: + - phased_variants: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.{vcf,vcf.gz,bcf,bgen}": + type: file + description: | + Output VCF/BCF file containing genotype probabilities (GP field), imputed dosages (DS field), best guess genotypes (GT field), sampled haplotypes in the last (max 16) main iterations (HS field) and info-score. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - stats_coverage: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.txt.gz": + type: file + description: Optional coverage statistic file created when BAM/CRAM files are + used as inputs. + pattern: "*.txt.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@LouisLeNezet" +maintainers: + - "@LouisLeNezet" diff --git a/modules/nf-core/glimpse2/phase/tests/main.nf.test b/modules/nf-core/glimpse2/phase/tests/main.nf.test new file mode 100644 index 00000000..04b89f2a --- /dev/null +++ b/modules/nf-core/glimpse2/phase/tests/main.nf.test @@ -0,0 +1,339 @@ +nextflow_process { + + name "Test Process GLIMPSE2_PHASE" + script "../main.nf" + + process "GLIMPSE2_PHASE" + + tag "modules_nfcore" + tag "modules" + tag "glimpse2" + tag "glimpse2/phase" + + test("Should run with vcf") { + + when { + process { + """ + input_vcf = Channel.of([ + [ id:'input' ], // meta map + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), + [], [], + "chr21:16600000-16800000", + "chr21:16650000-16750000" + ]) + + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + 
file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_vcf + .combine(ref_panel) + .combine(map_file) + input[1] = Channel.of([[],[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + // File has a timestamp in it and is in binary format, so we can only check the name + { assert file(process.out.phased_variants[0][1]).name == "input_chr21_16650000-16750000.bcf" }, + { assert snapshot(process.out.versions).match("VCF")} + ) + } + + } + + test("Should run with bam") { + + when { + process { + """ + input_bam = Channel.of([ + [id:'input'], + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.bam.bai", checkIfExists: true), + [], [], + "chr21:16600000-16800000", + "chr21:16650000-16750000", + ]) + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_bam + .combine(ref_panel) + .combine(map_file) + input[1] = Channel.of([[],[],[]]) + """ + } + } + + then { + assertAll( + { assert process.success }, + // File has a timestamp in it and is in binary format, so we can only check the name + { assert file(process.out.phased_variants[0][1]).name == "input_chr21_16650000-16750000.bcf" }, + { assert snapshot(process.out.stats_coverage).match("BAM_coverage")}, + { assert 
snapshot(process.out.versions).match("BAM")} + ) + } + + } + + test("Should run with cram and reference genome") { + + when { + process { + """ + input_cram = Channel.of([ + [id:'input'], + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.cram", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.cram.crai", checkIfExists: true), + [], [], + "chr21:16600000-16800000", + "chr21:16650000-16750000", + ]) + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + reference_genome = Channel.of([ + [id:'refHG38_chr21'], + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz.fai", checkIfExists: true) + ]) + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_cram + .combine(ref_panel) + .combine(map_file) + input[1] = reference_genome + """ + } + } + + then { + assertAll( + { assert process.success }, + // File has a timestamp in it and is in binary format, so we can only check the name + { assert file(process.out.phased_variants[0][1]).name == "input_chr21_16650000-16750000.bcf" }, + { assert snapshot(process.out.stats_coverage).match("CRAM_coverage")}, + { assert snapshot(process.out.versions).match("CRAM")} + ) + } + } + + test("Should run with list of bam files and reference genome") { + + when { + process { + """ + input_bam = Channel.of([ + [id:'input'], + 
[file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam", checkIfExists: true)], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExists: true)], + [], [], + "chr21:16600000-16800000", + "chr21:16650000-16750000", + ]) + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + reference_genome = Channel.of([ + [id:'refHG38_chr21'], + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz.fai", checkIfExists: true) + ]) + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_bam + .combine(ref_panel) + .combine(map_file) + input[1] = reference_genome + """ + } + } + + then { + assertAll( + { assert process.success }, + // File has a timestamp in it and is in binary format, so we can only check the name + { assert file(process.out.phased_variants[0][1]).name == "input_chr21_16650000-16750000.bcf" }, + { assert snapshot(process.out.stats_coverage).match("List_coverage")}, + { assert snapshot(process.out.versions).match("List_versions")} + ) + } + } + + test("Should run into error file type inconsistent") { + + when { + 
process { + """ + input_bam = Channel.of([ + [id:'input'], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true)], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true)], + [], [], + "chr21:16600000-16800000", + "chr21:16650000-16750000", + ]) + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + reference_genome = Channel.of([ + [id:'refHG38_chr21'], + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz.fai", checkIfExists: true) + ]) + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_bam + .combine(ref_panel) + .combine(map_file) + input[1] = reference_genome + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("Input files must be of the same type and either .bam/.cram or .vcf/.vcf.gz/.bcf format. 
Found: [bam, gl]") } + ) + } + } + + test("Should run into error only one vcf allowed") { + + when { + process { + """ + input_bam = Channel.of([ + [id:'input'], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.1x.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true)], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.1x.bcf.csi", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true)], + [], [], + "chr21:16600000-16800000", + "chr21:16650000-16750000", + ]) + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + reference_genome = Channel.of([ + [id:'refHG38_chr21'], + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz.fai", checkIfExists: true) + ]) + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_bam + .combine(ref_panel) + .combine(map_file) + input[1] = reference_genome + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("Only one input .vcf/.vcf.gz/.bcf file can be provided") } + ) + } + } + + test("Should run with a bamlist to rename samples") { + config "./nextflow.config" + + when { + process { + """ + bamlist = Channel.of( + "NA12878.s.bam\tNA12878", + 
"NA19401.s.bam\tNA19401" + ).collectFile(name: 'bamlist.txt', newLine: true) + + input_bam = Channel.of([ + [id:'input'], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam", checkIfExists: true)], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExists: true)], + ]).combine(bamlist).map{ map, bam, bai, bamlist -> [ + map, bam, bai, bamlist, [], + "chr21:16600000-16800000", + "chr21:16650000-16750000", + ] + } + ref_panel = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) + ]) + + map_file = Channel.of([ + file(params.modules_testdata_base_path + "delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]) + reference_genome = Channel.of([ + [id:'refHG38_chr21'], + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "delete_me/glimpse/hs38DH.chr21.fa.gz.fai", checkIfExists: true) + ]) + // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] + input[0] = input_bam + .combine(ref_panel) + .combine(map_file) + input[1] = reference_genome + """ + } + } + + then { + assertAll( + { assert process.success }, + // File has a timestamp in it and is in binary format, so we can only check the name + { assert snapshot( + process.out.phased_variants.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() }, + 
process.out.phased_variants.collect{ path(it[1]).vcf.summary.replaceAll(", phasedAutodetect=(false|true)", "") } + ).match()}, + ) + } + } +} diff --git a/modules/nf-core/glimpse2/phase/tests/main.nf.test.snap b/modules/nf-core/glimpse2/phase/tests/main.nf.test.snap new file mode 100644 index 00000000..180af8cc --- /dev/null +++ b/modules/nf-core/glimpse2/phase/tests/main.nf.test.snap @@ -0,0 +1,119 @@ +{ + "CRAM": { + "content": [ + [ + "versions.yml:md5,c68de03046a6503cdbcf3a1495fc512f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T16:58:34.365910006" + }, + "List_versions": { + "content": [ + [ + "versions.yml:md5,c68de03046a6503cdbcf3a1495fc512f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-07-09T20:35:30.97466287" + }, + "Should run with a bamlist to rename samples": { + "content": [ + [ + [ + "NA12878", + "NA19401" + ] + ], + [ + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=4719, phased=true]" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T15:23:37.368119579" + }, + "VCF": { + "content": [ + [ + "versions.yml:md5,c68de03046a6503cdbcf3a1495fc512f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T16:58:05.013609832" + }, + "List_coverage": { + "content": [ + [ + [ + { + "id": "input" + }, + "input_chr21_16650000-16750000_stats_coverage.txt.gz:md5,ac61370fc14738b103e160b7298727b9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-07-09T20:35:30.943277837" + }, + "BAM_coverage": { + "content": [ + [ + [ + { + "id": "input" + }, + "input_chr21_16650000-16750000_stats_coverage.txt.gz:md5,9be7101ef4f599416c22fd6160c3b146" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T16:58:13.527360464" + }, + "CRAM_coverage": { + "content": [ + [ + [ + { + "id": 
"input" + }, + "input_chr21_16650000-16750000_stats_coverage.txt.gz:md5,a2bee17d81568dba62ce4dd430947d29" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T16:58:34.264826731" + }, + "BAM": { + "content": [ + [ + "versions.yml:md5,c68de03046a6503cdbcf3a1495fc512f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-13T16:58:13.58159608" + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/phase/tests/nextflow.config b/modules/nf-core/glimpse2/phase/tests/nextflow.config new file mode 100644 index 00000000..c5d9c9be --- /dev/null +++ b/modules/nf-core/glimpse2/phase/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GLIMPSE2_PHASE { + ext.suffix = { "vcf" } + } +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/phase/tests/tags.yml b/modules/nf-core/glimpse2/phase/tests/tags.yml new file mode 100644 index 00000000..ab05b49f --- /dev/null +++ b/modules/nf-core/glimpse2/phase/tests/tags.yml @@ -0,0 +1,2 @@ +glimpse2/phase: + - modules/nf-core/glimpse2/phase/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..5e67e3b9 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..9066c035 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. 
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..776211ad --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + 
+ } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..069967e7 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..6f5b867b --- /dev/null +++ 
b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 00000000..cc0643e1 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,63 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path(replace_names) + path(sample_names) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $prefix \\ + $extra_config \\ + $logo \\ + $replace \\ + $samples \\ + . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + mkdir multiqc_data + mkdir multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 00000000..b16c1879 --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,78 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc +input: + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. 
+ pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" +output: + - report: + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..33316a7d --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + config "./nextflow.config" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = 
Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..2fcbb5ff --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:51:46.317523" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:20.680978" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", 
+ "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:09.185842" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/quilt/quilt/environment.yml b/modules/nf-core/quilt/quilt/environment.yml new file mode 100644 index 00000000..967b84ba --- /dev/null +++ b/modules/nf-core/quilt/quilt/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::r-quilt=1.0.5=r43h06b5641_0 + - r-base=4.3.1 diff --git a/modules/nf-core/quilt/quilt/main.nf b/modules/nf-core/quilt/quilt/main.nf new file mode 100644 index 00000000..fd3a67a6 --- /dev/null +++ b/modules/nf-core/quilt/quilt/main.nf @@ -0,0 +1,69 @@ +process QUILT_QUILT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-quilt:1.0.5--r43h06b5641_0': + 'biocontainers/r-quilt:1.0.5--r43h06b5641_0' }" + + input: + tuple val(meta), path(bams), path(bais), path(bamlist), path(reference_haplotype_file), path(reference_legend_file), val(chr), val(regions_start), val(regions_end), val(ngen), val(buffer), path(genetic_map_file) + tuple val(meta2), path(posfile), path(phasefile) + tuple val(meta3), path(fasta) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi, optional:true + tuple val(meta), path("RData", type: "dir"), emit: rdata, optional:true + tuple val(meta), path("plots", type: "dir"), emit: plots, optional:true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extensions = bams.collect { it.extension } + def extension = extensions.flatten().unique() + def list_command = extension == ["bam"] ? "--bamlist=" : + extension == ["cram"] ? "--reference=${fasta} --cramlist=" : "" + def genetic_map_file_command = genetic_map_file ? "--genetic_map_file=${genetic_map_file}" : "" + def posfile_command = posfile ? "--posfile=${posfile}" : "" + def phasefile_command = phasefile ? "--phasefile=${phasefile}" : "" + if (!(args ==~ /.*--seed.*/)) {args += " --seed=1"} + + """ + if [ -n "$bamlist" ] ; + then + BAM_LIST="$bamlist" + else + printf "%s\\n" $bams | tr -d '[],' > all_files.txt + BAM_LIST="all_files.txt" + fi + + QUILT.R \\ + ${list_command}\$BAM_LIST \\ + $genetic_map_file_command \\ + $posfile_command \\ + $phasefile_command \\ + --chr=$chr \\ + --regionStart=$regions_start \\ + --regionEnd=$regions_end \\ + --nGen=$ngen \\ + --buffer=$buffer \\ + --nCores=$task.cpus \\ + --outputdir="." 
\\ + --reference_haplotype_file=$reference_haplotype_file \\ + --reference_legend_file=$reference_legend_file \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(Rscript -e "cat(strsplit(R.version[['version.string']], ' ')[[1]][3])") + r-quilt: \$(Rscript -e "cat(as.character(utils::packageVersion(\\"QUILT\\")))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/quilt/quilt/meta.yml b/modules/nf-core/quilt/quilt/meta.yml new file mode 100644 index 00000000..958d39cb --- /dev/null +++ b/modules/nf-core/quilt/quilt/meta.yml @@ -0,0 +1,157 @@ +name: "quilt_quilt" +description: QUILT is an R and C++ program for rapid genotype imputation from low-coverage + sequence using a large reference panel. +keywords: + - imputation + - low-coverage + - genotype + - genomics + - vcf +tools: + - "quilt": + description: "Read aware low coverage whole genome sequence imputation from a + reference panel" + homepage: "https://github.com/rwdavies/quilt" + documentation: "https://github.com/rwdavies/quilt" + tool_dev_url: "https://github.com/rwdavies/quilt" + doi: "10.1038/s41588-021-00877-0" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bams: + type: file + description: (Mandatory) BAM/CRAM files + pattern: "*.{bam,cram,sam}" + - bais: + type: file + description: (Mandatory) BAM/CRAM index files + pattern: "*.{bai}" + - bamlist: + type: file + description: (Optional) File with list of BAM/CRAM files to impute. One file + per line. 
+ pattern: "*.{txt}" + - reference_haplotype_file: + type: file + description: (Mandatory) Reference haplotype file in IMPUTE format (file with + no header and no rownames, one row per SNP, one column per reference haplotype, + space separated, values must be 0 or 1) + pattern: "*.{hap.gz}" + - reference_legend_file: + type: file + description: (Mandatory) Reference haplotype legend file in IMPUTE format (file + with one row per SNP, and a header including position for the physical position + in 1 based coordinates, a0 for the reference allele, and a1 for the alternate + allele). + pattern: "*.{legend.gz}" + - chr: + type: string + description: (Mandatory) What chromosome to run. Should match BAM headers. + - regions_start: + type: integer + description: (Mandatory) When running imputation, where to start from. The 1-based + position x is kept if regionStart <= x <= regionEnd. + - regions_end: + type: integer + description: (Mandatory) When running imputation, where to stop. + - ngen: + type: integer + description: Number of generations since founding or mixing. Note that the algorithm + is relatively robust to this. Use nGen = 4 * Ne / K if unsure. + - buffer: + type: integer + description: Buffer of region to perform imputation over. So imputation is run + form regionStart-buffer to regionEnd+buffer, and reported for regionStart + to regionEnd, including the bases of regionStart and regionEnd. + - genetic_map_file: + type: file + description: (Optional) File with genetic map information, a file with 3 white-space + delimited entries giving position (1-based), genetic rate map in cM/Mbp, and + genetic map in cM. If no file included, rate is based on physical distance + and expected rate (expRate). + pattern: "*.{txt.gz}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - posfile: + type: file + description: (Optional) File with positions of where to impute, lining up one-to-one + with genfile. File is tab seperated with no header, one row per SNP, with + col 1 = chromosome, col 2 = physical position (sorted from smallest to largest), + col 3 = reference base, col 4 = alternate base. Bases are capitalized. + pattern: "*.{txt}" + - phasefile: + type: file + description: (Optional) File with truth phasing results. Supersedes genfile + if both options given. File has a header row with a name for each sample, + matching what is found in the bam file. Each subject is then a tab seperated + column, with 0 = ref and 1 = alt, separated by a vertical bar |, e.g. 0|0 + or 0|1. Note therefore this file has one more row than posfile which has no + header. + pattern: "*.{txt}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: (Optional) File with reference genome. + pattern: "*.{txt.gz}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: VCF file with both SNP annotation information and per-sample genotype + information. + pattern: "*.{vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz.tbi": + type: file + description: TBI file of the VCF. + pattern: "*.{vcf.gz.tbi}" + - rdata: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - 'RData", type: "dir': + type: directory + description: Optional directory path to prepared RData file with reference objects + (useful with --save_prepared_reference=TRUE). + - plots: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - 'plots", type: "dir': + type: directory + description: Optional directory path to save plots. + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/quilt/quilt/tests/main.nf.test b/modules/nf-core/quilt/quilt/tests/main.nf.test new file mode 100644 index 00000000..d2963026 --- /dev/null +++ b/modules/nf-core/quilt/quilt/tests/main.nf.test @@ -0,0 +1,172 @@ +// Input data +def path = "file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/quilt/" +def bam = "[${path}NA12878.haplotagged.1.0.bam', checkIfExists: true), ${path}NA12878.ont.1.0.bam', checkIfExists: true), ${path}NA12878.illumina.1.0.bam', checkIfExists: true)]" +def bai = "[${path}NA12878.haplotagged.1.0.bam.bai', checkIfExists: true), ${path}NA12878.ont.1.0.bam.bai', checkIfExists: true),${path}NA12878.illumina.1.0.bam.bai', checkIfExists: true)]" + +// Input reference data +def reference_haplotype_file = "file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/quilt/ALL.chr20_GRCh38.genotypes.20170504.chr20.2000001.2100000.noNA12878.hap.gz', checkIfExists: true)" +def reference_legend_file = "file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/quilt/ALL.chr20_GRCh38.genotypes.20170504.chr20.2000001.2100000.noNA12878.legend.gz', checkIfExists: true)" +def genetic_map_file = "file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/quilt/CEU-chr20-final.b38.txt.gz', checkIfExists: true)" + +// Parameters +def chr = "'chr20'" +def regions_start = "2000001" +def regions_end = "2100000" +def ngen = "100" +def buffer = "10000" + + +// (optional) input truth data +def posfile = "file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/quilt/ALL.chr20_GRCh38.genotypes.20170504.chr20.2000001.2100000.posfile.txt', checkIfExists: true)" 
+def phasefile = "file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/quilt/ALL.chr20_GRCh38.genotypes.20170504.chr20.2000001.2100000.phasefile.txt', checkIfExists: true)" +def posfile_phasefile = "[[ id:'test', chr:'chr20' ], [$posfile], [$phasefile]]" +def fasta = "[[id:'test'], []]" + +// Input channel quilt +def ch_input = "[ id:'test', chr:'chr20' ], $bam, $bai, [], [$reference_haplotype_file], [$reference_legend_file], $chr, $regions_start, $regions_end, $ngen, $buffer" +def ch_input_gmap = "[$ch_input, [$genetic_map_file]]" +def ch_input_nogmap = "[$ch_input, []]" + +nextflow_process { + + name "Test Process QUILT" + script "../main.nf" + process "QUILT_QUILT" + + tag "modules" + tag "modules_nfcore" + tag "quilt/quilt" + tag "quilt" + + test("QUILT") { + config ("./quilt_default.config") + when { + process { + """ + input[0] = $ch_input_gmap + input[1] = $posfile_phasefile + input[2] = $fasta + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("QUILT no optional files") { + config ("./quilt_default.config") + when { + process { + """ + input[0] = $ch_input_nogmap + input[1] = [[id: null], [], []] + input[2] = $fasta + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("QUILT optional output") { + config ("./quilt_optional.config") + when { + process { + """ + input[0] = $ch_input_gmap + input[1] = $posfile_phasefile + input[2] = $fasta + """ + } + } + + then { + def dir = new File(process.out.plots[0][1]) + def list = [] + dir.eachFileRecurse { file -> list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf + process.out.tbi + + list.sort() + + process.out.rdata + process.out.versions + ).match() } + ) + } + + } + + test("QUILT no seed") { + config ("./quilt_noseed.config") + when { + process { + """ + input[0] = 
$ch_input_gmap + input[1] = $posfile_phasefile + input[2] = $fasta + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("QUILT with bamlist") { + config ("./quilt_default.config") + tag "test" + when { + process { + """ + bamlist = Channel.of( + "NA12878.illumina.1.0.bam", + "NA12878.ont.1.0.bam", + "NA12878.haplotagged.1.0.bam" + ).collectFile(name : 'bamlist.txt', newLine : true) + + ch_input = Channel.of([ + [ id:'test', chr:'chr20' ], + $bam, $bai + ]) + .combine(bamlist) + .map { map, bam, bai, bamlist -> [ + map, bam, bai, bamlist, + [$reference_haplotype_file], [$reference_legend_file], + $chr, $regions_start, $regions_end, $ngen, $buffer, + [$genetic_map_file] + ]} + + input[0] = ch_input + input[1] = $posfile_phasefile + input[2] = $fasta + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/quilt/quilt/tests/main.nf.test.snap b/modules/nf-core/quilt/quilt/tests/main.nf.test.snap new file mode 100644 index 00000000..1a07b067 --- /dev/null +++ b/modules/nf-core/quilt/quilt/tests/main.nf.test.snap @@ -0,0 +1,457 @@ +{ + "QUILT": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,32f539c80971e2e8e0c31870be094a25" + ] + ], + "1": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,4607cdcb20599cbebd1ccf76d4dc56ae" + ] + ], + "2": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ], + "plots": [ + + ], + "rdata": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "tbi": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,4607cdcb20599cbebd1ccf76d4dc56ae" + ] + ], + "vcf": [ + [ + { + "id": 
"test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,32f539c80971e2e8e0c31870be094a25" + ] + ], + "versions": [ + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T17:27:54.607934432" + }, + "QUILT no seed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,32f539c80971e2e8e0c31870be094a25" + ] + ], + "1": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,4607cdcb20599cbebd1ccf76d4dc56ae" + ] + ], + "2": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ], + "plots": [ + + ], + "rdata": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "tbi": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,4607cdcb20599cbebd1ccf76d4dc56ae" + ] + ], + "vcf": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,32f539c80971e2e8e0c31870be094a25" + ] + ], + "versions": [ + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T17:29:31.357244889" + }, + "QUILT no optional files": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,3fde483728ef2287416b2340c06aaf85" + ] + ], + "1": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,20d9e8cda03fc84482f3aa53a0c94fb6" + ] + ], + "2": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ], + "plots": [ + + ], + "rdata": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "tbi": [ + [ + { + "id": 
"test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,20d9e8cda03fc84482f3aa53a0c94fb6" + ] + ], + "vcf": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,3fde483728ef2287416b2340c06aaf85" + ] + ], + "versions": [ + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T17:28:16.39358682" + }, + "QUILT optional output": { + "content": [ + [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,8352fbcabdd102a8ba2c4490e0834287" + ], + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,88d16933f2ac53058b7a5d5c849dc19a" + ], + "haps.NA12878.chr20.2000001.2100000_igs.1.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.1.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.1.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.1.it3.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.2.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.2.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.2.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.2.it3.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.3.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.3.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.3.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.3.it3.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.4.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.4.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.4.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.4.it3.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.5.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.5.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.5.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.5.it3.gibbs.png", + 
"haps.NA12878.chr20.2000001.2100000_igs.6.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.6.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.6.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.6.it3.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.7.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.7.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.7.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.7.it3.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.8.0.truth.png", + "haps.NA12878.chr20.2000001.2100000_igs.8.it1.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.8.it2.gibbs.png", + "haps.NA12878.chr20.2000001.2100000_igs.8.it3.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.1.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.1.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.1.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.1.it3.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.2.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.2.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.2.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.2.it3.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.3.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.3.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.3.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.3.it3.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.4.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.4.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.4.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.4.it3.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.5.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.5.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.5.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.5.it3.gibbs.png", + 
"haps.NA12878HT.chr20.2000001.2100000_igs.6.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.6.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.6.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.6.it3.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.7.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.7.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.7.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.7.it3.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.8.0.truth.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.8.it1.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.8.it2.gibbs.png", + "haps.NA12878HT.chr20.2000001.2100000_igs.8.it3.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.1.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.1.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.1.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.1.it3.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.2.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.2.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.2.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.2.it3.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.3.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.3.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.3.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.3.it3.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.4.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.4.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.4.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.4.it3.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.5.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.5.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.5.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.5.it3.gibbs.png", + 
"haps.NA12878ONT.chr20.2000001.2100000_igs.6.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.6.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.6.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.6.it3.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.7.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.7.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.7.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.7.it3.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.8.0.truth.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.8.it1.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.8.it2.gibbs.png", + "haps.NA12878ONT.chr20.2000001.2100000_igs.8.it3.gibbs.png", + [ + { + "id": "test", + "chr": "chr20" + }, + [ + "QUILT_prepared_reference.chr20.2000001.2100000.RData:md5,c2bbcf91085f33536fbaf094b4f0ea05" + ] + ], + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T17:28:59.999377862" + }, + "QUILT with bamlist": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,9d1da27200354997d7f159d36c4d8166" + ] + ], + "1": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,7d573c34c4e133549623f9b44b27e486" + ] + ], + "2": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ], + "plots": [ + + ], + "rdata": [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + "tbi": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz.tbi:md5,7d573c34c4e133549623f9b44b27e486" + ] + ], + "vcf": [ + [ + { + "id": "test", + "chr": "chr20" + }, + "quilt.chr20.2000001.2100000.vcf.gz:md5,9d1da27200354997d7f159d36c4d8166" + ] + ], + "versions": [ + 
"versions.yml:md5,6d07cd60389ff6981a44004872bd16b7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T17:05:26.881048883" + } +} \ No newline at end of file diff --git a/modules/nf-core/quilt/quilt/tests/quilt_default.config b/modules/nf-core/quilt/quilt/tests/quilt_default.config new file mode 100644 index 00000000..87f87b9a --- /dev/null +++ b/modules/nf-core/quilt/quilt/tests/quilt_default.config @@ -0,0 +1,6 @@ +process { + cpus = 1 // More than 1 cpu may lead to different md5sum + withName: QUILT_QUILT { + ext.args = "--seed=1" + } +} diff --git a/modules/nf-core/quilt/quilt/tests/quilt_noseed.config b/modules/nf-core/quilt/quilt/tests/quilt_noseed.config new file mode 100644 index 00000000..e9f81a34 --- /dev/null +++ b/modules/nf-core/quilt/quilt/tests/quilt_noseed.config @@ -0,0 +1,6 @@ +process { + cpus = 1 // More than 1 cpu may lead to different md5sum + withName: QUILT_QUILT { + ext.args = "" + } +} diff --git a/modules/nf-core/quilt/quilt/tests/quilt_optional.config b/modules/nf-core/quilt/quilt/tests/quilt_optional.config new file mode 100644 index 00000000..cfbd1353 --- /dev/null +++ b/modules/nf-core/quilt/quilt/tests/quilt_optional.config @@ -0,0 +1,6 @@ +process { + cpus = 1 // More than 1 cpu may lead to different md5sum + withName: QUILT_QUILT { + ext.args = "--save_prepared_reference=TRUE --make_plots=TRUE --seed=1" + } +} diff --git a/modules/nf-core/quilt/quilt/tests/tags.yml b/modules/nf-core/quilt/quilt/tests/tags.yml new file mode 100644 index 00000000..ac1b9092 --- /dev/null +++ b/modules/nf-core/quilt/quilt/tests/tags.yml @@ -0,0 +1,2 @@ +quilt/quilt: + - "modules/nf-core/quilt/quilt/**" diff --git a/modules/nf-core/samtools/coverage/environment.yml b/modules/nf-core/samtools/coverage/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/coverage/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/coverage/main.nf b/modules/nf-core/samtools/coverage/main.nf new file mode 100644 index 00000000..060c9298 --- /dev/null +++ b/modules/nf-core/samtools/coverage/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_COVERAGE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("*.txt"), emit: coverage + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + coverage \\ + $args \\ + -o ${prefix}.txt \\ + --reference ${fasta} \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + echo "#rname\tstartpos\tendpos\tnumreads\tcovbases\tcoverage\tmeandepth\tmeanbaseq\tmeanmapq" > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/coverage/meta.yml b/modules/nf-core/samtools/coverage/meta.yml new file mode 100644 index 00000000..fb9ba6f3 --- /dev/null +++ 
b/modules/nf-core/samtools/coverage/meta.yml @@ -0,0 +1,70 @@ +name: "samtools_coverage" +description: produces a histogram or table of coverage per chromosome +keywords: + - depth + - samtools + - bam +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - input_index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome file + pattern: "*.{fa,fasta}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Reference genome index file + pattern: "*.fai" +output: + - coverage: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: Tabulated text containing the coverage at each position or region + or an ASCII-art histogram (with --histogram). 
+ pattern: "*.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@LouisLeNezet" +maintainers: + - "@LouisLeNezet" diff --git a/modules/nf-core/samtools/coverage/samtools-coverage.diff b/modules/nf-core/samtools/coverage/samtools-coverage.diff new file mode 100644 index 00000000..14c9be18 --- /dev/null +++ b/modules/nf-core/samtools/coverage/samtools-coverage.diff @@ -0,0 +1,29 @@ +Changes in module 'nf-core/samtools/coverage' +'modules/nf-core/samtools/coverage/environment.yml' is unchanged +Changes in 'samtools/coverage/main.nf': +--- modules/nf-core/samtools/coverage/main.nf ++++ modules/nf-core/samtools/coverage/main.nf +@@ -9,8 +9,7 @@ + + input: + tuple val(meta), path(input), path(input_index) +- tuple val(meta2), path(fasta) +- tuple val(meta3), path(fai) ++ tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("*.txt"), emit: coverage +@@ -41,6 +40,7 @@ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt ++ echo "#rname\tstartpos\tendpos\tnumreads\tcovbases\tcoverage\tmeandepth\tmeanbaseq\tmeanmapq" > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +'modules/nf-core/samtools/coverage/meta.yml' is unchanged +'modules/nf-core/samtools/coverage/tests/main.nf.test' is unchanged +'modules/nf-core/samtools/coverage/tests/main.nf.test.snap' is unchanged +'modules/nf-core/samtools/coverage/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/samtools/coverage/tests/main.nf.test b/modules/nf-core/samtools/coverage/tests/main.nf.test new file mode 100644 index 00000000..1e3ad5a4 --- /dev/null +++ b/modules/nf-core/samtools/coverage/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process SAMTOOLS_COVERAGE" + script "../main.nf" + process "SAMTOOLS_COVERAGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + 
tag "samtools/coverage" + + test("test_samtools_coverage_bam") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_coverage_cram") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_coverage_stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/coverage/tests/main.nf.test.snap b/modules/nf-core/samtools/coverage/tests/main.nf.test.snap new file mode 100644 index 00000000..b9ddb18d --- /dev/null +++ b/modules/nf-core/samtools/coverage/tests/main.nf.test.snap @@ -0,0 +1,107 @@ +{ + "test_samtools_coverage_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,9c876b9db54dc710c87c404e4b28243c" + ], + "coverage": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,9c876b9db54dc710c87c404e4b28243c" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:53:42.773351407" + }, + "test_samtools_coverage_bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,99a521b3bf53b6acf8055a44a571ea84" + ] + ], + "1": [ + "versions.yml:md5,9c876b9db54dc710c87c404e4b28243c" + ], + "coverage": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,99a521b3bf53b6acf8055a44a571ea84" + ] + ], + "versions": [ + "versions.yml:md5,9c876b9db54dc710c87c404e4b28243c" + ] + } + ], + "meta": { + "nf-test": 
"0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:53:22.798338025" + }, + "test_samtools_coverage_cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,ce896534bac51cfcc97e5508ae907e99" + ] + ], + "1": [ + "versions.yml:md5,9c876b9db54dc710c87c404e4b28243c" + ], + "coverage": [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,ce896534bac51cfcc97e5508ae907e99" + ] + ], + "versions": [ + "versions.yml:md5,9c876b9db54dc710c87c404e4b28243c" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:53:32.409876082" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/coverage/tests/tags.yml b/modules/nf-core/samtools/coverage/tests/tags.yml new file mode 100644 index 00000000..2b4f53c2 --- /dev/null +++ b/modules/nf-core/samtools/coverage/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/coverage: + - "modules/nf-core/samtools/coverage/**" diff --git a/modules/nf-core/samtools/depth/environment.yml b/modules/nf-core/samtools/depth/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/depth/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/depth/main.nf b/modules/nf-core/samtools/depth/main.nf new file mode 100644 index 00000000..bb11306b --- /dev/null +++ b/modules/nf-core/samtools/depth/main.nf @@ -0,0 +1,39 @@ +process SAMTOOLS_DEPTH { + tag "$meta1.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta1), path(bam), path(bai) + tuple val(meta2), path(intervals) + + output: + tuple val(meta1), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta1.id}" + def positions = intervals ? "-b ${intervals}" : "" + """ + samtools \\ + depth \\ + --threads ${task.cpus-1} \\ + $args \\ + $positions \\ + -o ${prefix}.tsv \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/depth/meta.yml b/modules/nf-core/samtools/depth/meta.yml new file mode 100644 index 00000000..2acd645d --- /dev/null +++ b/modules/nf-core/samtools/depth/meta.yml @@ -0,0 +1,60 @@ +name: samtools_depth +description: Computes the depth at each position or region. +keywords: + - depth + - samtools + - statistics + - coverage +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files; samtools depth – + computes the read depth at each position or region + homepage: http://www.htslib.org + documentation: http://www.htslib.org/doc/samtools-depth.html + tool_dev_url: https://github.com/samtools/samtools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - intervals: + type: file + description: list of positions or regions in specified bed file + pattern: "*.{bed}" +output: + - tsv: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: The output of samtools depth has three columns - the name of the + contig or chromosome, the position and the number of reads aligned at that + position + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louperelo" + - "@nevinwu" +maintainers: + - "@louperelo" + - "@nevinwu" diff --git a/modules/nf-core/samtools/depth/samtools-depth.diff b/modules/nf-core/samtools/depth/samtools-depth.diff new file mode 100644 index 00000000..389b7650 --- /dev/null +++ b/modules/nf-core/samtools/depth/samtools-depth.diff @@ -0,0 +1,19 @@ +Changes in module 'nf-core/samtools/depth' +'modules/nf-core/samtools/depth/environment.yml' is unchanged +Changes in 'samtools/depth/main.nf': +--- modules/nf-core/samtools/depth/main.nf ++++ modules/nf-core/samtools/depth/main.nf +@@ -8,7 +8,7 @@ + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: +- tuple val(meta1), path(bam) ++ tuple val(meta1), path(bam), path(bai) + tuple val(meta2), path(intervals) + + output: + +'modules/nf-core/samtools/depth/meta.yml' is unchanged +'modules/nf-core/samtools/depth/tests/main.nf.test' is unchanged +'modules/nf-core/samtools/depth/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/samtools/depth/tests/main.nf.test b/modules/nf-core/samtools/depth/tests/main.nf.test new file mode 100644 index 00000000..ddacb540 --- /dev/null +++ b/modules/nf-core/samtools/depth/tests/main.nf.test @@ -0,0 +1,62 @@ + +nextflow_process { + + name "Test Process SAMTOOLS_DEPTH" + script "../main.nf" + process "SAMTOOLS_DEPTH" + + tag 
"modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/depth" + + test("test-samtools-depth") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true) + ] + input[1] = [[],[]] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-samtools-depth-intervals") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'bed' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/samtools/depth/tests/main.nf.test.snap b/modules/nf-core/samtools/depth/tests/main.nf.test.snap new file mode 100644 index 00000000..f5523cbf --- /dev/null +++ b/modules/nf-core/samtools/depth/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "test-samtools-depth-intervals": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,a89e2f8e1b66356c7500599fcadb8784" + ] + ], + "1": [ + "versions.yml:md5,8a21701d45a94582920fea7e0ba15eec" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,a89e2f8e1b66356c7500599fcadb8784" + ] + ], + "versions": [ + "versions.yml:md5,8a21701d45a94582920fea7e0ba15eec" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:55:28.207892316" + }, + "test-samtools-depth": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + 
}, + "test.tsv:md5,722d2c060944d6b96ae5ae00d8074657" + ] + ], + "1": [ + "versions.yml:md5,8a21701d45a94582920fea7e0ba15eec" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,722d2c060944d6b96ae5ae00d8074657" + ] + ], + "versions": [ + "versions.yml:md5,8a21701d45a94582920fea7e0ba15eec" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:55:17.233534429" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..28c0a81c --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..6721b2cb --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,80 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + - fai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 00000000..17244ef2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + 
test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap 
b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 00000000..1bbb3ec2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:57:47.450887871" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:04.804905659" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { 
+ "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:23.831268154" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:35.600243706" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:54.705460167" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 00000000..f76a3ba0 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff 
--git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 00000000..33ebbd5d --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 00000000..e4a83948 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..31175610 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..db8df0d5 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..ca34fb5c --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: 
true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..72d65e81 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 00000000..d01d5e3f --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input_files, stageAs: "?/*"), path(index, stageAs: "?/*") + tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? 
"touch ${prefix}.${index_type}" : "" + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 00000000..235aa219 --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,104 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/samtools-merge.diff b/modules/nf-core/samtools/merge/samtools-merge.diff new file mode 100644 index 00000000..c53a01b8 --- /dev/null +++ b/modules/nf-core/samtools/merge/samtools-merge.diff @@ -0,0 +1,24 @@ +Changes in module 'nf-core/samtools/merge' +'modules/nf-core/samtools/merge/environment.yml' is unchanged +Changes in 'samtools/merge/main.nf': +--- modules/nf-core/samtools/merge/main.nf ++++ modules/nf-core/samtools/merge/main.nf +@@ -8,9 +8,8 @@ + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: +- tuple val(meta), path(input_files, stageAs: "?/*") +- tuple val(meta2), path(fasta) +- tuple val(meta3), path(fai) ++ tuple val(meta), path(input_files, stageAs: "?/*"), path(index, stageAs: "?/*") ++ tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + 
+'modules/nf-core/samtools/merge/meta.yml' is unchanged +'modules/nf-core/samtools/merge/tests/index.config' is unchanged +'modules/nf-core/samtools/merge/tests/main.nf.test' is unchanged +'modules/nf-core/samtools/merge/tests/main.nf.test.snap' is unchanged +'modules/nf-core/samtools/merge/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 00000000..8c5668cf --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 00000000..40b36e82 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,137 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("bams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert snapshot(process.out.cram).match("bams_cram") }, + { assert 
snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } + ) + } + } + + test("crams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, + { assert snapshot(process.out.bam).match("crams_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, + { assert snapshot(process.out.csi).match("crams_csi") }, + { assert snapshot(process.out.versions).match("crams_versions") } + ) + } + } + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert 
snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bams_stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..0a41e01a --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,228 @@ +{ + "crams_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.647389" + }, + "bams_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.937013" + }, + "bams_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.928616" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + 
"nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.923289" + }, + "bams_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.925716" + }, + "crams_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.655959" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.319539" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:16:30.476887194" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.92719" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.940498" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.328852" + }, + "bams_stub_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:16:52.203823961" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.324219" + }, + "bams_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.933153" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + 
"timestamp": "2024-09-16T08:29:57.524363148" + }, + "crams_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.650652" + }, + "crams_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:16:06.977096207" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.33292" + }, + "crams_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.653512" + }, + "bams_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.943839" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml new file mode 100644 index 00000000..b869abcb --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/merge: + - "modules/nf-core/samtools/merge/**" diff --git a/modules/nf-core/samtools/reheader/environment.yml b/modules/nf-core/samtools/reheader/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/reheader/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/reheader/main.nf b/modules/nf-core/samtools/reheader/main.nf new file mode 100644 index 00000000..b4ba902f --- /dev/null +++ b/modules/nf-core/samtools/reheader/main.nf @@ -0,0 +1,51 @@ +process SAMTOOLS_REHEADER { + tag "$meta.id" + label 
'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0': + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai), val(cmd) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def cmd_reheader = cmd ? "-c '$cmd'" : "" + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools \\ + reheader \\ + $cmd_reheader \\ + $args \\ + $bam \\ + > ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/reheader/meta.yml b/modules/nf-core/samtools/reheader/meta.yml new file mode 100644 index 00000000..e9699f56 --- /dev/null +++ b/modules/nf-core/samtools/reheader/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json + +name: "samtools_reheader" +description: | + Replace the header in the bam file with the header generated by the command. + This command is much faster than replacing the header with a BAM→SAM→BAM conversion. 
+keywords: + - reheader + - cram + - bam + - genomics +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: BAM/CRAM file to be reheaded + pattern: "*.{bam,cram}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.bam": + type: file + description: Reheaded BAM/CRAM file + pattern: "*.{bam,cram}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/samtools/reheader/samtools-reheader.diff b/modules/nf-core/samtools/reheader/samtools-reheader.diff new file mode 100644 index 00000000..1c080b8d --- /dev/null +++ b/modules/nf-core/samtools/reheader/samtools-reheader.diff @@ -0,0 +1,35 @@ +Changes in module 'nf-core/samtools/reheader' +'modules/nf-core/samtools/reheader/environment.yml' is unchanged +Changes in 'samtools/reheader/main.nf': +--- modules/nf-core/samtools/reheader/main.nf ++++ modules/nf-core/samtools/reheader/main.nf +@@ -8,7 +8,7 @@ + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: +- tuple val(meta), path(bam) ++ tuple val(meta), path(bam), path(bai), val(cmd) + + output: + tuple val(meta), path("*.bam"), emit: bam +@@ -20,11 +20,13 @@ + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" ++ def 
cmd_reheader = cmd ? "-c '$cmd'" : "" + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools \\ + reheader \\ ++ $cmd_reheader \\ + $args \\ + $bam \\ + > ${prefix}.bam + +'modules/nf-core/samtools/reheader/meta.yml' is unchanged +'modules/nf-core/samtools/reheader/tests/chrdel.config' is unchanged +'modules/nf-core/samtools/reheader/tests/main.nf.test' is unchanged +'modules/nf-core/samtools/reheader/tests/main.nf.test.snap' is unchanged +'modules/nf-core/samtools/reheader/tests/rgdel.config' is unchanged +************************************************************ diff --git a/modules/nf-core/samtools/reheader/tests/chrdel.config b/modules/nf-core/samtools/reheader/tests/chrdel.config new file mode 100644 index 00000000..7b5da49a --- /dev/null +++ b/modules/nf-core/samtools/reheader/tests/chrdel.config @@ -0,0 +1,6 @@ +process { + withName: SAMTOOLS_REHEADER { + ext.args = "--command 'sed -E \"s/^(@SQ.*)(\\tSN:)chr/\\1\\2/\"'" + ext.prefix = { "${meta.id}_chrDel" } + } +} diff --git a/modules/nf-core/samtools/reheader/tests/main.nf.test b/modules/nf-core/samtools/reheader/tests/main.nf.test new file mode 100644 index 00000000..53142525 --- /dev/null +++ b/modules/nf-core/samtools/reheader/tests/main.nf.test @@ -0,0 +1,80 @@ +nextflow_process { + + name "Test Process SAMTOOLS_REHEADER" + script "../main.nf" + process "SAMTOOLS_REHEADER" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/reheader" + + test("homo_sapiens - chrdel") { + config "./chrdel.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - rgdel") { + config "./rgdel.config" + when { + process { + """ + 
input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/reheader/tests/main.nf.test.snap b/modules/nf-core/samtools/reheader/tests/main.nf.test.snap new file mode 100644 index 00000000..885ee350 --- /dev/null +++ b/modules/nf-core/samtools/reheader/tests/main.nf.test.snap @@ -0,0 +1,107 @@ +{ + "homo_sapiens - chrdel": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_chrDel.bam:md5,957d2d542fb9bbd6511df9c0b5069ce4" + ] + ], + "1": [ + "versions.yml:md5,3ffd4092d9f7ad731794ab8691ea79af" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test_chrDel.bam:md5,957d2d542fb9bbd6511df9c0b5069ce4" + ] + ], + "versions": [ + "versions.yml:md5,3ffd4092d9f7ad731794ab8691ea79af" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T08:28:17.577422205" + }, + "homo_sapiens - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3ffd4092d9f7ad731794ab8691ea79af" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ffd4092d9f7ad731794ab8691ea79af" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + 
"timestamp": "2024-09-17T08:26:34.99215454" + }, + "homo_sapiens - rgdel": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_rgDel.bam:md5,02e7512a76fd9df1905ee90558365b0c" + ] + ], + "1": [ + "versions.yml:md5,3ffd4092d9f7ad731794ab8691ea79af" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test_rgDel.bam:md5,02e7512a76fd9df1905ee90558365b0c" + ] + ], + "versions": [ + "versions.yml:md5,3ffd4092d9f7ad731794ab8691ea79af" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-17T08:23:30.403448169" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/reheader/tests/rgdel.config b/modules/nf-core/samtools/reheader/tests/rgdel.config new file mode 100644 index 00000000..03a2dc85 --- /dev/null +++ b/modules/nf-core/samtools/reheader/tests/rgdel.config @@ -0,0 +1,6 @@ +process { + withName: SAMTOOLS_REHEADER { + ext.args = "--command 'grep -v ^@RG'" + ext.prefix = { "${meta.id}_rgDel" } + } +} diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 00000000..5260dde8 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,81 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index), val(region), val(subsample) + tuple val(meta2), path(fasta), path(fai) + path qname + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def region_cmd = region ? "${region}" : "" + def subsample_cmd = subsample ? "--subsample ${subsample}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + ${subsample_cmd} \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 \\ + ${region_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + def index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" + + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 00000000..caa7b015 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,141 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{bai,csi,crai}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/samtools-view.diff b/modules/nf-core/samtools/view/samtools-view.diff new file mode 100644 index 00000000..d6c86ad3 --- /dev/null +++ b/modules/nf-core/samtools/view/samtools-view.diff @@ -0,0 +1,46 @@ +Changes in module 'nf-core/samtools/view' +'modules/nf-core/samtools/view/environment.yml' is unchanged +Changes in 'samtools/view/main.nf': +--- modules/nf-core/samtools/view/main.nf ++++ modules/nf-core/samtools/view/main.nf +@@ -8,8 +8,8 @@ + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: +- tuple val(meta), path(input), path(index) +- tuple val(meta2), path(fasta) ++ tuple val(meta), path(input), path(index), val(region), val(subsample) ++ tuple val(meta2), path(fasta), path(fai) + path qname + + output: +@@ -31,6 +31,8 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" ++ def region_cmd = region ? 
"${region}" : "" ++ def subsample_cmd = subsample ? "--subsample ${subsample}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : +@@ -44,9 +46,11 @@ + ${reference} \\ + ${readnames} \\ + $args \\ ++ ${subsample_cmd} \\ + -o ${prefix}.${file_type} \\ + $input \\ +- $args2 ++ $args2 \\ ++ ${region_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +'modules/nf-core/samtools/view/meta.yml' is unchanged +'modules/nf-core/samtools/view/tests/bam.config' is unchanged +'modules/nf-core/samtools/view/tests/bam_index.config' is unchanged +'modules/nf-core/samtools/view/tests/main.nf.test' is unchanged +'modules/nf-core/samtools/view/tests/main.nf.test.snap' is unchanged +'modules/nf-core/samtools/view/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 00000000..c10d1081 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 00000000..771ae033 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 00000000..37b81a91 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag 
"samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] 
= Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert 
snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..63849b03 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,528 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:26:24.461775464" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + 
"content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + 
}, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:51.953436682" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:14.475388399" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:49.673441798" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + 
"timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:23:27.151650338" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + 
"timestamp": "2024-09-16T09:24:12.95416913" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 00000000..4fdf1dd1 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/shapeit5/ligate/environment.yml b/modules/nf-core/shapeit5/ligate/environment.yml new file mode 100644 index 00000000..7b8e63bb --- /dev/null +++ b/modules/nf-core/shapeit5/ligate/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::shapeit5=1.0.0 diff --git a/modules/nf-core/shapeit5/ligate/main.nf b/modules/nf-core/shapeit5/ligate/main.nf new file mode 100644 index 00000000..5624d7d9 --- /dev/null +++ b/modules/nf-core/shapeit5/ligate/main.nf @@ -0,0 +1,51 @@ +process SHAPEIT5_LIGATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/shapeit5:1.0.0--h0c8ee15_0': + 'biocontainers/shapeit5:1.0.0--h0c8ee15_0'}" + + input: + tuple val(meta), path(input_list), path (input_list_index) + + output: + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: merged_variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + """ + printf "%s\\n" $input_list | tr -d '[],' > all_files.txt + + SHAPEIT5_ligate \\ + $args \\ + --input all_files.txt \\ + --thread $task.cpus \\ + --output ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + shapeit5: "\$(SHAPEIT5_ligate | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + shapeit5: "\$(SHAPEIT5_ligate | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -n 1)" + END_VERSIONS + """ +} diff --git a/modules/nf-core/shapeit5/ligate/meta.yml b/modules/nf-core/shapeit5/ligate/meta.yml new file mode 100644 index 00000000..a8d6db14 --- /dev/null +++ b/modules/nf-core/shapeit5/ligate/meta.yml @@ -0,0 +1,55 @@ +name: "shapeit5_ligate" +description: | + Ligate multiple phased BCF/VCF files into a single whole chromosome file. + Typically run to ligate multiple chunks of phased common variants. 
+keywords: + - ligate + - haplotype + - shapeit +tools: + - "shapeit5": + description: "Fast and accurate method for estimation of haplotypes (phasing)" + homepage: "https://odelaneau.github.io/shapeit5/" + documentation: "https://odelaneau.github.io/shapeit5/docs/documentation" + tool_dev_url: "https://github.com/odelaneau/shapeit5" + doi: "10.1101/2022.10.19.512867" + licence: ["MIT"] + identifier: biotools:shapeit5 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_list: + type: file + description: | + VCF/BCF files containing genotype probabilities (GP field). + The files should be ordered by genomic position. + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_list_index: + type: file + description: VCF/BCF files index. + pattern: "*.csi" +output: + - merged_variants: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,bcf,vcf.gz,bcf.gz}": + type: file + description: | + Output VCF/BCF file for the merged regions. + Phased information (HS field) is updated accordingly for the full region. 
+ pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@louislenezet" +maintainers: + - "@louislenezet" diff --git a/modules/nf-core/shapeit5/phasecommon/environment.yml b/modules/nf-core/shapeit5/phasecommon/environment.yml new file mode 100644 index 00000000..7b8e63bb --- /dev/null +++ b/modules/nf-core/shapeit5/phasecommon/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::shapeit5=1.0.0 diff --git a/modules/nf-core/shapeit5/phasecommon/main.nf b/modules/nf-core/shapeit5/phasecommon/main.nf new file mode 100644 index 00000000..d0c5d554 --- /dev/null +++ b/modules/nf-core/shapeit5/phasecommon/main.nf @@ -0,0 +1,64 @@ +process SHAPEIT5_PHASECOMMON { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/shapeit5:1.0.0--h0c8ee15_0': + 'biocontainers/shapeit5:1.0.0--h0c8ee15_0'}" + + input: + tuple val(meta) , path(input), path(input_index), path(pedigree), val(region), path(map) + tuple val(meta2), path(reference), path(reference_index) + tuple val(meta3), path(scaffold), path(scaffold_index) + + output: + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variant + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + + if ("$input" == "${prefix}.${suffix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + def map_command = map ? "--map $map" : "" + def reference_command = reference ? "--reference $reference" : "" + def scaffold_command = scaffold ? 
"--scaffold $scaffold" : "" + def pedigree_command = pedigree ? "--pedigree $pedigree" : "" + + """ + SHAPEIT5_phase_common \\ + $args \\ + --input $input \\ + $map_command \\ + $reference_command \\ + $scaffold_command \\ + $pedigree_command \\ + --region $region \\ + --thread $task.cpus \\ + --output ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + shapeit5: "\$(SHAPEIT5_phase_common | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + shapeit5: "\$(SHAPEIT5_phase_common | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + END_VERSIONS + """ +} diff --git a/modules/nf-core/shapeit5/phasecommon/meta.yml b/modules/nf-core/shapeit5/phasecommon/meta.yml new file mode 100644 index 00000000..5aa05232 --- /dev/null +++ b/modules/nf-core/shapeit5/phasecommon/meta.yml @@ -0,0 +1,98 @@ +name: "shapeit5_phasecommon" +description: Tool to phase common sites, typically SNP array data, or the first step + of WES/WGS data. +keywords: + - phasing + - haplotype + - shapeit +tools: + - "shapeit5": + description: "Fast and accurate method for estimation of haplotypes (phasing)" + homepage: "https://odelaneau.github.io/shapeit5/" + documentation: "https://odelaneau.github.io/shapeit5/docs/documentation" + tool_dev_url: "https://github.com/odelaneau/shapeit5" + doi: "10.1101/2022.10.19.512867 " + licence: ["MIT"] + identifier: biotools:shapeit5 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + Target dataset in VCF/BCF format defined at all variable positions. 
+ The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - input_index: + type: file + description: Index file of the input VCF/BCF file containing genotype likelihoods. + pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - pedigree: + type: file + description: | + Pedigree information in the following format: offspring father mother. + pattern: "*.{txt, tsv}" + - region: + type: string + description: | + Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). + For chrX, please treat PAR and non-PAR regions as different choromosome in order to avoid mixing ploidy. + pattern: "chrXX:leftBufferPosition-rightBufferPosition" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reference: + type: file + description: Reference panel of haplotypes in VCF/BCF format. + pattern: "*.{vcf.gz,bcf.gz}" + - reference_index: + type: file + description: Index file of the Reference panel file. + pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - scaffold: + type: file + description: Scaffold of haplotypes in VCF/BCF format. + pattern: "*.{vcf.gz,bcf.gz}" + - scaffold_index: + type: file + description: Index file of the scaffold file. + pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - map: + type: file + description: File containing the genetic map. + pattern: "*.gmap" +output: + - phased_variant: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,bcf,vcf.gz,bcf.gz}": + type: file + description: Phased variant dataset in VCF/BCF format. 
+ pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@LouisLeNezet" +maintainers: + - "@LouisLeNezet" diff --git a/modules/nf-core/shapeit5/phasecommon/shapeit5-phasecommon.diff b/modules/nf-core/shapeit5/phasecommon/shapeit5-phasecommon.diff new file mode 100644 index 00000000..7afc3136 --- /dev/null +++ b/modules/nf-core/shapeit5/phasecommon/shapeit5-phasecommon.diff @@ -0,0 +1,20 @@ +Changes in module 'nf-core/shapeit5/phasecommon' +'modules/nf-core/shapeit5/phasecommon/meta.yml' is unchanged +Changes in 'shapeit5/phasecommon/main.nf': +--- modules/nf-core/shapeit5/phasecommon/main.nf ++++ modules/nf-core/shapeit5/phasecommon/main.nf +@@ -8,10 +8,9 @@ + 'biocontainers/shapeit5:1.0.0--h0c8ee15_0'}" + + input: +- tuple val(meta) , path(input), path(input_index), path(pedigree), val(region) ++ tuple val(meta) , path(input), path(input_index), path(pedigree), val(region), path(map) + tuple val(meta2), path(reference), path(reference_index) + tuple val(meta3), path(scaffold), path(scaffold_index) +- tuple val(meta4), path(map) + + output: + tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variant + +'modules/nf-core/shapeit5/phasecommon/environment.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/stitch/environment.yml b/modules/nf-core/stitch/environment.yml new file mode 100644 index 00000000..4978bec2 --- /dev/null +++ b/modules/nf-core/stitch/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::r-base=4.3.1 + - conda-forge::rsync=3.2.7 + - bioconda::r-stitch=1.6.10 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/stitch/main.nf b/modules/nf-core/stitch/main.nf new file mode 100644 index 00000000..83a62741 --- /dev/null +++ b/modules/nf-core/stitch/main.nf @@ -0,0 +1,85 @@ +process STITCH { + tag "$meta.id" + label 
'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-stitch:1.6.10--r43h06b5641_0': + 'biocontainers/r-stitch:1.6.10--r43h06b5641_0' }" + + input: + tuple val(meta), path(collected_crams), path(collected_crais), path(cramlist), path(posfile), path(input, stageAs: "input"), path(rdata, stageAs: "RData_in"), val(chromosome_name), val(K), val(nGen) + tuple val(meta3), path(fasta), path(fasta_fai) + val seed + + output: + tuple val(meta), path("input", type: "dir") , emit: input + tuple val(meta), path("RData", type: "dir") , emit: rdata + tuple val(meta), path("plots", type: "dir") , emit: plots , optional: { generate_input_only } + tuple val(meta), path("*.vcf.gz") , emit: vcf , optional: { generate_input_only || bgen_output } + tuple val(meta), path("*.bgen") , emit: bgen , optional: { generate_input_only || !bgen_output } + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def generate_input_only = args2.contains( "--generateInputOnly TRUE" ) + def bgen_output = args2.contains( "--output_format bgen" ) + def reads_ext = collected_crams ? collected_crams.extension.unique() : [] + def rsync_cmd = rdata ? "rsync -rL ${rdata}/ RData" : "" + def stitch_cmd = seed ? "Rscript <(cat \$(which STITCH.R) | tail -n +2 | cat <(echo 'set.seed(${seed})') -)" : "STITCH.R" + def cramlist_cmd = cramlist && reads_ext == ["cram"] ? "--cramlist ${cramlist}" : "" + def bamlist_cmd = cramlist && reads_ext == ["bam" ] ? "--bamlist ${cramlist}" : "" + def reference_cmd = fasta ? "--reference ${fasta}" : "" + def regenerate_input_cmd = input && rdata && !cramlist ? "--regenerateInput FALSE --originalRegionName ${chromosome_name}" : "" + def rsync_version_cmd = rdata ? 
"rsync: \$(rsync --version | head -n1 | sed 's/^rsync version //; s/ .*\$//')" : "" + """ + ${rsync_cmd} ${args} + + ${stitch_cmd} \\ + --chr ${chromosome_name} \\ + --posfile ${posfile} \\ + --outputdir . \\ + --nCores ${task.cpus} \\ + --K ${K} \\ + --nGen ${nGen} \\ + ${cramlist_cmd} \\ + ${bamlist_cmd} \\ + ${reference_cmd} \\ + ${regenerate_input_cmd} \\ + ${args2} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${rsync_version_cmd} + r-base: \$(Rscript -e "cat(strsplit(R.version[['version.string']], ' ')[[1]][3])") + r-stitch: \$(Rscript -e "cat(as.character(utils::packageVersion('STITCH')))") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def generate_input_only = args2.contains( "--generateInputOnly TRUE" ) + def generate_plots_cmd = !generate_input_only ? "mkdir plots" : "" + def generate_vcf_cmd = !generate_input_only ? "touch ${prefix}.vcf.gz" : "" + def rsync_version_cmd = rdata ? "rsync: \$(rsync --version | head -n1 | sed 's/^rsync version //; s/ .*\$//')" : "" + """ + touch input + touch RData + ${generate_plots_cmd} + ${generate_vcf_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${rsync_version_cmd} + r-base: \$(Rscript -e "cat(strsplit(R.version[['version.string']], ' ')[[1]][3])") + r-stitch: \$(Rscript -e "cat(as.character(utils::packageVersion('STITCH')))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/stitch/meta.yml b/modules/nf-core/stitch/meta.yml new file mode 100644 index 00000000..0035d346 --- /dev/null +++ b/modules/nf-core/stitch/meta.yml @@ -0,0 +1,156 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "stitch" +description: "STITCH is an R program for reference panel free, read aware, low coverage + sequencing genotype imputation. 
STITCH runs on a set of samples with sequencing + reads in BAM format, as well as a list of positions to genotype, and outputs imputed + genotypes in VCF format." +keywords: + - imputation + - genomics + - vcf + - bgen + - cram + - bam + - sam +tools: + - "stitch": + description: "STITCH - Sequencing To Imputation Through Constructing Haplotypes" + homepage: "https://github.com/rwdavies/stitch" + documentation: "https://github.com/rwdavies/stitch" + tool_dev_url: "https://github.com/rwdavies/stitch" + doi: "10.1038/ng.3594" + licence: ["GPL v3"] + identifier: biotools:stitch-snijderlab +input: + - - meta: + type: map + description: | + Groovy Map containing information about the set of samples + e.g. `[ id:'test' ]` + - collected_crams: + type: file + description: List of sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - collected_crais: + type: file + description: List of BAM/CRAM/SAM index files + pattern: "*.{bai,crai,sai}" + - cramlist: + type: file + description: | + Text file with the path to the cram files to use in imputation, one per line. Since the cram files are staged to the working directory for the process, this file should just contain the file names without any pre-pending path. + pattern: "*.txt" + - - meta2: + type: map + description: | + Groovy Map containing information about the set of positions to run the imputation over + e.g. `[ id:'test' ]` + - posfile: + type: file + description: | + Tab-separated file describing the variable positions to be used for imputation. Refer to the documentation for the `--posfile` argument of STITCH for more information. + pattern: "*.tsv" + - input: + type: directory + description: | + Folder of pre-generated input RData objects used when STITCH is called with the `--regenerateInput FALSE` flag. It is generated by running STITCH with the `--generateInputOnly TRUE` flag. 
+ pattern: "input" + - rdata: + type: directory + description: | + Folder of pre-generated input RData objects used when STITCH is called with the `--regenerateInput FALSE` flag. It is generated by running STITCH with the `--generateInputOnly TRUE` flag. + pattern: "RData" + - chromosome_name: + type: string + description: Name of the chromosome to impute. Should match a chromosome name + in the reference genome. + - K: + type: integer + description: Number of ancestral haplotypes to use for imputation. Refer to + the documentation for the `--K` argument of STITCH for more information. + - nGen: + type: integer + description: Number of generations since founding of the population to use for + imputation. Refer to the documentation for the `--nGen` argument of STITCH + for more information. + - - meta3: + type: map + description: | + Groovy Map containing information about the reference genome used + e.g. `[ id:'test' ]` + - fasta: + type: file + description: FASTA reference genome file + pattern: "*.{fa,fasta}" + - fasta_fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - - seed: + type: integer + description: Seed for random number generation +output: + - input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - 'input", type: "dir': + type: directory + description: | + Folder of pre-generated input RData objects used when STITCH is called with the `--regenerateInput FALSE` flag. It is generated by running STITCH with the `--generateInputOnly TRUE` flag. + pattern: "input" + - rdata: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - 'RData", type: "dir': + type: directory + description: | + Folder of pre-generated input RData objects used when STITCH is called with the `--regenerateInput FALSE` flag. It is generated by running STITCH with the `--generateInputOnly TRUE` flag. 
+ pattern: "RData" + - plots: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - 'plots", type: "dir': + type: directory + description: | + Folder containing plots produced by STITCH during imputation. Which plots are produced depends on the command-line arguments passed to STITCH. + pattern: "plots" + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.vcf.gz": + type: file + description: | + Imputed genotype calls for the positions in `posfile`, in vcf format. This is the default output. + pattern: ".vcf.gz" + - bgen: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.bgen": + type: file + description: | + Imputed genotype calls for the positions in `posfile`, in vcf format. This is the produced if `--output_format bgen` is specified. + pattern: ".bgen" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@saulpierotti" +maintainers: + - "@saulpierotti" diff --git a/modules/nf-core/stitch/stitch.diff b/modules/nf-core/stitch/stitch.diff new file mode 100644 index 00000000..78ceb435 --- /dev/null +++ b/modules/nf-core/stitch/stitch.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/stitch' +'modules/nf-core/stitch/environment.yml' is unchanged +Changes in 'stitch/main.nf': +--- modules/nf-core/stitch/main.nf ++++ modules/nf-core/stitch/main.nf +@@ -8,8 +8,7 @@ + 'biocontainers/r-stitch:1.6.10--r43h06b5641_0' }" + + input: +- tuple val(meta), path(collected_crams), path(collected_crais), path(cramlist) +- tuple val(meta2), path(posfile), path(input, stageAs: "input"), path(rdata, stageAs: "RData_in"), val(chromosome_name), val(K), val(nGen) ++ tuple val(meta), path(collected_crams), path(collected_crais), path(cramlist), path(posfile), path(input, stageAs: "input"), path(rdata, stageAs: "RData_in"), 
val(chromosome_name), val(K), val(nGen) + tuple val(meta3), path(fasta), path(fasta_fai) + val seed + + +'modules/nf-core/stitch/meta.yml' is unchanged +'modules/nf-core/stitch/tests/main.nf.test' is unchanged +'modules/nf-core/stitch/tests/main.nf.test.snap' is unchanged +'modules/nf-core/stitch/tests/stitch_generate_input.config' is unchanged +'modules/nf-core/stitch/tests/stitch_impute_only.config' is unchanged +************************************************************ diff --git a/modules/nf-core/stitch/tests/main.nf.test b/modules/nf-core/stitch/tests/main.nf.test new file mode 100644 index 00000000..9a7d1803 --- /dev/null +++ b/modules/nf-core/stitch/tests/main.nf.test @@ -0,0 +1,178 @@ +def pathbam = "file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/" +def pathgenome = "file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/" +// positions and essential parameters +def posfile = "${pathgenome}dbsnp_138.hg38.first_10_biallelic_sites.tsv', checkIfExists: true)" +def stitch_params = "[ [ id: 'test_positions' ], $posfile, [], [], 'chr21', 2, 1 ]" + +// sequencing data in cram format +def crams_val = "[${pathbam}cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), ${pathbam}cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true)]" +def crais_val = "[${pathbam}cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true), ${pathbam}cram/test2.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)]" +def reads_cram = "[ [ id: 'test_reads' ], $crams_val, $crais_val ]" + +// sequencing data in bam format +def bams_val = "[${pathbam}bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), ${pathbam}bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true)]" +def bais_val = "[${pathbam}bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), ${pathbam}bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)]" 
+def reads_bam = "[ [ id:'test_reads' ], $bams_val, $bais_val ]" + +// reference genome +def reference = "[[ id:'test_reference' ], ${pathgenome}genome.fasta', checkIfExists: true), ${pathgenome}genome.fasta.fai', checkIfExists: true)]" + +// for reproducibility +def seed = 1 + +nextflow_process { + name "Test Process STITCH" + script "../main.nf" + process "STITCH" + + tag "modules" + tag "modules_nfcore" + tag "stitch" + + test("test_no_seed") { + when { + process { + """ + filelist = Channel.fromPath( $crams_val ) + .map { it[-1] as String } // get only filename + .collectFile( name: "cramlist.txt", newLine: true, sort: true ) + + input[0] = Channel.of( $reads_cram ).combine( filelist ) + input[1] = $stitch_params + input[2] = $reference + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.input.collect{ file(it[1]).listFiles().sort().name }, + process.out.rdata.collect{ file(it[1]).listFiles().sort().name }, + process.out.plots.collect{ file(it[1]).listFiles().sort().name }, + process.out.vcf.collect{ file(it[1]).name }, + process.out.versions + ).match() } + ) + } + } + + test("test_with_seed") { + when { + process { + """ + filelist = Channel.fromPath( $crams_val ) + .map { it[-1] as String } // get only filename + .collectFile( name: "cramlist.txt", newLine: true, sort: true ) + input[0] = Channel.of( $reads_cram ).combine( filelist ) + input[1] = $stitch_params + input[2] = $reference + input[3] = $seed + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.input.collect{ file(it[1]).listFiles().sort().name }, + process.out.rdata.collect{ file(it[1]).listFiles().sort().name }, + process.out.plots.collect{ file(it[1]).listFiles().sort().name }, + process.out.vcf.collect{ file(it[1]).name }, + process.out.versions + ).match() } + ) + } + } + + test("test_two_stage_imputation") { + setup { + run ("STITCH", alias: "STITCH_GENERATE_INPUTS") { + 
script "../main.nf" + config "./stitch_generate_input.config" + process { + """ + filelist = Channel.fromPath( $crams_val ) + .map { it[-1] as String } // get only filename + .collectFile( name: "cramlist.txt", newLine: true, sort: true ) + input[0] = Channel.of( $reads_cram ).combine( filelist ) + input[1] = $stitch_params + input[2] = $reference + input[3] = $seed + """ + } + } + } + + when { + config "./stitch_impute_only.config" + process { + """ + ch_input_2step = Channel.of( $stitch_params ) + .map { + meta, positions, target, rdata, chromosome_name, K, nGen -> + [ meta, positions ] + } + .combine( + STITCH_GENERATE_INPUTS.out.input + .join ( STITCH_GENERATE_INPUTS.out.rdata ) + ) + .map { + meta, positions, metaT, target, rdata -> + [ metaT, positions, target, rdata, "chr21", 2, 1 ] + } + input[0] = [[id: null], [], [], []] + input[1] = ch_input_2step + input[2] = [[id: null], [], []] + input[3] = $seed + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.input.collect{ file(it[1]).listFiles().sort().name }, + process.out.rdata.collect{ file(it[1]).listFiles().sort().name }, + process.out.plots.collect{ file(it[1]).listFiles().sort().name }, + process.out.vcf.collect{ file(it[1]).name }, + process.out.versions + ).match() } + ) + } + } + + test("test_with_bam") { + when { + process { + """ + filelist = Channel.fromPath( $bams_val ) + .map { it[-1] as String } // get only filename + .collectFile( name: "cramlist.txt", newLine: true, sort: true ) + input[0] = Channel.of( $reads_bam ).combine( filelist ) + input[1] = $stitch_params + input[2] = $reference + input[3] = $seed + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.input.collect{ file(it[1]).listFiles().sort().name }, + process.out.rdata.collect{ file(it[1]).listFiles().sort().name }, + process.out.plots.collect{ file(it[1]).listFiles().sort().name }, + process.out.vcf.collect{ file(it[1]).name }, + 
process.out.versions + ).match() } + ) + } + } + +} diff --git a/modules/nf-core/stitch/tests/main.nf.test.snap b/modules/nf-core/stitch/tests/main.nf.test.snap new file mode 100644 index 00000000..ee28277f --- /dev/null +++ b/modules/nf-core/stitch/tests/main.nf.test.snap @@ -0,0 +1,170 @@ +{ + "test_with_bam": { + "content": [ + [ + [ + "sample.1.input.chr21.RData", + "sample.2.input.chr21.RData" + ] + ], + [ + [ + "EM.all.chr21.RData", + "end.chr21.RData", + "endEM.chr21.RData", + "sampleNames.chr21.RData", + "start.chr21.RData", + "startEM.chr21.RData" + ] + ], + [ + [ + "alphaMat.chr21.all.s.1.png", + "alphaMat.chr21.normalized.s.1.png", + "hapSum.chr21.s.1.png", + "hapSum_log.chr21.s.1.png", + "metricsForPostImputationQC.chr21.sample.jpg", + "metricsForPostImputationQCChromosomeWide.chr21.sample.jpg", + "r2.chr21.goodonly.jpg" + ] + ], + [ + "stitch.chr21.vcf.gz" + ], + [ + "versions.yml:md5,23ba0a3cec10c1bdb411b3e1f31b008e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T20:16:19.412761604" + }, + "test_no_seed": { + "content": [ + [ + [ + "sample.1.input.chr21.RData", + "sample.2.input.chr21.RData" + ] + ], + [ + [ + "EM.all.chr21.RData", + "end.chr21.RData", + "endEM.chr21.RData", + "sampleNames.chr21.RData", + "start.chr21.RData", + "startEM.chr21.RData" + ] + ], + [ + [ + "alphaMat.chr21.all.s.1.png", + "alphaMat.chr21.normalized.s.1.png", + "hapSum.chr21.s.1.png", + "hapSum_log.chr21.s.1.png", + "metricsForPostImputationQC.chr21.sample.jpg", + "metricsForPostImputationQCChromosomeWide.chr21.sample.jpg", + "r2.chr21.goodonly.jpg" + ] + ], + [ + "stitch.chr21.vcf.gz" + ], + [ + "versions.yml:md5,23ba0a3cec10c1bdb411b3e1f31b008e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T18:49:25.941823568" + }, + "test_two_stage_imputation": { + "content": [ + [ + [ + "sample.1.input.chr21.RData", + "sample.2.input.chr21.RData" + ] + ], + [ + [ + 
"EM.all.chr21.RData", + "end.chr21.RData", + "endEM.chr21.RData", + "sampleNames.chr21.RData", + "start.chr21.RData", + "startEM.chr21.RData" + ] + ], + [ + [ + "alphaMat.chr21.all.s.1.png", + "alphaMat.chr21.normalized.s.1.png", + "hapSum.chr21.s.1.png", + "hapSum_log.chr21.s.1.png", + "metricsForPostImputationQC.chr21.sample.jpg", + "metricsForPostImputationQCChromosomeWide.chr21.sample.jpg", + "r2.chr21.goodonly.jpg" + ] + ], + [ + "stitch.chr21.vcf.gz" + ], + [ + "versions.yml:md5,019f1127aacfb7d14a0c4ae3d740a07d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T20:14:01.151837306" + }, + "test_with_seed": { + "content": [ + [ + [ + "sample.1.input.chr21.RData", + "sample.2.input.chr21.RData" + ] + ], + [ + [ + "EM.all.chr21.RData", + "end.chr21.RData", + "endEM.chr21.RData", + "sampleNames.chr21.RData", + "start.chr21.RData", + "startEM.chr21.RData" + ] + ], + [ + [ + "alphaMat.chr21.all.s.1.png", + "alphaMat.chr21.normalized.s.1.png", + "hapSum.chr21.s.1.png", + "hapSum_log.chr21.s.1.png", + "metricsForPostImputationQC.chr21.sample.jpg", + "metricsForPostImputationQCChromosomeWide.chr21.sample.jpg", + "r2.chr21.goodonly.jpg" + ] + ], + [ + "stitch.chr21.vcf.gz" + ], + [ + "versions.yml:md5,23ba0a3cec10c1bdb411b3e1f31b008e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T18:59:46.592393397" + } +} \ No newline at end of file diff --git a/modules/nf-core/stitch/tests/stitch_generate_input.config b/modules/nf-core/stitch/tests/stitch_generate_input.config new file mode 100644 index 00000000..cb629a8e --- /dev/null +++ b/modules/nf-core/stitch/tests/stitch_generate_input.config @@ -0,0 +1,6 @@ +process { + withName: STITCH_GENERATE_INPUTS { + ext.args2 = "--generateInputOnly TRUE" + } +} + diff --git a/modules/nf-core/stitch/tests/stitch_impute_only.config b/modules/nf-core/stitch/tests/stitch_impute_only.config new file mode 100644 index 00000000..f3304cea --- 
/dev/null +++ b/modules/nf-core/stitch/tests/stitch_impute_only.config @@ -0,0 +1,5 @@ +process { + withName: STITCH { + ext.args2 = "--regenerateInputWithDefaultValues TRUE" + } +} diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml new file mode 100644 index 00000000..017c259d --- /dev/null +++ b/modules/nf-core/tabix/bgzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 00000000..67991c74 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,55 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? 
'-d' : '' + // Name the index according to $prefix, unless a name has been requested + if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + echo "" | gzip > ${output} + touch ${output}.gzi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml new file mode 100644 index 00000000..131e92cf --- /dev/null +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -0,0 +1,61 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${output}: + type: file + description: Output compressed/decompressed file + pattern: "*." 
+ - gzi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${output}.gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config new file mode 100644 index 00000000..6b6ff55f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = ' -i' + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test new file mode 100644 index 00000000..d784aa07 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TABIX_BGZIP" + script "modules/nf-core/tabix/bgzip/main.nf" + process "TABIX_BGZIP" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgzip" + + test("sarscov2_vcf_bgzip_compress") { + when { + process { + """ + input[0] = [ + [ id:'bgzip_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bgzip_test") + } + ) + } + } + + test("homo_genome_bedgz_compress") { + when { + process { + """ + input[0] = [ + [ id:'bedgz_test' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { 
assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bedgz_test") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_stub") { + options '-stub' + config "./bgzip_compress.config" + + when { + process { + """ + input[0] = [ + [ id:"test_stub" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("test_stub") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_gzi") { + config "./bgzip_compress.config" + when { + process { + """ + input[0] = [ + [ id:"gzi_compress_test" ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gzi[0][1]).name + ).match("gzi_compress_test") + } + ) + } + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap new file mode 100644 index 00000000..0748143f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap @@ -0,0 +1,218 @@ +{ + "gzi_compress_test": { + "content": [ + "gzi_compress_test.vcf.gz.gzi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:29.328146" + }, + "homo_genome_bedgz_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + 
"versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:34.159992362" + }, + "test_stub": { + "content": [ + "test_stub.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:20.811489" + }, + "sarscov2_vcf_bgzip_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:22.087769106" + }, + "sarscov2_vcf_bgzip_compress_gzi": { + "content": [ + { + "0": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "output": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:57.15091665" + }, + "bgzip_test": { + "content": [ + "bgzip_test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:03.768295" + }, + "bedgz_test": { + "content": [ + "bedgz_test.bed" + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:52:12.453855" + }, + "sarscov2_vcf_bgzip_compress_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ], + "gzi": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:28:45.219404786" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml new file mode 100644 index 00000000..de0eec86 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgzip: + - "modules/nf-core/tabix/bgzip/**" diff --git a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config new file mode 100644 index 00000000..f3a3c467 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/vcf_none.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = '' + } +} diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 00000000..017c259d --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf new file mode 100644 index 00000000..13acd670 --- /dev/null +++ 
b/modules/nf-core/tabix/tabix/main.nf @@ -0,0 +1,45 @@ +process TABIX_TABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(tab) + + output: + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + tabix \\ + --threads $task.cpus \\ + $args \\ + $tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${tab}.tbi + touch ${tab}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml new file mode 100644 index 00000000..7864832d --- /dev/null +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -0,0 +1,58 @@ +name: tabix_tabix +description: create tabix index from a sorted bgzip tab-delimited genome file +keywords: + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file compressed with bgzip + pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" +output: + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: tabix index file + pattern: "*.{tbi}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: coordinate sorted index file + pattern: "*.{csi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test new file mode 100644 index 00000000..102b0d7b --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -0,0 +1,136 @@ +nextflow_process { + + name "Test Process TABIX_TABIX" + script "modules/nf-core/tabix/tabix/main.nf" + process "TABIX_TABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/tabix" + + test("sarscov2_bedgz_tbi") { + config "./tabix_bed.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_bed' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name + ).match() } + ) + } + } + + test("sarscov2_gff_tbi") { + config "./tabix_gff.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_gff' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert 
process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name).match() } + ) + } + + } + + test("sarscov2_vcf_tbi") { + config "./tabix_vcf_tbi.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_vcf' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2_vcf_csi") { + config "./tabix_vcf_csi.config" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.csi[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2_vcf_csi_stub") { + config "./tabix_vcf_csi.config" + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi_stub' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.csi[0][1]).name + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap new file mode 100644 index 00000000..c2b9ed0b --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -0,0 +1,212 @@ +{ + "sarscov2_gff_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_gff" + }, + 
"genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "genome.gff3.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:25.653807564" + }, + "sarscov2_bedgz_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.bed.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:09.754082161" + }, + "sarscov2_vcf_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:40.042648294" + }, + "sarscov2_vcf_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "vcf_csi_stub" + }, + 
"test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:07:08.700367261" + }, + "sarscov2_vcf_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" + ] + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" + ] + ], + "tbi": [ + + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:55.362067748" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_bed.config b/modules/nf-core/tabix/tabix/tests/tabix_bed.config new file mode 100644 index 00000000..7ff05905 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_bed.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p bed' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_gff.config b/modules/nf-core/tabix/tabix/tests/tabix_gff.config new file mode 100644 index 00000000..20c0a1e3 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_gff.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p gff' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config new file mode 100644 index 00000000..eb4f2d7e --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config 
b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config new file mode 100644 index 00000000..2774c8a9 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tags.yml b/modules/nf-core/tabix/tabix/tests/tags.yml new file mode 100644 index 00000000..6eda0653 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/tabix: + - "modules/nf-core/tabix/tabix/**" diff --git a/modules/nf-core/vcflib/vcffixup/environment.yml b/modules/nf-core/vcflib/vcffixup/environment.yml new file mode 100644 index 00000000..d0a99ffd --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::vcflib=1.0.3" diff --git a/modules/nf-core/vcflib/vcffixup/main.nf b/modules/nf-core/vcflib/vcffixup/main.nf new file mode 100644 index 00000000..43f8ea63 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/main.nf @@ -0,0 +1,46 @@ +process VCFLIB_VCFFIXUP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1': + 'biocontainers/vcflib:1.0.3--hecb563c_1' }" + + input: + tuple val(meta), path(vcf), path(tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. 
Please update this string when bumping container versions. + + """ + vcffixup \\ + $vcf | bgzip -c $args > ${prefix}_fixed.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.fixup.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/vcflib/vcffixup/meta.yml b/modules/nf-core/vcflib/vcffixup/meta.yml new file mode 100644 index 00000000..4d95e4f0 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/meta.yml @@ -0,0 +1,50 @@ +name: "vcflib_vcffixup" +description: Generates a VCF stream where AC and NS have been generated for each record + using sample genotypes. +keywords: + - vcf + - vcflib + - vcflib/vcffixup + - AC/NS/AF +tools: + - "vcflib": + description: "Command-line tools for manipulating VCF files" + homepage: https://github.com/vcflib/vcflib + documentation: https://github.com/vcflib/vcflib#USAGE + doi: "10.1101/2021.05.21.445151" + licence: ["MIT"] + identifier: biotools:vcflib +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.{.vcf.gz,vcf}" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/vcflib/vcffixup/tests/main.nf.test b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test new file mode 100644 index 00000000..b17eaae7 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process VCFLIB_VCFFIXUP" + script "../main.nf" + process "VCFLIB_VCFFIXUP" + + tag "modules" + tag "modules_nfcore" + tag "vcflib" + tag "vcflib/vcffixup" + + test("sarscov2 - vcf") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - vcf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } +} diff --git a/modules/nf-core/vcflib/vcffixup/tests/main.nf.test.snap b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test.snap new file mode 100644 index 00000000..957d8354 --- /dev/null +++ 
b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "sarscov2 - vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_fixed.vcf.gz:md5,639ca71cddc7f5444f4376cdf474007e" + ] + ], + "1": [ + "versions.yml:md5,fac118ed81e4ca76d3a75c2f47f4c4ce" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_fixed.vcf.gz:md5,639ca71cddc7f5444f4376cdf474007e" + ] + ], + "versions": [ + "versions.yml:md5,fac118ed81e4ca76d3a75c2f47f4c4ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T18:48:21.949531776" + }, + "sarscov2 - vcf - stub": { + "content": [ + "test.fixup.vcf.gz", + [ + "versions.yml:md5,fac118ed81e4ca76d3a75c2f47f4c4ce" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T18:48:32.365674014" + } +} \ No newline at end of file diff --git a/modules/nf-core/vcflib/vcffixup/tests/tags.yml b/modules/nf-core/vcflib/vcffixup/tests/tags.yml new file mode 100644 index 00000000..51cf4712 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/tests/tags.yml @@ -0,0 +1,2 @@ +vcflib/vcffixup: + - "modules/nf-core/vcflib/vcffixup/**" diff --git a/nextflow.config b/nextflow.config index 4a3831e0..ed39be1e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,147 +1,347 @@ /* - * ------------------------------------------------- - * nf-core/phaseimpute Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. 
- */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/phaseimpute Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { - // Workflow flags - // TODO nf-core: Specify your pipeline's command line flags - genome = false - reads = "data/*{1,2}.fastq.gz" - single_end = false - outdir = './results' - - // Boilerplate options - name = false - multiqc_config = false - email = false - email_on_fail = false - max_multiqc_email_size = 25.MB - plaintext_email = false - monochrome_logs = false - help = false - igenomes_base = 's3://ngi-igenomes/igenomes/' - tracedir = "${params.outdir}/pipeline_info" - igenomes_ignore = false - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_description = false - config_profile_contact = false - config_profile_url = false - - // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h + // steps + steps = null + + // Input options + input = null + input_region = null + map = null + tools = null + + // Panel preparation + panel = null + phase = false + normalize = true + compute_freq = false + remove_samples = null + chunk_model = 'sequential' + + // ChrCheck parameters + rename_chr = false + max_chr_names = 4 + + // References + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + fasta = null + fasta_fai = null + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + multiqc_replace_names = null + multiqc_sample_names = null + + 
// Simulate + depth = 1 + genotype = null + + // Validation + input_truth = null + bins = "0 0.01 0.05 0.1 0.2 0.5" + min_val_gl = 0.9 + min_val_dp = 5 + + // Imputation + batch_size = 100 + + // QUILT + ngen = 100 + buffer = 10000 + + // STITCH + k_val = 2 + seed = 1 + posfile = null + + // GLIMPSE2 + chunks = null + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' -} -// Container slug. Stable releases should specify release tag! -// Developmental code should specify :dev -process.container = 'nfcore/phaseimpute:dev' + // Config options + config_profile_name = null + config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Schema validation default options + validate_params = true +} // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// initialisation step +includeConfig 'conf/steps/initialisation.config' + +// chrcheck workflow +includeConfig 'conf/steps/chrcheck.config' + +// simulation step +includeConfig 'conf/steps/simulation.config' + +// panel_prep step +includeConfig 'conf/steps/panel_prep.config' + +// imputation step +includeConfig 
'conf/steps/imputation_glimpse1.config' +includeConfig 'conf/steps/imputation_quilt.config' +includeConfig 'conf/steps/imputation_stitch.config' +includeConfig 'conf/steps/imputation_glimpse2.config' + +// validation step +includeConfig 'conf/steps/validation.config' profiles { - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { - docker.enabled = true - // Avoid this error: - // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. - // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 - // once this is established and works well, nextflow might implement this behavior as new default. - docker.runOptions = '-u \$(id -u):\$(id -g)' - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - } - test { includeConfig 'conf/test.config' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + 
docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + test_sim { includeConfig 'conf/test_sim.config' } + test_validate { includeConfig 'conf/test_validate.config' } + test_all { includeConfig 'conf/test_all.config' } + test_quilt { includeConfig 'conf/test_quilt.config' } + test_stitch { includeConfig 'conf/test_stitch.config' } + test_glimpse2 { includeConfig 'conf/test_glimpse2.config' } + test_panelprep { includeConfig 'conf/test_panelprep.config' } + test_dog { includeConfig 'conf/test_dog.config' } + test_batch { includeConfig 'conf/test_batch.config' } } +// Load nf-core custom profiles from different Institutions +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + +// Load nf-core/phaseimpute custom profiles from different institutions. +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/phaseimpute.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' + // Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} +includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
-// Export this variable to prevent local Python libraries from conflicting with those in the container env { - PYTHONNOUSERSITE = 1 + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Set bash options +process.shell = """\ +bash +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. +""" + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline.html" + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report.html" + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace.txt" + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { - name = 'nf-core/phaseimpute' - author = '@louislenezet' - homePage = 'https://github.com/nf-core/phaseimpute' - description = 'Nf-core pipeline for phasing and imputing genomic data.' 
- mainScript = 'main.nf' - nextflowVersion = '>=19.10.0' - version = '1.0dev' + name = 'nf-core/phaseimpute' + author = """Louis Le Nezet, Anabella Trigila""" + homePage = 'https://github.com/nf-core/phaseimpute' + description = """Phasing and imputation pipeline""" + mainScript = 'main.nf' + nextflowVersion = '!>=24.04.2' + version = '1.0.0' + doi = '' } -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +validation { + defaultIgnoreParams = ["genomes"] + help { + enabled = true + command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m ${manifest.name} ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? 
"* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/${manifest.name}/blob/master/CITATIONS.md +""" } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText } - } } diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 00000000..fe1e75ae --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,485 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/nextflow_schema.json", + "title": "nf-core/phaseimpute pipeline parameters", + "description": "Phasing and imputation pipeline", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/phaseimpute/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + }, + "input_region": { + "type": "string", + "description": "Region of the genome to use (optional: if no file given, the whole genome will be used). The file should be a comma-separated file with 3 columns, and a header row.", + "schema": "assets/schema_input_region.json", + "format": "file-path", + "pattern": "^\\S+\\.csv$" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "rename_chr": { + "type": "boolean", + "description": "Should the panel VCF files be renamed to match the reference genome (e.g. 'chr1' -> '1')" + }, + "max_chr_names": { + "type": "integer", + "description": "Maximum number of contigs name to print before resuming (i.e. show only subset and add '...' at the end).", + "hidden": true, + "default": 4 + }, + "remove_samples": { + "type": "string", + "description": "Comma-separated list of samples to remove from the reference panel. Useful for benchmarking purposes.", + "pattern": "^([a-zA-Z0-9]+)(,[a-zA-Z0-9]+)*$" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + }, + "steps": { + "type": "string", + "description": "Step(s) to run.", + "fa_icon": "fas fa-step-forward", + "pattern": "^((all|simulate|panelprep|impute|validate)?,?)*(? 0, the program exits with an error. Set to zero to have no filter of if using \u2013gt-validation", + "default": 5, + "pattern": "^\\d+$" + } + } + }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", + "properties": { + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + }, + "fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified.", + "fa_icon": "far fa-file-code" + }, + "fasta_fai": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?\\.fai$", + "description": "Path to FASTA index genome file.", + "help_text": "This parameter is *optional* even if `--genome` is not specified.", + "fa_icon": "far fa-file-code" + }, + "map": { + "type": "string", + "format": "file-path", + "exists": true, + "description": "Path to gmap genome file.", + "help_text": "This parameter is *optional*. 
This is used to refine the imputation process to match the recombination event rate in your specie.", + "fa_icon": "far fa-file-code", + "mimetype": "text/csv", + "schema": "assets/schema_map.json" + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "The base path to the igenomes reference files", + "fa_icon": "fas fa-ban", + "hidden": true, + "default": "s3://ngi-igenomes/igenomes/" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "multiqc_config": { + "type": "string", + "format": "file-path", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, + "multiqc_replace_names": { + "type": "string", + "description": "Optional two-column sample renaming file. First column a set of patterns, second column a set of corresponding replacements. Passed via MultiQC's `--replace-names` option.", + "fa_icon": "far fa-file-code" + }, + "multiqc_sample_names": { + "type": "string", + "description": "Optional TSV file with headers, passed to the MultiQC --sample_names argument.", + "fa_icon": "far fa-file-code" + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/", + "hidden": true + } + } + }, + "quilt_parameters": { + "title": "QUILT parameters", + "type": "object", + "description": "Arguments to customize QUILT run", + "default": "", + "properties": { + "buffer": { + "type": "integer", + "default": 10000, + "description": "Buffer of region to perform imputation over. So imputation is run form regionStart-buffer to regionEnd+buffer, and reported for regionStart to regionEnd, including the bases of regionStart and regionEnd." + }, + "ngen": { + "type": "integer", + "default": 100, + "description": "Number of generations since founding of the population to use for imputation." 
+ } + } + }, + "stitch_parameters": { + "title": "STITCH parameters", + "type": "object", + "description": "Arguments to customize STITCH run", + "default": "", + "properties": { + "posfile": { + "type": "string", + "description": "Path to comma-separated file containing tab-separated files describing the variable positions to be used for imputation. Refer to the documentation for the `--posfile` argument of STITCH for more information.", + "format": "file-path", + "schema": "assets/schema_posfile.json", + "pattern": "^\\S+\\.(csv|tsv|txt)$", + "mimetype": "text/csv" + }, + "k_val": { + "type": "integer", + "default": 2, + "description": "Number of ancestral haplotypes to use for imputation. Refer to the documentation for the `--K` argument of STITCH for more information." + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/simulate" + }, + { + "$ref": "#/$defs/panelprep" + }, + { + "$ref": "#/$defs/imputation_options" + }, + { + "$ref": "#/$defs/validation" + }, + { + "$ref": "#/$defs/reference_genome_options" + }, + { + "$ref": "#/$defs/institutional_config_options" + }, + { + "$ref": "#/$defs/generic_options" + }, + { + "$ref": "#/$defs/quilt_parameters" + }, + { + "$ref": "#/$defs/stitch_parameters" + } + ] +} diff --git a/nf-core-phaseimpute_logo_light.png b/nf-core-phaseimpute_logo_light.png new file mode 100644 index 00000000..767e1d57 Binary files /dev/null and b/nf-core-phaseimpute_logo_light.png differ diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..41977a16 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,22 @@ +config { + plugins { + load "nft-vcf@1.0.7" + load "nft-bam@0.2.0" + } + // location for all nf-tests + testsDir "workflows/phaseimpute" + + // nf-test directory including temporary files for each test + workDir System.getenv("NXF_TEST_DIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile 
"./nextflow.config" + configFile "tests/config/nf-test.config" + + stage { + copy "./nextflow_schema.json" + } + + // run all test with the defined docker profile from the main nextflow.config + profile "" +} diff --git a/subworkflows/local/bam_chr_rename_samtools/main.nf b/subworkflows/local/bam_chr_rename_samtools/main.nf new file mode 100644 index 00000000..dbf1d6a3 --- /dev/null +++ b/subworkflows/local/bam_chr_rename_samtools/main.nf @@ -0,0 +1,38 @@ +include { SAMTOOLS_REHEADER } from '../../../modules/nf-core/samtools/reheader' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' + +workflow BAM_CHR_RENAME_SAMTOOLS { + take: + ch_bam // channel: [ [id], bam, index, prefix ] + + main: + + ch_versions = Channel.empty() + + // Rename the chromosome with or without prefix + SAMTOOLS_REHEADER( + ch_bam.map{ + meta, bam, index, prefix -> + def cmd = "" + if (prefix == "nochr") { + cmd = 'sed -E "s/^(@SQ.*\\tSN:)chr/\\1/"' + } else if (prefix == "chr") { + cmd = 'sed -E "s/^(@SQ.*\\tSN:)([0-9]+|X|Y|MT|M)/\\1chr\\2/"' + } else { + error "Invalid chr_prefix: ${prefix}" + } + [meta, bam, index, cmd] + }, // channel: [ [id], bam, index, cmd] + ) + ch_versions = ch_versions.mix(SAMTOOLS_REHEADER.out.versions.first()) + + SAMTOOLS_INDEX(SAMTOOLS_REHEADER.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + ch_bam_renamed = SAMTOOLS_REHEADER.out.bam + .combine(SAMTOOLS_INDEX.out.bai, by:0) + + emit: + bam_renamed = ch_bam_renamed // [ [id], bam, csi ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_chr_rename_samtools/tests/main.nf.test b/subworkflows/local/bam_chr_rename_samtools/tests/main.nf.test new file mode 100644 index 00000000..0e3dda5b --- /dev/null +++ b/subworkflows/local/bam_chr_rename_samtools/tests/main.nf.test @@ -0,0 +1,110 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_CHR_RENAME_SAMTOOLS" + script "../main.nf" + + config "./nextflow.config" + + 
workflow "BAM_CHR_RENAME_SAMTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_chr_rename_samtools" + tag "bam_chr_rename_samtools" + + tag "samtools" + tag "samtools/reheader" + tag "samtools/index" + tag "gawk" + + test("Should remove chr prefix") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "test_paired"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExist:true), + "nochr" + ], + [ + [id: "test_MT"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam.bai', checkIfExist:true), + "nochr" + ], + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.bam_renamed.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_renamed.collect{ + bam(it[1]).getHeader().findAll { it.startsWith ("@SQ") } + } + ).match() } + ) + } + } + + test("Should add chr prefix") { + setup { + run("BAM_CHR_RENAME_SAMTOOLS", alias: "PREPROCESS") { + script "../main.nf" + process { + """ + input[0] = Channel.fromList([ + [ + [id: "test_paired"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExist:true), + "nochr" + ], + [ + [id: "test_MT"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam.bai', checkIfExist:true), + "nochr" + ], + ]) + """ + } + } + } + + when { + workflow { + """ + input[0] = PREPROCESS.out.bam_renamed.combine(["chr"]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.bam_renamed.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_renamed.collect{ + bam(it[1]).getHeader().findAll { it.startsWith ("@SQ") } + } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_chr_rename_samtools/tests/main.nf.test.snap b/subworkflows/local/bam_chr_rename_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..6f077931 --- /dev/null +++ b/subworkflows/local/bam_chr_rename_samtools/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "Should remove chr prefix": { + "content": [ + [ + "versions.yml:md5,3777f4a1f04f5087333fbe98eee09df9", + "versions.yml:md5,fc2d8252e26e8b681f2dd73d8b08fdde" + ], + [ + [ + { + "id": "test_MT" + }, + "test_MT_withchr.bam", + "test_MT_withchr.bam.bai" + ], + [ + { + "id": "test_paired" + }, + "test_paired_withchr.bam", + "test_paired_withchr.bam.bai" + ] + ], + [ + [ + "@SQ\tSN:M\tLN:16569" + ], + [ + "@SQ\tSN:MT192765.1\tLN:29829" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:27:45.777831517" + }, + "Should add chr prefix": { + "content": [ + [ + "versions.yml:md5,3777f4a1f04f5087333fbe98eee09df9", + "versions.yml:md5,fc2d8252e26e8b681f2dd73d8b08fdde" + ], + [ + [ + { + "id": "test_MT" + }, + "test_MT_withchr.bam", + "test_MT_withchr.bam.bai" + ], + [ + { + "id": "test_paired" + }, + "test_paired_withchr.bam", + "test_paired_withchr.bam.bai" + ] + ], + [ + [ + "@SQ\tSN:chrM\tLN:16569" + ], + [ + "@SQ\tSN:chrMT192765.1\tLN:29829" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": 
"2024-11-07T13:28:15.125413039" + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_chr_rename_samtools/tests/nextflow.config b/subworkflows/local/bam_chr_rename_samtools/tests/nextflow.config new file mode 100644 index 00000000..5246e574 --- /dev/null +++ b/subworkflows/local/bam_chr_rename_samtools/tests/nextflow.config @@ -0,0 +1,12 @@ +params { + max_memory = '2.GB' +} + +process { + withName: 'BAM_CHR_RENAME_SAMTOOLS:SAMTOOLS_REHEADER' { + ext.prefix = { "${meta.id}_withchr" } + } + withName: 'PREPROCESS:SAMTOOLS_REHEADER' { + ext.prefix = { "${meta.id}_nochr" } + } +} diff --git a/subworkflows/local/bam_chr_rename_samtools/tests/tags.yml b/subworkflows/local/bam_chr_rename_samtools/tests/tags.yml new file mode 100644 index 00000000..6f425d2a --- /dev/null +++ b/subworkflows/local/bam_chr_rename_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_chr_rename_samtools: + - subworkflows/local/bam_chr_rename_samtools/** diff --git a/subworkflows/local/bam_downsample_samtools/main.nf b/subworkflows/local/bam_downsample_samtools/main.nf new file mode 100644 index 00000000..304a5a92 --- /dev/null +++ b/subworkflows/local/bam_downsample_samtools/main.nf @@ -0,0 +1,57 @@ +include { SAMTOOLS_DEPTH } from '../../../modules/nf-core/samtools/depth' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' +include { GAWK } from '../../../modules/nf-core/gawk' + +workflow BAM_DOWNSAMPLE_SAMTOOLS { + + take: + ch_bam // channel: [ [id, genome], bam, bai ] + ch_depth // channel: [ [depth], depth ] + ch_fasta // channel: [ [genome], fasta, fai ] + + main: + ch_versions = Channel.empty() + + // Compute mean depth + SAMTOOLS_DEPTH(ch_bam, [[], []]) + ch_versions = ch_versions.mix(SAMTOOLS_DEPTH.out.versions.first()) + + // Use GAWK to get mean depth + GAWK(SAMTOOLS_DEPTH.out.tsv, []) + ch_versions = ch_versions.mix(GAWK.out.versions.first()) + + // Compute 
downsampling factor + ch_depth_factor = GAWK.out.output + .splitCsv(header: false, sep:'\t') + .map{ metaICR, row -> + [ metaICR, row[0] as Float ] + } + .combine(ch_depth) + .map{ metaICR, mean, metaD, depth -> + [ metaICR, metaICR + metaD, depth as Float / mean ] + } + + // Add all necessary channel for downsampling + ch_input_downsample = ch_bam + .combine(ch_depth_factor, by : 0) + .map{ _metaICR, bam, index, metaICRD, depth -> + [ metaICRD, bam, index, [], depth ] + } + + // Downsample + SAMTOOLS_VIEW( + ch_input_downsample, + ch_fasta, + [] + ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) + + // Aggregate bam and index + ch_bam_emul = SAMTOOLS_VIEW.out.bam + .join(SAMTOOLS_VIEW.out.csi) + + emit: + bam_emul = ch_bam_emul // channel: [ [id, chr, region, depth], bam, bai ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_downsample_samtools/tests/main.nf.test b/subworkflows/local/bam_downsample_samtools/tests/main.nf.test new file mode 100644 index 00000000..05ab92e2 --- /dev/null +++ b/subworkflows/local/bam_downsample_samtools/tests/main.nf.test @@ -0,0 +1,115 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_DOWNSAMPLE_SAMTOOLS" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_DOWNSAMPLE_SAMTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_downsample_samtools" + tag "bam_downsample_samtools" + + tag "samtools" + tag "samtools/depth" + tag "samtools/view" + tag "samtools/index" + tag "gawk" + + test("Downsample to 2X") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + [ + [id: "NA19401"], + 
file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([[depth: 2], 2]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.bam_emul.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_emul.collect{ + bam(it[1]).getReads().size() + } + ).match() } + ) + } + } + + test("Downsample to 4X") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + [ + [id: "NA19401"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([[depth: 4], 4]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + 
workflow.out.versions, + workflow.out.bam_emul.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_emul.collect{ + bam(it[1]).getReads().size() + } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_downsample_samtools/tests/main.nf.test.snap b/subworkflows/local/bam_downsample_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..5eacd50a --- /dev/null +++ b/subworkflows/local/bam_downsample_samtools/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "Downsample to 2X": { + "content": [ + [ + "versions.yml:md5,43ed2e6d85cac7ccd2fb22a733f585dc", + "versions.yml:md5,9eef568f4fc903d70199abf5a48bd08f", + "versions.yml:md5,a61d007b27e86103db8d68a8c79a98fb" + ], + [ + [ + { + "id": "NA12878", + "depth": 2 + }, + "NA12878.bam", + "NA12878.bam.csi" + ], + [ + { + "id": "NA19401", + "depth": 2 + }, + "NA19401.bam", + "NA19401.bam.csi" + ] + ], + [ + 1164, + 1196 + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:28:49.191579339" + }, + "Downsample to 4X": { + "content": [ + [ + "versions.yml:md5,43ed2e6d85cac7ccd2fb22a733f585dc", + "versions.yml:md5,9eef568f4fc903d70199abf5a48bd08f", + "versions.yml:md5,a61d007b27e86103db8d68a8c79a98fb" + ], + [ + [ + { + "id": "NA12878", + "depth": 4 + }, + "NA12878.bam", + "NA12878.bam.csi" + ], + [ + { + "id": "NA19401", + "depth": 4 + }, + "NA19401.bam", + "NA19401.bam.csi" + ] + ], + [ + 2402, + 2321 + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:29:24.179563513" + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_downsample_samtools/tests/nextflow.config b/subworkflows/local/bam_downsample_samtools/tests/nextflow.config new file mode 100644 index 00000000..80b6895e --- /dev/null +++ b/subworkflows/local/bam_downsample_samtools/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + withName: GAWK { + ext.args2 = "'{ 
total += \$3 } END { print total/NR }'" + ext.suffix = "txt" + } + withName: SAMTOOLS_VIEW { + ext.args = "--write-index" + } +} diff --git a/subworkflows/local/bam_downsample_samtools/tests/tags.yml b/subworkflows/local/bam_downsample_samtools/tests/tags.yml new file mode 100644 index 00000000..425f5360 --- /dev/null +++ b/subworkflows/local/bam_downsample_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_downsample_samtools: + - subworkflows/local/bam_downsample_samtools/** diff --git a/subworkflows/local/bam_extract_region_samtools/main.nf b/subworkflows/local/bam_extract_region_samtools/main.nf new file mode 100644 index 00000000..a8248ef8 --- /dev/null +++ b/subworkflows/local/bam_extract_region_samtools/main.nf @@ -0,0 +1,52 @@ +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' + +workflow BAM_EXTRACT_REGION_SAMTOOLS { + + take: + ch_bam // channel: [ [id], bam, bai ] + ch_region // channel: [ [chr, region], val(chr:start-end) ] + ch_fasta // channel: [ [genome], fasta, fai ] + main: + + ch_versions = Channel.empty() + + // Add fasta and region to bam channel + ch_input_region = ch_bam + .combine(ch_region) + .map{ metaI, bam, index, metaCR, region -> + [ metaI + metaCR, bam, index, region, [] ] + } + + // Extract region of interest + SAMTOOLS_VIEW( + ch_input_region, + [[], [], []], + [] + ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) + + ch_bam_region = SAMTOOLS_VIEW.out.bam + .join(SAMTOOLS_VIEW.out.csi) + + SAMTOOLS_MERGE( + ch_bam_region + .map{ + metaICR, bam, index -> [metaICR.subMap("id", "batch") + [chr: "all"], bam, index] + } + .groupTuple(sort: true), + ch_fasta + ) + ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) + + SAMTOOLS_INDEX(SAMTOOLS_MERGE.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + 
ch_bam_region_all = SAMTOOLS_MERGE.out.bam + .join(SAMTOOLS_INDEX.out.bai) + + emit: + bam_region = ch_bam_region_all // channel: [ [id, chr], bam, index ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_extract_region_samtools/main.nf.test b/subworkflows/local/bam_extract_region_samtools/main.nf.test new file mode 100644 index 00000000..b12b09d4 --- /dev/null +++ b/subworkflows/local/bam_extract_region_samtools/main.nf.test @@ -0,0 +1,118 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_EXTRACT_REGION_SAMTOOLS" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_EXTRACT_REGION_SAMTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_extract_region_samtools" + tag "bam_extract_region_samtools" + + tag "samtools" + tag "samtools_view" + tag "samtools_merge" + tag "samtools_index" + + test("Get subregion of bam file 10000bp") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + [ + [id: "NA19401"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([ + [chr: "chr22", region: "chr22:16600000-16610000"], "chr22:16600000-16610000" + ]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + 
assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_extract_region.collect{ + bam(it[1]).getReads().size() + } + ).match() } + ) + } + } + + test("Get subregion of bam file 1000bp") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + [ + [id: "NA19401"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([ + [chr: "chr22", region: "chr22:16609000-16610000"], "chr22:16609000-16610000" + ]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_extract_region.collect{ + bam(it[1]).getReads().size() + } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_extract_region_samtools/tests/main.nf b/subworkflows/local/bam_extract_region_samtools/tests/main.nf new file mode 100644 index 00000000..57a302b7 --- /dev/null +++ 
b/subworkflows/local/bam_extract_region_samtools/tests/main.nf @@ -0,0 +1,520 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../../modules/nf-core/multiqc' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { getFilesSameExt } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { getFileExtension } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { exportCsv } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// + +// Simulate subworkflows +include { BAM_EXTRACT_REGION_SAMTOOLS } from '../../subworkflows/local/bam_extract_region_samtools' +include { BAM_DOWNSAMPLE_SAMTOOLS } from '../../subworkflows/local/bam_downsample_samtools' +include { SAMTOOLS_COVERAGE as SAMTOOLS_COVERAGE_INP } from '../../modules/nf-core/samtools/coverage' +include { SAMTOOLS_COVERAGE as SAMTOOLS_COVERAGE_DWN } from '../../modules/nf-core/samtools/coverage' + +// Panelprep subworkflows +include { VCF_NORMALIZE_BCFTOOLS } from '../../subworkflows/local/vcf_normalize_bcftools' +include { VCF_SITES_EXTRACT_BCFTOOLS } from '../../subworkflows/local/vcf_sites_extract_bcftools' +include { VCF_PHASE_SHAPEIT5 } from '../../subworkflows/local/vcf_phase_shapeit5' +include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } 
from '../../subworkflows/local/vcf_concatenate_bcftools' +include { BCFTOOLS_STATS as BCFTOOLS_STATS_PANEL } from '../../modules/nf-core/bcftools/stats' + +// Imputation +include { LIST_TO_FILE } from '../../modules/local/list_to_file' +include { VCF_SPLIT_BCFTOOLS } from '../../subworkflows/local/vcf_split_bcftools' + +// GLIMPSE1 subworkflows +include { BAM_GL_BCFTOOLS as GL_GLIMPSE1 } from '../../subworkflows/local/bam_gl_bcftools' +include { VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/local/vcf_impute_glimpse1' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' + +// GLIMPSE2 subworkflows +include { BAM_IMPUTE_GLIMPSE2 } from '../../subworkflows/local/bam_impute_glimpse2' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE2} from '../../subworkflows/local/vcf_concatenate_bcftools' + +// QUILT subworkflows +include { VCF_CHUNK_GLIMPSE } from '../../subworkflows/local/vcf_chunk_glimpse' +include { BAM_IMPUTE_QUILT } from '../../subworkflows/local/bam_impute_quilt' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/local/vcf_concatenate_bcftools' + +// STITCH subworkflows +include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' + +// Imputation stats +include { BCFTOOLS_STATS as BCFTOOLS_STATS_TOOLS } from '../../modules/nf-core/bcftools/stats' + +// Concordance subworkflows +include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query' +include { GAWK } from '../../modules/nf-core/gawk' +include { VCF_SPLIT_BCFTOOLS as SPLIT_TRUTH } from '../../subworkflows/local/vcf_split_bcftools' +include { BCFTOOLS_STATS as BCFTOOLS_STATS_TRUTH } from '../../modules/nf-core/bcftools/stats' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from 
'../../subworkflows/local/vcf_concatenate_bcftools' +include { VCF_CONCORDANCE_GLIMPSE2 } from '../../subworkflows/local/vcf_concordance_glimpse2' + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PHASEIMPUTE { + + take: + ch_input_impute // channel: input file [ [id], file, index ] + ch_input_sim // channel: input file [ [id], file, index ] + ch_input_validate // channel: input file [ [id], file, index ] + ch_input_truth // channel: truth file [ [id], file, index ] + ch_fasta // channel: fasta file [ [genome], fasta, fai ] + ch_panel // channel: panel file [ [id, chr], vcf, index ] + ch_region // channel: region to use [ [chr, region], region] + ch_depth // channel: depth select [ [depth], depth ] + ch_map // channel: genetic map [ [chr], map] + ch_posfile // channel: posfile [ [id, chr], vcf, index, hap, legend] + ch_chunks // channel: chunks [ [chr], txt] + chunk_model // parameter: chunk model + ch_versions // channel: versions of software used + + main: + + ch_multiqc_files = Channel.empty() + + // + // Simulate data if asked + // + if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { + // Test if the input are all bam files + getFilesSameExt(ch_input_sim) + .map{ if (it != "bam" & it != "cram") { + error "All input files must be in the same format, either BAM or CRAM, to perform simulation: ${it}" + } } + + if (params.input_region) { + // Split the bam into the regions specified + BAM_EXTRACT_REGION_SAMTOOLS(ch_input_sim, ch_region, ch_fasta) + ch_versions = ch_versions.mix(BAM_EXTRACT_REGION_SAMTOOLS.out.versions) + ch_input_sim = BAM_EXTRACT_REGION_SAMTOOLS.out.bam_extract_region + } + + // Use input for simulation as truth for validation step + ch_input_truth = ch_input_sim + + // Compute coverage of input files + 
SAMTOOLS_COVERAGE_INP(ch_input_sim, ch_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_INP.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_INP.out.coverage.map{it[1]}) + + if (params.depth) { + // Downsample input to desired depth + BAM_DOWNSAMPLE_SAMTOOLS(ch_input_sim, ch_depth, ch_fasta) + ch_versions = ch_versions.mix(BAM_DOWNSAMPLE_SAMTOOLS.out.versions) + ch_input_impute = BAM_DOWNSAMPLE_SAMTOOLS.out.bam_emul + + // Compute coverage of input files + SAMTOOLS_COVERAGE_DWN(BAM_DOWNSAMPLE_SAMTOOLS.out.bam_emul, ch_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_DWN.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(SAMTOOLS_COVERAGE_DWN.out.coverage.map{it[1]}) + } + + if (params.genotype) { + error "Genotype simulation not yet implemented" + } + + // Create CSV from simulate step + exportCsv( + ch_input_impute.map{ meta, file, index -> + [meta, [2:"simulation/samples", 3:"simulation/samples"], file, index] + }, + ["id"], "sample,file,index", + "simulate.csv", "simulation/csv" + ) + } + + // + // Prepare panel + // + if (params.steps.split(',').contains("panelprep") || params.steps.split(',').contains("all")) { + // Normalize indels in panel + VCF_NORMALIZE_BCFTOOLS(ch_panel, ch_fasta) + ch_panel_phased = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi + ch_versions = ch_versions.mix(VCF_NORMALIZE_BCFTOOLS.out.versions) + + // Extract sites from normalized vcf + VCF_SITES_EXTRACT_BCFTOOLS(ch_panel_phased, ch_fasta) + ch_versions = ch_versions.mix(VCF_SITES_EXTRACT_BCFTOOLS.out.versions) + + // Generate all necessary channels + ch_posfile = VCF_SITES_EXTRACT_BCFTOOLS.out.posfile + + // Phase panel with Shapeit5 + if (params.phase == true) { + VCF_PHASE_SHAPEIT5( + VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi.combine(Channel.of([[]])), + ch_region, + [[],[],[]], + [[],[],[]], + ch_map, + chunk_model + ) + ch_panel_phased = VCF_PHASE_SHAPEIT5.out.vcf_tbi + ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) + } + + // Create 
chunks from reference VCF + VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map, chunk_model) + ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) + + // Assign chunks channels + ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 + ch_chunks_glimpse2 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse2 + ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt + + // Create CSVs from panelprep step + // Phased panel + exportCsv( + ch_panel_phased.map{ meta, vcf, index -> + [meta, [2:"prep_panel/panel", 3:"prep_panel/panel"], vcf, index] + }, + ["id", "chr"], "panel,chr,vcf,index", + "panel.csv", "prep_panel/csv" + ) + // Posfile + exportCsv( + ch_posfile.map{ meta, vcf, index, hap, legend -> + [meta, [2:"prep_panel/sites", 3:"prep_panel/haplegend", 4:"prep_panel/haplegend"], vcf, index, hap, legend] + }, + ["id", "chr"], "panel,chr,vcf,index,hap,legend", + "posfile.csv", "prep_panel/csv" + ) + // Chunks + exportCsv( + VCF_CHUNK_GLIMPSE.out.chunks.map{ meta, file -> + [meta, [2:"prep_panel/chunks"], file] + }, + ["id", "chr"], "panel,chr,file", + "chunks.csv", "prep_panel/csv" + ) + } + + // + // Impute target files + // + if (params.steps.split(',').contains("impute") || params.steps.split(',').contains("all")) { + // Split input files into BAMs and VCFs + ch_input_type = ch_input_impute + .branch { + bam: it[1] =~ 'bam|cram' + vcf: it[1] =~ '(vcf|bcf)(.gz)*' + other: true + } + + // Check if input files are only BAM/CRAM or VCF/BCF + ch_input_type.other + .map{ error "Input files must be either BAM/CRAM or VCF/BCF" } + + // Group BAMs by batch size + def nb_batch = -1 + ch_input_bams = ch_input_type.bam + .toSortedList { it1, it2 -> it1[0]["id"] <=> it2[0]["id"] } + .map { list -> list.collate(params.batch_size) + .collect{ nb_batch += 1; [[id: "all", batch: nb_batch], it] } } + .map { list -> [list.collect{ it[0] }, list.collect{ it[1] }] } + .transpose() + .map { metaI, filestuples-> [ + metaI + [metas: filestuples.collect{it[0].findAll{it.key != "batch"}}], + 
filestuples.collect{it[1]}, filestuples.collect{it[2]} + ] } + + LIST_TO_FILE( + ch_input_bams.map{ meta, file, _index -> [ + meta, file, meta.metas.collect { it.id } + ] } + ) + + ch_input_bams_withlist = ch_input_bams + .join(LIST_TO_FILE.out.txt) + + // Use panel from parameters if provided + if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { + ch_panel_phased = ch_panel + } + + if (params.tools.split(',').contains("glimpse1")) { + log.info("Impute with GLIMPSE1") + + // Use chunks from parameters if provided or use previous chunks from panelprep + if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL.out.chunks + } + + // Glimpse1 subworkflow + // Compute GL from BAM files and merge them + GL_GLIMPSE1( + ch_input_type.bam, + ch_posfile.map{ [it[0], it[4]] }, + ch_fasta + ) + ch_multiqc_files = ch_multiqc_files.mix(GL_GLIMPSE1.out.multiqc_files) + ch_versions = ch_versions.mix(GL_GLIMPSE1.out.versions) + + // Combine vcf and processed bam + ch_input_glimpse1 = ch_input_type.vcf + .mix(GL_GLIMPSE1.out.vcf_tbi) + + // Run imputation + VCF_IMPUTE_GLIMPSE1( + ch_input_glimpse1, + ch_panel_phased, + ch_chunks_glimpse1 + ) + ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE1.out.versions) + + // Concatenate by chromosomes + CONCAT_GLIMPSE1(VCF_IMPUTE_GLIMPSE1.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE1.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE1.out.vcf_tbi) + + } + + if (params.tools.split(',').contains("glimpse2")) { + log.info("Impute with GLIMPSE2") + + if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse2 = CHUNK_PREPARE_CHANNEL.out.chunks + } + + // Run imputation + BAM_IMPUTE_GLIMPSE2( + ch_input_bams_withlist + .map{ [it[0], it[1], it[2], it[3]] } + .mix(ch_input_type.vcf.combine([])), + ch_panel_phased, + ch_chunks_glimpse2, + ch_fasta + ) + ch_versions = 
ch_versions.mix(BAM_IMPUTE_GLIMPSE2.out.versions) + // Concatenate by chromosomes + CONCAT_GLIMPSE2(BAM_IMPUTE_GLIMPSE2.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE2.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE2.out.vcf_tbi) + } + + if (params.tools.split(',').contains("stitch")) { + log.info("Impute with STITCH") + + // Impute with STITCH + BAM_IMPUTE_STITCH ( + ch_input_bams_withlist.map{ [it[0], it[1], it[2], it[4]] }, + ch_posfile.map{ [it[0], it[4]] }, + ch_region, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_IMPUTE_STITCH.out.versions) + + // Concatenate by chromosomes + CONCAT_STITCH(BAM_IMPUTE_STITCH.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_STITCH.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_STITCH.out.vcf_tbi) + + } + + if (params.tools.split(',').contains("quilt")) { + log.info("Impute with QUILT") + + // Use provided chunks if --chunks + if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "quilt") + ch_chunks_quilt = CHUNK_PREPARE_CHANNEL.out.chunks + } + + // Impute BAMs with QUILT + BAM_IMPUTE_QUILT( + ch_input_bams_withlist.map{ [it[0], it[1], it[2], it[4]] }, + ch_posfile.map{ [it[0], it[3], it[4]] }, + ch_chunks_quilt, + ch_fasta.map{ [it[0], it[1]] } + ) + ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) + + // Concatenate by chromosomes + CONCAT_QUILT(BAM_IMPUTE_QUILT.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_QUILT.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_QUILT.out.vcf_tbi) + } + + // Split result by samples + VCF_SPLIT_BCFTOOLS(ch_input_validate.map{ [it[0], it[1], it[2], []] }) + ch_input_validate = VCF_SPLIT_BCFTOOLS.out.vcf_tbi + + // Compute stats on imputed files + BCFTOOLS_STATS_TOOLS( + ch_input_validate, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = 
ch_versions.mix(BCFTOOLS_STATS_TOOLS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_TOOLS.out.stats.map{ [it[1]] }) + + // Export all files to csv + exportCsv( + ch_input_validate.map{ meta, file, index -> + [meta, [2:"imputation/${meta.tools}/samples/", 3:"imputation/${meta.tools}/samples/"], file, index] + }, + ["id", "tools"], "sample,tools,vcf,index", + "impute.csv", "imputation/csv" + ) + } + + if (params.steps.split(',').contains("validate") || params.steps.split(',').contains("all")) { + // Concatenate all sites into a single VCF (for GLIMPSE concordance) + CONCAT_PANEL(ch_posfile.map{ [it[0], it[1], it[2]] }) + ch_versions = ch_versions.mix(CONCAT_PANEL.out.versions) + ch_panel_sites = CONCAT_PANEL.out.vcf_tbi + + // Compute stats on panel + BCFTOOLS_STATS_PANEL( + ch_panel_sites, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_PANEL.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_PANEL.out.stats.map{ [it[1]] }) + + ch_truth_vcf = Channel.empty() + + // Channels for branching + ch_truth = ch_input_truth + .map { [it[0], it[1], it[2], getFileExtension(it[1])] } + .branch { + bam: it[3] =~ 'bam|cram' + vcf: it[3] =~ '(vcf|bcf)(.gz)*' + other: true + } + + ch_truth.other + .map{ error "Input files must be either BAM/CRAM or VCF/BCF" } + + GL_TRUTH( + ch_truth.bam.map { [it[0], it[1], it[2]] }, + ch_posfile.map{ [it[0], it[4]] }, + ch_fasta + ) + ch_versions = ch_versions.mix(GL_TRUTH.out.versions) + + // Mix the original vcf and the computed vcf + ch_truth_vcf = ch_truth.vcf + .map { [it[0], it[1], it[2]] } + .mix(GL_TRUTH.out.vcf_tbi) + + // Concatenate truth vcf by chromosomes + CONCAT_TRUTH(ch_truth_vcf) + ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + + // Prepare renaming file + BCFTOOLS_QUERY(CONCAT_TRUTH.out.vcf_tbi, [], [], []) + GAWK(BCFTOOLS_QUERY.out.output, []) + ch_pluginsplit = 
CONCAT_TRUTH.out.vcf_tbi.join(GAWK.out.output.view()) + + // Split truth vcf by samples + SPLIT_TRUTH(ch_pluginsplit) + ch_versions = ch_versions.mix(SPLIT_TRUTH.out.versions) + + // Compute stats on truth files + BCFTOOLS_STATS_TRUTH( + SPLIT_TRUTH.out.vcf_tbi, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] } + ) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_TRUTH.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_TRUTH.out.stats.map{ [it[1]] }) + + // Compute concordance analysis + VCF_CONCORDANCE_GLIMPSE2( + ch_input_validate, + SPLIT_TRUTH.out.vcf_tbi, + ch_panel_sites, + ch_region + ) + ch_multiqc_files = ch_multiqc_files.mix(VCF_CONCORDANCE_GLIMPSE2.out.multiqc_files) + ch_versions = ch_versions.mix(VCF_CONCORDANCE_GLIMPSE2.out.versions) + } + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', + sort: true, newLine: true + ).set { ch_collated_versions } + + // + // MODULE: MultiQC + // + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_replace_names = params.multiqc_replace_names ? Channel.fromPath(params.multiqc_replace_names, checkIfExists: true) : Channel.empty() + ch_multiqc_sample_names = params.multiqc_sample_names ? Channel.fromPath(params.multiqc_sample_names, checkIfExists: true) : Channel.empty() + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + ch_multiqc_replace_names.toList(), + ch_multiqc_sample_names.toList() + ) + + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/subworkflows/local/bam_extract_region_samtools/tests/main.nf.test b/subworkflows/local/bam_extract_region_samtools/tests/main.nf.test new file mode 100644 index 00000000..2769f1e7 --- /dev/null +++ b/subworkflows/local/bam_extract_region_samtools/tests/main.nf.test @@ -0,0 +1,118 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_EXTRACT_REGION_SAMTOOLS" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_EXTRACT_REGION_SAMTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_extract_region_samtools" + tag "bam_extract_region_samtools" + + tag "samtools" + tag "samtools_view" + tag "samtools_merge" + tag "samtools_index" + 
+ test("Get subregion of bam file 10000bp") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + [ + [id: "NA19401"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([ + [chr: "chr22", region: "chr22:16600000-16610000"], "chr22:16600000-16610000" + ]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_region.collect{ + bam(it[1]).getReads().size() + } + ).match() } + ) + } + } + + test("Get subregion of bam file 1000bp") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + [ + [id: "NA19401"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true), + 
file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([ + [chr: "chr22", region: "chr22:16609000-16610000"], "chr22:16609000-16610000" + ]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.bam_region.collect{ + bam(it[1]).getReads().size() + } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_extract_region_samtools/tests/main.nf.test.snap b/subworkflows/local/bam_extract_region_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..82e4b44b --- /dev/null +++ b/subworkflows/local/bam_extract_region_samtools/tests/main.nf.test.snap @@ -0,0 +1,44 @@ +{ + "Get subregion of bam file 10000bp": { + "content": [ + [ + "versions.yml:md5,17ab59cf0e993cc8db9086093cdeb6e2", + "versions.yml:md5,3043c69a324445a251b7e9569bd012d3", + "versions.yml:md5,4dbd8db078f2e38fb335b8c3ee3db169" + ], + [ + + ], + [ + 2498, + 2510 + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:55:45.543427226" + }, + "Get subregion of bam file 1000bp": { + "content": [ + [ + "versions.yml:md5,17ab59cf0e993cc8db9086093cdeb6e2", + "versions.yml:md5,3043c69a324445a251b7e9569bd012d3", + "versions.yml:md5,4dbd8db078f2e38fb335b8c3ee3db169" + ], + [ + + ], + [ + 368, + 327 + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:57:19.117705738" + } +} \ No newline at end of file diff --git 
a/subworkflows/local/bam_extract_region_samtools/tests/nextflow.config b/subworkflows/local/bam_extract_region_samtools/tests/nextflow.config new file mode 100644 index 00000000..c44aaf4b --- /dev/null +++ b/subworkflows/local/bam_extract_region_samtools/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: SAMTOOLS_VIEW { + ext.args = ["--output-fmt bam", "--write-index"].join(' ') + ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}" } + } +} diff --git a/subworkflows/local/bam_extract_region_samtools/tests/tags.yml b/subworkflows/local/bam_extract_region_samtools/tests/tags.yml new file mode 100644 index 00000000..8a080758 --- /dev/null +++ b/subworkflows/local/bam_extract_region_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_extract_region_samtools: + - subworkflows/local/bam_extract_region_samtools/** diff --git a/subworkflows/local/bam_gl_bcftools/main.nf b/subworkflows/local/bam_gl_bcftools/main.nf new file mode 100644 index 00000000..5b0d9b0d --- /dev/null +++ b/subworkflows/local/bam_gl_bcftools/main.nf @@ -0,0 +1,87 @@ +include { GAWK } from '../../../modules/nf-core/gawk' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip' +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup' +include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate' + +workflow BAM_GL_BCFTOOLS { + + take: + ch_bam // channel: [ [id], bam, bai ] + ch_posfile // channel: [ [panel, chr], legend] + ch_fasta // channel: [ [genome], fasta, fai] + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + // Convert legend to TSV with ',' + GAWK(ch_posfile, []) + ch_versions = ch_versions.mix(GAWK.out.versions) + + // Compress TSV + TABIX_BGZIP(GAWK.out.output) + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions) + + ch_mpileup = ch_bam + .combine(TABIX_BGZIP.out.output) + .map{metaI, bam, _bai, 
metaPC, tsv -> + [metaI + ["panel": metaPC.id, "chr": metaPC.chr], bam, tsv] + } + + BCFTOOLS_MPILEUP( + ch_mpileup, + ch_fasta, + false + ) + ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_MPILEUP.out.stats.map{ it[1] }) + + // Branch depending on number of files + ch_all_vcf = BCFTOOLS_MPILEUP.out.vcf + .join(BCFTOOLS_MPILEUP.out.tbi) + .map{ metaIPC, vcf, tbi -> [metaIPC.subMap("panel", "chr", "batch"), [metaIPC, vcf, tbi]] } + .groupTuple(sort: { it1, it2 -> it1[0]["id"] <=> it2[0]["id"] }) // Sort by id + .map{ metaPC, filestups -> [ + metaPC + [id: "all", metas: filestups.collect{it[0]}], + filestups.collect{it[1]}, + filestups.collect{it[2]}, + filestups.collect{it[1]}.size() + ] } // Compute number of records + .branch{ + one: it[3] == 1 + more: it[3] > 1 + } + + // Merge VCFs + BCFTOOLS_MERGE( + ch_all_vcf.more.map{ [it[0], it[1], it[2], []] }, + ch_fasta + ) + ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions) + + // Mix all vcfs + ch_to_annotate = ch_all_vcf.one + .map{ [it[0]["metas"][0], it[1][0], it[2][0]] } + .mix( + BCFTOOLS_MERGE.out.vcf + .join(BCFTOOLS_MERGE.out.tbi) + ) + + // Annotate the variants + BCFTOOLS_ANNOTATE(ch_to_annotate + .combine(Channel.of([[], [], [], []])) + ) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + + // Output + ch_output = BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_ANNOTATE.out.tbi) + .map{ metaIPC, vcf, tbi -> [metaIPC + [ variantcaller:'bcftools' ], vcf, tbi] } + + emit: + vcf_tbi = ch_output // channel: [ [id, panel, chr], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files +} diff --git a/subworkflows/local/bam_gl_bcftools/tests/main.nf.test b/subworkflows/local/bam_gl_bcftools/tests/main.nf.test new file mode 100644 index 00000000..027a5969 --- /dev/null +++ b/subworkflows/local/bam_gl_bcftools/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_workflow { + + name "Test Subworkflow 
BAM_GL_BCFTOOLS" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_GL_BCFTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_gl_bcftools" + tag "bam_gl_bcftools" + + tag "gawk" + tag "tabix" + tag "tabix/bgzip" + tag "bcftools" + tag "bcftools/mpileup" + tag "bcftools/annotate" + + test("Compute genotype likelihood with merging") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + [ + [id: "NA19401"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.legend.gz", checkIfExist:true) + ]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + } + ).match() } + ) + } + } + + test("Compute genotype likelihood with only one input") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", 
checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ], + ]) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.legend.gz", checkIfExist:true) + ]).collect() + input[2] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_gl_bcftools/tests/main.nf.test.snap b/subworkflows/local/bam_gl_bcftools/tests/main.nf.test.snap new file mode 100644 index 00000000..71b76e85 --- /dev/null +++ b/subworkflows/local/bam_gl_bcftools/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "Compute genotype likelihood with merging": { + "content": [ + [ + "versions.yml:md5,0980a298fde671494f5b4e9cb3a6ebc2", + "versions.yml:md5,4739962c934731254db8b31083aa0df5", + "versions.yml:md5,544ff5e39aa23f18c41587a739beaf34", + "versions.yml:md5,74118beefc19065bed2de5d60277be55", + "versions.yml:md5,c9d32dd20061f19422ff4d4b6f3fb24d", + "versions.yml:md5,c9d32dd20061f19422ff4d4b6f3fb24d" + ], + [ + [ + { + "panel": "1000GP", + "chr": "22", + "id": "all", + "metas": [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22" + }, + { + "id": "NA19401", + "panel": "1000GP", + "chr": "22" + } + ], + "variantcaller": "bcftools" + }, + "all.annotate.vcf.gz", + "all.annotate.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=903, 
phased=false, phasedAutodetect=false]" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:49:19.689761982" + }, + "Compute genotype likelihood with only one input": { + "content": [ + [ + "versions.yml:md5,0980a298fde671494f5b4e9cb3a6ebc2", + "versions.yml:md5,4739962c934731254db8b31083aa0df5", + "versions.yml:md5,544ff5e39aa23f18c41587a739beaf34", + "versions.yml:md5,c9d32dd20061f19422ff4d4b6f3fb24d" + ], + [ + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22", + "variantcaller": "bcftools" + }, + "NA12878.annotate.vcf.gz", + "NA12878.annotate.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=false, phasedAutodetect=false]" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:50:09.373771371" + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_gl_bcftools/tests/nextflow.config b/subworkflows/local/bam_gl_bcftools/tests/nextflow.config new file mode 100644 index 00000000..f636a4df --- /dev/null +++ b/subworkflows/local/bam_gl_bcftools/tests/nextflow.config @@ -0,0 +1,35 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '2.GB', + time: '6.h' + ] + + withName: GAWK { + ext.args2 = "'NR>1 { split(\$1, a, \"[:-_]\"); print a[1], \$2, \$3 \",\" \$4 }'" + ext.suffix = "txt" + } + withName: BCFTOOLS_MPILEUP { + ext.args = [ + "-I", + "-E", + "-a 'FORMAT/DP'" + ].join(' ') + ext.args2 = [ + "-Aim", + "-C alleles" + ].join(' ') + ext.prefix = { "${meta.id}" } + } + withName: BCFTOOLS_MERGE { + ext.args = [ + "--write-index=tbi", + ].join(' ') + ext.prefix = { "${meta.id}" } + } + withName: BCFTOOLS_ANNOTATE { + ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.annotate" } + } +} diff --git a/subworkflows/local/bam_gl_bcftools/tests/tags.yml b/subworkflows/local/bam_gl_bcftools/tests/tags.yml new file mode 100644 index 
00000000..cb8f468a --- /dev/null +++ b/subworkflows/local/bam_gl_bcftools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_gl_bcftools: + - subworkflows/local/bam_gl_bcftools/** diff --git a/subworkflows/local/bam_impute_glimpse2/main.nf b/subworkflows/local/bam_impute_glimpse2/main.nf new file mode 100644 index 00000000..63a913cd --- /dev/null +++ b/subworkflows/local/bam_impute_glimpse2/main.nf @@ -0,0 +1,70 @@ +include { GAWK } from '../../../modules/nf-core/gawk' +include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' + +workflow BAM_IMPUTE_GLIMPSE2 { + + take: + ch_input // channel (mandatory): [ [id], [bam], [bai], bamlist ] + ch_panel // channel (mandatory): [ [panel, chr], vcf, tbi ] + ch_chunks // channel (optional): [ [panel, chr], region1, region2 ] + ch_fasta // channel (mandatory): [ [genome], fa, fai ] + + main: + + ch_versions = Channel.empty() + + // Impute with Glimpse2 without using binary files + samples_file = Channel.of([[]]).collect() + gmap_file = Channel.of([[]]).collect() + + // Create input channel to impute with Glimpse2 + + // Join chunks and panel + ch_chunks_panel = ch_chunks + .combine(ch_panel, by:0) + .map{ metaPC, regionin, regionout, panel, index -> + [["panel": metaPC.id, "chr": metaPC.chr], regionin, regionout, panel, index] + } + + // Join input and chunks reference + ch_phase_input = ch_input + .combine(samples_file) + .combine(ch_chunks_panel) + .combine(gmap_file) + .map{ metaI, bam, bai, bamlist, samples, metaPC, regionin, regionout, panel, panel_index, gmap -> + [metaI + metaPC + ["chunk": regionout], + bam, bai, bamlist, samples, regionin, regionout, panel, panel_index, gmap] + } + + // Impute with Glimpse2 + GLIMPSE2_PHASE(ch_phase_input, 
ch_fasta) + ch_versions = ch_versions.mix(GLIMPSE2_PHASE.out.versions) + + // Index phased file + BCFTOOLS_INDEX_1(GLIMPSE2_PHASE.out.phased_variants) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions) + + // Ligate all phased files in one and index it + ligate_input = GLIMPSE2_PHASE.out.phased_variants + .join( BCFTOOLS_INDEX_1.out.csi ) + .map{ metaIPCR, vcf, index -> [metaIPCR.subMap("id", "panel", "chr", "batch"), vcf, index] } + .groupTuple() + + GLIMPSE2_LIGATE ( ligate_input ) + ch_versions = ch_versions.mix(GLIMPSE2_LIGATE.out.versions ) + + BCFTOOLS_INDEX_2 ( GLIMPSE2_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions ) + + // Join imputed and index files + ch_imputed_vcf_tbi = GLIMPSE2_LIGATE.out.merged_variants + .join(BCFTOOLS_INDEX_2.out.tbi) + .map{ metaIPC, vcf, index -> [metaIPC + [tools: "glimpse2"], vcf, index] } + + emit: + vcf_tbi = ch_imputed_vcf_tbi // channel: [ [id, panel, chr, tool], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_impute_glimpse2/tests/main.nf.test b/subworkflows/local/bam_impute_glimpse2/tests/main.nf.test new file mode 100644 index 00000000..b6a2bc6c --- /dev/null +++ b/subworkflows/local/bam_impute_glimpse2/tests/main.nf.test @@ -0,0 +1,171 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_IMPUTE_GLIMPSE2" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_IMPUTE_GLIMPSE2" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_impute_glimpse2" + tag "bam_impute_glimpse2" + + tag "glimpse2" + tag "glimpse2/phase" + tag "glimpse2/ligate" + tag "bcftools" + tag "bcftools/index" + + test("Impute with glimpse2 one vcf - input bcf") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + 
"hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true), + [] + ] + ]) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ]).collect() + input[2] = Channel.of( + [[chr: "22", id: "1000GP"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[chr: "22", id: "1000GP"], "chr22:16587172-16609999", "chr22:16592229-16609999"] + ) + input[3] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } + + test("Impute with glimpse2 two individuals - input bam with bamlist") { + when { + workflow { + """ + bamlist = Channel.of( + "NA12878.s.bam\tNA12878", + "NA19401.s.bam\tNA19401" + ).collectFile(name: 'bamlist.txt', newLine: true) + input[0] = Channel.fromList([ + [ + [id: "allid"], + [ + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true) + ], + [ + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + file(params.pipelines_testdata_base_path + 
"hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true) + ] + ], + ]).combine(bamlist) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ]).collect() + input[2] = Channel.of( + [[chr: "22", id: "1000GP"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[chr: "22", id: "1000GP"], "chr22:16587172-16609999", "chr22:16592229-16609999"] + ) + input[3] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } + + test("Impute with glimpse2 one individual - input file bam with bamlist") { + when { + workflow { + """ + bamlist = Channel.of( + "NA12878.s.bam\tNA12878" + ).collectFile(name: 'bamlist.txt', newLine: true) + input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + ] + ]).combine(bamlist) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + 
file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ]).collect() + input[2] = Channel.of( + [[chr: "22", id: "1000GP"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[chr: "22", id: "1000GP"], "chr22:16587172-16609999", "chr22:16592229-16609999"] + ) + input[3] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_impute_glimpse2/tests/main.nf.test.snap b/subworkflows/local/bam_impute_glimpse2/tests/main.nf.test.snap new file mode 100644 index 00000000..7a717fc9 --- /dev/null +++ b/subworkflows/local/bam_impute_glimpse2/tests/main.nf.test.snap @@ -0,0 +1,114 @@ +{ + "Impute with glimpse2 one vcf - input bcf": { + "content": [ + [ + "versions.yml:md5,02e20b0099653bc476dac65a92e768c1", + "versions.yml:md5,02e20b0099653bc476dac65a92e768c1", + "versions.yml:md5,1d8512dd2d5f6649a1111faabd24b399", + "versions.yml:md5,2bfd212564086adb64cf01741a0d83ad", + "versions.yml:md5,2bfd212564086adb64cf01741a0d83ad", + "versions.yml:md5,7d3bb3301fbcde0b12f553a2ae7d5b56" + ], + [ + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22", + "tools": "glimpse2" + }, + "NA12878.vcf.gz", + "NA12878.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]" + ], + [ + [ + "NA12878" 
+ ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:51:00.205148681" + }, + "Impute with glimpse2 one individual - input file bam with bamlist": { + "content": [ + [ + "versions.yml:md5,02e20b0099653bc476dac65a92e768c1", + "versions.yml:md5,02e20b0099653bc476dac65a92e768c1", + "versions.yml:md5,1d8512dd2d5f6649a1111faabd24b399", + "versions.yml:md5,2bfd212564086adb64cf01741a0d83ad", + "versions.yml:md5,2bfd212564086adb64cf01741a0d83ad", + "versions.yml:md5,7d3bb3301fbcde0b12f553a2ae7d5b56" + ], + [ + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22", + "tools": "glimpse2" + }, + "NA12878.vcf.gz", + "NA12878.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=true]" + ], + [ + [ + "NA12878" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:52:40.66012322" + }, + "Impute with glimpse2 two individuals - input bam with bamlist": { + "content": [ + [ + "versions.yml:md5,02e20b0099653bc476dac65a92e768c1", + "versions.yml:md5,02e20b0099653bc476dac65a92e768c1", + "versions.yml:md5,1d8512dd2d5f6649a1111faabd24b399", + "versions.yml:md5,2bfd212564086adb64cf01741a0d83ad", + "versions.yml:md5,2bfd212564086adb64cf01741a0d83ad", + "versions.yml:md5,7d3bb3301fbcde0b12f553a2ae7d5b56" + ], + [ + [ + { + "id": "allid", + "panel": "1000GP", + "chr": "22", + "tools": "glimpse2" + }, + "allid.vcf.gz", + "allid.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=903, phased=true, phasedAutodetect=true]" + ], + [ + [ + "NA12878", + "NA19401" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:51:55.269054819" + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_impute_glimpse2/tests/nextflow.config b/subworkflows/local/bam_impute_glimpse2/tests/nextflow.config new file mode 100644 index 
00000000..5cf18b90 --- /dev/null +++ b/subworkflows/local/bam_impute_glimpse2/tests/nextflow.config @@ -0,0 +1,18 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '2.GB', + time: '6.h' + ] + + withName: GLIMPSE_LIGATE { + ext.prefix = { "${meta.id}_${meta.chr}_ligate" } + publishDir = [ enabled: false ] + } + + withName: BCFTOOLS_INDEX_2 { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } +} diff --git a/subworkflows/local/bam_impute_glimpse2/tests/tags.yml b/subworkflows/local/bam_impute_glimpse2/tests/tags.yml new file mode 100644 index 00000000..462853ae --- /dev/null +++ b/subworkflows/local/bam_impute_glimpse2/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_impute_glimpse2: + - subworkflows/local/bam_impute_glimpse2/** diff --git a/subworkflows/local/bam_impute_quilt/main.nf b/subworkflows/local/bam_impute_quilt/main.nf new file mode 100644 index 00000000..1f36cd8d --- /dev/null +++ b/subworkflows/local/bam_impute_quilt/main.nf @@ -0,0 +1,64 @@ +include { QUILT_QUILT } from '../../../modules/nf-core/quilt/quilt' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate' + +workflow BAM_IMPUTE_QUILT { + + take: + ch_input // channel: [ [id], [bam], [bai], bamlist ] + ch_hap_legend // channel: [ [panel, chr], hap, legend ] + ch_chunks // channel: [ [panel, chr], chr, start_coordinate, end_coordinate ] + ch_fasta // channel: [ [genome], fa, fai ] + + main: + + ch_versions = Channel.empty() + + posfile = [] + phasefile = [] + posfile_phasefile = [[id: null], posfile, phasefile] + genetic_map_file = [] + + ngen_params = params.ngen + buffer_params = params.buffer + + ch_hap_chunks = ch_hap_legend + .combine(ch_chunks, by:0) + .map { it + ngen_params + buffer_params + [[]] } + + if (!genetic_map_file.isEmpty()) { + // Add genetic map file (untested) + ch_hap_chunks = ch_hap_chunks + .map{it[0..-1]} + .join(genetic_map_file) + } + + ch_quilt = ch_input + .combine(ch_hap_chunks) + .map { + metaI, bam, bai, bamlist, metaPC, hap, 
legend, chr, start, end, ngen, buffer, gmap -> + [ + metaI + [panel: metaPC.id, chr: metaPC.chr, chunk: metaPC.chr + ":" + start + "-" + end], + bam, bai, bamlist, hap, legend, chr, start, end, ngen, buffer, gmap + ] + } + + // Run QUILT + QUILT_QUILT ( ch_quilt, posfile_phasefile, ch_fasta ) + ch_versions = ch_versions.mix(QUILT_QUILT.out.versions.first()) + + // Annotate the variants + BCFTOOLS_ANNOTATE(QUILT_QUILT.out.vcf + .join(QUILT_QUILT.out.tbi) + .combine(Channel.of([[], [], [], []])) + ) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) + + // Join VCFs and TBIs + ch_vcf_tbi = BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_ANNOTATE.out.tbi) + .map { metaIPC, vcf, tbi -> [metaIPC + [tools: "quilt"], vcf, tbi] } + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ [id, panel], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_impute_quilt/tests/main.nf.test b/subworkflows/local/bam_impute_quilt/tests/main.nf.test new file mode 100644 index 00000000..57e19e49 --- /dev/null +++ b/subworkflows/local/bam_impute_quilt/tests/main.nf.test @@ -0,0 +1,120 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_IMPUTE_QUILT" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_IMPUTE_QUILT" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_impute_quilt" + tag "bam_impute_quilt" + + tag "quilt" + tag "quilt/quilt" + tag "bcftools" + tag "bcftools/index" + tag "bcftools/annotate" + + test("Impute with quilt two individuals with bamlist") { + when { + workflow { + """ + bamlist = Channel.of( + "NA12878.s.bam", + "NA19401.s.bam" + ).collectFile(name: 'bamlist.txt', newLine: true) + input[0] = Channel.fromList([ + [ + [id: "allid"], + [ + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true) + ], + [ + 
file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true) + ] + ], + ]).combine(bamlist) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.hap.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.legend.gz", checkIfExist:true) + ]).collect() + input[2] = Channel.of( + [[chr: "22", id: "1000GP"], "chr22", "16570065", "16597215"], + [[chr: "22", id: "1000GP"], "chr22", "16587172", "16609999"] + ) + input[3] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true) + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } + + test("Impute with quilt one individuals") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + [] + ] + ]) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.hap.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.legend.gz", checkIfExist:true) + ]).collect() + input[2] = Channel.of( + [[chr: "22", id: "1000GP"], 
"chr22", "16570065", "16597215"], + [[chr: "22", id: "1000GP"], "chr22", "16587172", "16609999"] + ) + input[3] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true) + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_impute_quilt/tests/main.nf.test.snap b/subworkflows/local/bam_impute_quilt/tests/main.nf.test.snap new file mode 100644 index 00000000..1136b7f1 --- /dev/null +++ b/subworkflows/local/bam_impute_quilt/tests/main.nf.test.snap @@ -0,0 +1,102 @@ +{ + "Impute with quilt two individuals with bamlist": { + "content": [ + [ + "versions.yml:md5,ac2d0f2a8e77cd796d3b4b84fdd46b0d", + "versions.yml:md5,cb038a1f67a47a76d0e0a983cfd56789" + ], + [ + [ + { + "id": "allid", + "panel": "1000GP", + "chr": "22", + "chunk": "16570065-16597215", + "tools": "quilt" + }, + "allid_22_16570065-16597215.impute.annotate.vcf.gz", + "allid_22_16570065-16597215.impute.annotate.vcf.gz.tbi" + ], + [ + { + "id": "allid", + "panel": "1000GP", + "chr": "22", + "chunk": "16587172-16609999", + "tools": "quilt" + }, + "allid_22_16587172-16609999.impute.annotate.vcf.gz", + "allid_22_16587172-16609999.impute.annotate.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=572, phased=true, phasedAutodetect=false]", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=570, phased=true, phasedAutodetect=false]" + ], + [ + [ + "NA12878", + "NA19401" + ], + [ + "NA12878", + "NA19401" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + 
"timestamp": "2024-11-07T13:53:40.231874326" + }, + "Impute with quilt one individuals": { + "content": [ + [ + "versions.yml:md5,ac2d0f2a8e77cd796d3b4b84fdd46b0d", + "versions.yml:md5,cb038a1f67a47a76d0e0a983cfd56789" + ], + [ + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22", + "chunk": "16570065-16597215", + "tools": "quilt" + }, + "NA12878_22_16570065-16597215.impute.annotate.vcf.gz", + "NA12878_22_16570065-16597215.impute.annotate.vcf.gz.tbi" + ], + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22", + "chunk": "16587172-16609999", + "tools": "quilt" + }, + "NA12878_22_16587172-16609999.impute.annotate.vcf.gz", + "NA12878_22_16587172-16609999.impute.annotate.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=572, phased=true, phasedAutodetect=false]", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=570, phased=true, phasedAutodetect=false]" + ], + [ + [ + "NA12878" + ], + [ + "NA12878" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T13:54:20.551354608" + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_impute_quilt/tests/nextflow.config b/subworkflows/local/bam_impute_quilt/tests/nextflow.config new file mode 100644 index 00000000..51e57a80 --- /dev/null +++ b/subworkflows/local/bam_impute_quilt/tests/nextflow.config @@ -0,0 +1,21 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '2.GB', + time: '6.h' + ] + + // Impute quilt + withName: QUILT_QUILT { + ext.prefix = { "${meta.id}_${meta.chr}_${meta.chunk}.impute" } + publishDir = [enabled: false] + } + + // Annotate quilt imputed VCFs + withName: BCFTOOLS_ANNOTATE { + ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_${meta.chunk}.impute.annotate" } + publishDir = [ enabled: false ] + } +} diff --git a/subworkflows/local/bam_impute_quilt/tests/tags.yml 
b/subworkflows/local/bam_impute_quilt/tests/tags.yml new file mode 100644 index 00000000..4d1f2ebd --- /dev/null +++ b/subworkflows/local/bam_impute_quilt/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_impute_quilt: + - subworkflows/local/bam_impute_quilt/** diff --git a/subworkflows/local/bam_impute_stitch/main.nf b/subworkflows/local/bam_impute_stitch/main.nf new file mode 100644 index 00000000..4b21d37e --- /dev/null +++ b/subworkflows/local/bam_impute_stitch/main.nf @@ -0,0 +1,72 @@ +include { GAWK } from '../../../modules/nf-core/gawk' +include { STITCH } from '../../../modules/nf-core/stitch' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' + +workflow BAM_IMPUTE_STITCH { + + take: + ch_input // channel: [ [id], [bam], [bai], bamlist ] + ch_posfile // channel: [ [panel, chr], legend ] + ch_region // channel: [ [chr, region], region ] + ch_fasta // channel: [ [genome], fa, fai ] + + main: + + ch_versions = Channel.empty() + // Run STITCH + seed = params.seed + + // Value channels + def input_empty = [[]] + def rdata_empty = [[]] + k_val_params = params.k_val + ngen_params = params.ngen + + // Transform posfile to TSV with ',' + GAWK(ch_posfile, []) + ch_versions = ch_versions.mix(GAWK.out.versions) + + // Get chromosomes of posfile + ch_posfile = GAWK.out.output + .map{metaPC, posfile -> [[chr: metaPC.chr], metaPC, posfile]} + + // Get chromosomes of fasta + ch_chromosomes = ch_region + .map{metaCR, _region -> [[chr: metaCR.chr], metaCR.chr]} + + // Make final channel with parameters + ch_parameters = ch_posfile + .map { it + input_empty + rdata_empty} + .join(ch_chromosomes) + .map { it + k_val_params + ngen_params} + .map { _metaC, metaPC, posfile, input, rdata, chr, k_val, ngen -> + [metaPC, posfile, input, rdata, chr, k_val, ngen] + } + + ch_bam_params = ch_input // Add chr to meta map + .combine(ch_parameters) + .map{ + metaI, bam, bai, bamlist, metaPC, posfile, input, rdata, chr, k_val, ngen -> + [ + metaI + [chr: metaPC.chr, 
panel:metaPC.id], + bam, bai, bamlist, posfile, input, rdata, chr, k_val, ngen + ] + } + + STITCH( ch_bam_params, ch_fasta, seed ) + ch_versions = ch_versions.mix(STITCH.out.versions) + + // Index imputed annotated VCF + BCFTOOLS_INDEX(STITCH.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions) + + // Join VCFs and TBIs + ch_vcf_tbi = STITCH.out.vcf + .join(BCFTOOLS_INDEX.out.tbi) + .map { metaI, vcf, tbi -> [ metaI + [tools: "stitch"], vcf, tbi ] } + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ [id, chr], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] + +} diff --git a/subworkflows/local/bam_impute_stitch/tests/main.nf.test b/subworkflows/local/bam_impute_stitch/tests/main.nf.test new file mode 100644 index 00000000..55d9a0fa --- /dev/null +++ b/subworkflows/local/bam_impute_stitch/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_IMPUTE_STITCH" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_IMPUTE_STITCH" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/bam_impute_stitch" + tag "bam_impute_stitch" + + tag "stitch" + tag "bcftools" + tag "bcftools_index" + + test("Impute with stitch two individuals with bamlist") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + bamlist = Channel.of( + "NA12878.s.bam", + "NA19401.s.bam" + ).collectFile(name: 'bamlist.txt', newLine: true) + ch_samples = Channel.fromList([ + [ + [id: "allid"], + [ + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam", checkIfExist:true) + ], + [ + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true) + ] + ], + ]) + + ch_posfile = 
Channel.of([ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.legend.gz", checkIfExist:true) + ]) + + ch_region = Channel.of([ + [chr: "chr22", region: "chr22:16570000-16610000"], "chr22:16570000-16610000" + ]) + + input[0] = ch_samples.combine(bamlist) + input[1] = ch_posfile + input[2] = ch_region + input[3] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/bam_impute_stitch/tests/main.nf.test.snap b/subworkflows/local/bam_impute_stitch/tests/main.nf.test.snap new file mode 100644 index 00000000..36e74e98 --- /dev/null +++ b/subworkflows/local/bam_impute_stitch/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "Impute with stitch two individuals with bamlist": { + "content": [ + [ + "versions.yml:md5,4330856a215c4986b0591d550a4d22bf", + "versions.yml:md5,8561197b98b9a1e1f8f1b01c1369da35", + "versions.yml:md5,c0772dccc2cc2a3c8f0b3fa5f02312bc" + ], + [ + [ + { + "id": "allid", + "chr": "chr22", + "panel": "1000GP", + "tools": "stitch" + }, + "stitch.chr22.vcf.gz", + "stitch.chr22.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=903, phased=false, phasedAutodetect=false]" + ], + [ + [ + "NA12878", + "NA19401" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": 
"2024-11-07T13:55:08.174481463" + } +} \ No newline at end of file diff --git a/subworkflows/local/bam_impute_stitch/tests/nextflow.config b/subworkflows/local/bam_impute_stitch/tests/nextflow.config new file mode 100644 index 00000000..787f8b34 --- /dev/null +++ b/subworkflows/local/bam_impute_stitch/tests/nextflow.config @@ -0,0 +1,17 @@ +process { + withName: GAWK { + ext.args2 = "'BEGIN { OFS=\"\\t\" } NR>1 { split(\$1, a, \"[:-_]\"); print a[1], \$2, \$3, \$4 }'" // Remove duplicates + ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } + ext.suffix = "txt" + publishDir = [enabled: false] + tag = {"${meta.id} ${meta.chr}"} + } + withName: STITCH { + ext.prefix = { "${meta.id}.stitch" } + } + + withName: BCFTOOLS_INDEX { + ext.args = '--tbi' + publishDir = [enabled: false] + } +} diff --git a/subworkflows/local/bam_impute_stitch/tests/tags.yml b/subworkflows/local/bam_impute_stitch/tests/tags.yml new file mode 100644 index 00000000..f4e8f7dc --- /dev/null +++ b/subworkflows/local/bam_impute_stitch/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_impute_stitch: + - subworkflows/local/bam_impute_stitch/** diff --git a/subworkflows/local/chunk_prepare_channel/main.nf b/subworkflows/local/chunk_prepare_channel/main.nf new file mode 100644 index 00000000..446744e2 --- /dev/null +++ b/subworkflows/local/chunk_prepare_channel/main.nf @@ -0,0 +1,27 @@ +workflow CHUNK_PREPARE_CHANNEL { + + take: + ch_chunks // channel: [ [id, chr], txt ] + tool + + main: + + if(tool == "glimpse"){ + ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} + .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) + .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + } else if(tool == "quilt") { + ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} + .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) + .map { meta, it -> + def startEnd = it["RegionIn"].split(':')[1].split('-') + 
[meta, meta.chr, startEnd[0], startEnd[1]] + } + } else { + error "Only 'glimpse' and 'quilt' output format are supported. Got ${tool}" + } + + emit: + chunks = ch_chunks // channel: [ [meta], regionstart, regionend ] + +} diff --git a/subworkflows/local/chunk_prepare_channel/tests/main.nf.test b/subworkflows/local/chunk_prepare_channel/tests/main.nf.test new file mode 100644 index 00000000..6f09d9ce --- /dev/null +++ b/subworkflows/local/chunk_prepare_channel/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_workflow { + + name "Test Subworkflow CHUNK_PREPARE_CHANNEL" + script "../main.nf" + + workflow "CHUNK_PREPARE_CHANNEL" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/chunk_prepare_channel" + tag "chunk_prepare_channel" + + test("Prepare chunks channel Glimpse") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of([ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21_chunks.txt", checkIfExist:true), + ]) + input[1] = "glimpse" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + ) + } + } + + test("Prepare chunks channel Quilt") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of([ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21_chunks.txt", checkIfExist:true), + ]) + input[1] = "quilt" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + ) + } + } + + test("Prepare chunks channel error tool") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of([ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21_chunks.txt", checkIfExist:true), + ]) + input[1] = "glimpse2" + """ + } + } + + 
then { + assertAll( + { assert workflow.failed }, + { assert snapshot(workflow.stdout).match() } + ) + } + } +} diff --git a/subworkflows/local/chunk_prepare_channel/tests/main.nf.test.snap b/subworkflows/local/chunk_prepare_channel/tests/main.nf.test.snap new file mode 100644 index 00000000..974d2ce6 --- /dev/null +++ b/subworkflows/local/chunk_prepare_channel/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "Prepare chunks channel error tool": { + "content": [ + [ + "Only 'glimpse' and 'quilt' output format are supported. Got glimpse2" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T15:30:55.291994433" + }, + "Prepare chunks channel Quilt": { + "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ] + ], + "chunks": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-07-18T09:57:11.37570123" + }, + "Prepare chunks channel Glimpse": { + "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ] + ], + "chunks": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-07-18T09:57:06.040215496" + } +} \ No newline at end of file diff --git a/subworkflows/local/chunk_prepare_channel/tests/tags.yml 
b/subworkflows/local/chunk_prepare_channel/tests/tags.yml new file mode 100644 index 00000000..6c94f5a6 --- /dev/null +++ b/subworkflows/local/chunk_prepare_channel/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/chunk_prepare_channel: + - subworkflows/local/chunk_prepare_channel/** diff --git a/subworkflows/local/get_region/main.nf b/subworkflows/local/get_region/main.nf new file mode 100644 index 00000000..08aa15ab --- /dev/null +++ b/subworkflows/local/get_region/main.nf @@ -0,0 +1,31 @@ +workflow GET_REGION { + take: + input_region // Region string to use ["all", "chr1", "chr1:0-1000"] + ch_fasta // [[genome], fasta, fai] + + main: + ch_versions = Channel.empty() + + // Gather regions to use and create the meta map + if (input_region ==~ '^(chr)?[0-9XYM]+$' || input_region == "all") { + ch_regions = ch_fasta.map{it -> it[2]} + .splitCsv(header: ["chr", "size", "offset", "lidebase", "linewidth", "qualoffset"], sep: "\t") + .map{it -> [chr:it.chr, region:"0-"+it.size]} + if (input_region != "all") { + ch_regions = ch_regions.filter{it.chr == input_region} + } + ch_regions = ch_regions + .map{ [[chr: it.chr, region: it.chr + ":" + it.region], it.chr + ":" + it.region]} + } else { + if (input_region ==~ '^chr[0-9XYM]+:[0-9]+-[0-9]+$') { + ch_regions = Channel.from([input_region]) + .map{ [[chr: it.split(":")[0], "region": it], it]} + } else { + error "Invalid input_region: ${input_region}" + } + } + + emit: + regions = ch_regions // channel: [ [chr, region], region ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/get_region/tests/main.nf.test b/subworkflows/local/get_region/tests/main.nf.test new file mode 100644 index 00000000..640d1801 --- /dev/null +++ b/subworkflows/local/get_region/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_workflow { + + name "Test Workflow GET_REGION" + script "../main.nf" + workflow "GET_REGION" + tag "subworkflows" + tag "get_region" + tag "subworkflows/get_region" + + tag "samtools" + tag 
"samtools/faidx" + + test("Should run with 'all'") { + + when { + workflow { + """ + input[0] = "all" + input[1] = Channel.of([ + [genome:"GRCh37"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.regions).match() } + ) + } + } + + test("Should run with specified chr") { + + when { + workflow { + """ + input[0] = "chr22" + input[1] = Channel.of([ + [genome:"GRCh37"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.regions).match() } + ) + } + } + + test("Should run with specified region without fasta") { + + when { + workflow { + """ + input[0] = "chr22:0-4000" + input[1] = Channel.of([[],[],[]]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.regions).match() } + ) + } + } + + test("Should run with an error") { + + when { + workflow { + """ + input[0] = "chr22:0-4000:4648" + input[1] = Channel.of([[],[],[]]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert snapshot(workflow.stdout).match() } + ) + } + } +} diff --git a/subworkflows/local/get_region/tests/main.nf.test.snap b/subworkflows/local/get_region/tests/main.nf.test.snap new file mode 100644 index 00000000..9f9e1934 --- /dev/null +++ b/subworkflows/local/get_region/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "Should run with specified region without 
fasta": { + "content": [ + [ + [ + { + "chr": "chr22", + "region": "chr22:0-4000" + }, + "chr22:0-4000" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-17T15:20:16.458964814" + }, + "Should run with an error": { + "content": [ + [ + "Invalid input_region: chr22:0-4000:4648" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T15:31:15.872517093" + }, + "Should run with specified chr": { + "content": [ + [ + [ + { + "chr": "chr22", + "region": "chr22:0-50818468" + }, + "chr22:0-50818468" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-07-18T09:59:50.83303021" + }, + "Should run with 'all'": { + "content": [ + [ + [ + { + "chr": "chr21", + "region": "chr21:0-46709983" + }, + "chr21:0-46709983" + ], + [ + { + "chr": "chr22", + "region": "chr22:0-50818468" + }, + "chr22:0-50818468" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-07-18T09:59:45.078764008" + } +} \ No newline at end of file diff --git a/subworkflows/local/get_region/tests/tags.yml b/subworkflows/local/get_region/tests/tags.yml new file mode 100644 index 00000000..d1ff83bf --- /dev/null +++ b/subworkflows/local/get_region/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/get_region: + - subworkflows/local/get_region/** diff --git a/subworkflows/local/utils_nfcore_chrcheck_pipeline/main.nf b/subworkflows/local/utils_nfcore_chrcheck_pipeline/main.nf new file mode 100644 index 00000000..0c1c7c67 --- /dev/null +++ b/subworkflows/local/utils_nfcore_chrcheck_pipeline/main.nf @@ -0,0 +1,51 @@ +// +// Check if the contig names in the input files match the reference contig names. 
+// +def checkChr(ch_chr, ch_input){ + def chr_checked = ch_chr + .combine(ch_input, by:0) + .map{meta, chr, file, index, lst -> + [ + meta, file, index, + chr.readLines()*.split(' ').collect{it[0]}, + lst + ] + } + .branch{ meta, file, index, chr, lst -> + def lst_diff = diffChr(chr, lst, file) + def diff = lst_diff[0] + def prefix = lst_diff[1] + no_rename: diff.size() == 0 + return [meta, file, index] + to_rename: true + return [meta, file, index, diff, prefix] + } + return chr_checked +} + +// +// Check if the contig names can be solved by adding/removing the `chr` prefix. +// +def diffChr(chr_target, chr_ref, file) { + def diff = chr_ref - chr_target + def prefix = (chr_ref - chr_target) =~ "chr" ? "chr" : "nochr" + if (diff.size() != 0) { + // Ensure that by adding/removing the prefix we can solve the problem + def new_chr = [] + def to_rename = [] + if (prefix == "chr") { + chr_target.each{ new_chr += "chr${it}" } + diff.each{ to_rename += it.replace('chr', '') } + } else { + chr_target.each{ new_chr += it.replace('chr', '') } + diff.each{ to_rename += "chr${it}" } + } + def new_diff = diff - new_chr + if (new_diff.size() != 0) { + def chr_names = new_diff.size() > params.max_chr_names ? new_diff[0..params.max_chr_names - 1] + ['...'] : new_diff + error "Contig names: ${chr_names} absent from file: ${file} and cannot be solved by adding or removing the `chr` prefix." 
+ } + diff = to_rename + } + return [diff, prefix] +} diff --git a/subworkflows/local/utils_nfcore_chrcheck_pipeline/tests/function.nf.test b/subworkflows/local/utils_nfcore_chrcheck_pipeline/tests/function.nf.test new file mode 100644 index 00000000..e2b8d093 --- /dev/null +++ b/subworkflows/local/utils_nfcore_chrcheck_pipeline/tests/function.nf.test @@ -0,0 +1,88 @@ +nextflow_function { + + name "Test function chrcheck" + script "../main.nf" + function "diffChr" + tag "function" + tag "diffChr" + + test("All ref in target") { + when { + function { + """ + input[0] = ["chr1", "chr2", "chr3"] + input[1] = ["chr2", "chr1"] + input[2] = "123.vcf" + """ + } + } + then { + assert function.success + assert function.result == [[], 'nochr'] + } + } + + test("ref no chr & target chr") { + when { + function { + """ + input[0] = ["chr1", "chr2", "chr3"] + input[1] = ["2", "1"] + input[2] = "chr123.vcf" + """ + } + } + then { + assert function.success + assert snapshot(function.result).match() + } + } + + test("ref chr & target no chr") { + when { + function { + """ + input[0] = ["1", "2", "3"] + input[1] = ["chr2", "chr1"] + input[2] = "123.vcf" + """ + } + } + then { + assert function.success + assert snapshot(function.result).match() + } + } + + test("ref chr & target chr but missing contig") { + when { + function { + """ + input[0] = ["1", "2", "3"] + input[1] = ["chr2", "chr1", "chr4"] + input[2] = "123.vcf" + """ + } + } + then { + assert function.failed + assert snapshot(function.stdout).match() + } + } + + test("Mix file with and withouth chr") { + when { + function { + """ + input[0] = ["chr1", "chr2", "chr3", "GL000207.1", "GL000226.1", "GL000229.1", "GL000231.1" ] + input[1] = ["2", "1", "GL000207.1", "GL000226.1", "GL000229.1", "GL000231.1" ] + input[2] = "123.vcf" + """ + } + } + then { + assert function.success + assert snapshot(function.result).match() + } + } +} diff --git a/subworkflows/local/utils_nfcore_chrcheck_pipeline/tests/function.nf.test.snap 
b/subworkflows/local/utils_nfcore_chrcheck_pipeline/tests/function.nf.test.snap new file mode 100644 index 00000000..ff0bf78b --- /dev/null +++ b/subworkflows/local/utils_nfcore_chrcheck_pipeline/tests/function.nf.test.snap @@ -0,0 +1,62 @@ +{ + "ref chr & target chr but missing contig": { + "content": [ + [ + "Contig names: [chr4] absent from file: 123.vcf and cannot be solved by adding or removing the `chr` prefix." + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T15:31:34.992652974" + }, + "Mix file with and withouth chr": { + "content": [ + [ + [ + "chr2", + "chr1" + ], + "nochr" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T15:31:39.923011027" + }, + "ref no chr & target chr": { + "content": [ + [ + [ + "chr2", + "chr1" + ], + "nochr" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T15:31:25.618460553" + }, + "ref chr & target no chr": { + "content": [ + [ + [ + "2", + "1" + ], + "chr" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T15:31:30.499438797" + } +} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf new file mode 100644 index 00000000..ed268603 --- /dev/null +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -0,0 +1,719 @@ +// +// Subworkflow with functionality specific to the nf-core/phaseimpute pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 
'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { GET_REGION } from '../get_region' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + _monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + + // + // Validate parameters and generate parameter summary to stdout + // + UTILS_NFSCHEMA_PLUGIN ( + workflow, + validate_params, + null + ) + + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create fasta channel + // + + genome = params.genome ? 
params.genome : file(params.fasta, checkIfExists:true).getBaseName() + if (params.genome) { + genome = params.genome + ch_fasta = Channel.of([[genome:genome], getGenomeAttribute('fasta')]) + fai = getGenomeAttribute('fai') + if (fai == null) { + SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) + fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } + } else { + fai = Channel.of(file(fai, checkIfExists:true)) + } + } else if (params.fasta) { + genome = file(params.fasta, checkIfExists:true).getBaseName() + ch_fasta = Channel.of([[genome:genome], file(params.fasta, checkIfExists:true)]) + if (params.fasta_fai) { + fai = Channel.of(file(params.fasta_fai, checkIfExists:true)) + } else { + SAMTOOLS_FAIDX(ch_fasta, Channel.of([[], []])) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first()) + fai = SAMTOOLS_FAIDX.out.fai.map{ it[1] } + } + } + ch_ref_gen = ch_fasta.combine(fai).collect() + + // + // Create channel from input file provided through params.input + // + if (params.input) { + ch_input = Channel + .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .map { samplesheet -> + validateInputSamplesheet(samplesheet) + } + .map { meta, file, index -> [meta + [batch: 0], file, index] } // Set batch to 0 by default + } else { + ch_input = Channel.of([[], [], []]) + } + + // Check that the batch size and extension is compatible with the tools + validateInputBatchTools( + ch_input, + params.batch_size, + getFilesSameExt(ch_input), + params.tools ? 
params.tools.split(',') : [] + ) + + // + // Create channel from input file provided through params.input_truth + // + if (params.input_truth) { + if (params.input_truth.endsWith("csv")) { + ch_input_truth = Channel + .fromList(samplesheetToList(params.input_truth, "${projectDir}/assets/schema_input.json")) + .map { + meta, file, index -> + [ meta, file, index ] + } + // Check if all extension are identical + getFilesSameExt(ch_input_truth) + } else { + // #TODO Wait for `oneOf()` to be supported in the nextflow_schema.json + error "Panel file provided is of another format than CSV (not yet supported). Please separate your panel by chromosome and use the samplesheet format." + } + } else { + ch_input_truth = Channel.of([[], [], []]) + } + + // + // Create channel for panel + // + if (params.panel) { + if (params.panel.endsWith("csv")) { + println "Panel file provided as input is a samplesheet" + ch_panel = Channel.fromList(samplesheetToList( + params.panel, "${projectDir}/assets/schema_input_panel.json" + )) + } else { + // #TODO Wait for `oneOf()` to be supported in the nextflow_schema.json + error "Panel file provided is of another format than CSV (not yet supported). Please separate your panel by chromosome and use the samplesheet format." 
+ } + } else { + // #TODO check if panel is required + ch_panel = Channel.of([[],[],[]]) + } + + // + // Create channel from region input + // + if (params.input_region == null){ + // #TODO Add support for string input + GET_REGION ("all", ch_ref_gen) + ch_versions = ch_versions.mix(GET_REGION.out.versions) + ch_regions = GET_REGION.out.regions + } else if (params.input_region.endsWith(".csv")) { + println "Region file provided as input is a csv file" + ch_regions = Channel.fromList(samplesheetToList( + params.input_region, "${projectDir}/assets/schema_input_region.json" + )) + .map{ chr, start, end -> + assert end >= start : "End position must be greater than or equal to start position" + [["chr": chr], chr + ":" + start + "-" + end] + } + .map{ metaC, region -> [metaC + ["region": region], region]} + } else { + error "Region file provided is of another format than CSV (not yet supported). Please separate your reference genome by chromosome and use the samplesheet format." + } + + // + // Create map channel + // + if (params.map) { + if (params.map.endsWith(".csv")) { + println "Map file provided as input is a samplesheet" + ch_map = Channel.fromList(samplesheetToList(params.map, "${projectDir}/assets/schema_map.json")) + } else { + error "Map file provided is of another format than CSV (not yet supported). Please separate your reference genome by chromosome and use the samplesheet format." 
+ } + } else { + ch_map = ch_regions + .map{ metaCR, _regions -> [metaCR.subMap("chr"), []] } + } + + // + // Create depth channel + // + if (params.depth) { + ch_depth = Channel.of([[depth: params.depth], params.depth]) + } else { + ch_depth = Channel.of([[],[]]) + } + + // + // Create genotype array channel + // + if (params.genotype) { + ch_genotype = Channel.of([[gparray: params.genotype], params.genotype]) + } else { + ch_genotype = Channel.of([[],[]]) + } + + // + // Create posfile channel + // + if (params.posfile) { + ch_posfile = Channel // ["panel", "chr", "vcf", "index", "hap", "legend"] + .fromList(samplesheetToList(params.posfile, "${projectDir}/assets/schema_posfile.json")) + } else { + ch_posfile = Channel.of([[],[],[],[],[]]) + } + + if (!params.steps.split(',').contains("panelprep") & !params.steps.split(',').contains("all")) { + validatePosfileTools( + ch_posfile, + params.tools ? params.tools.split(','): [], + params.steps.split(',') + ) + } + + // + // Create chunks channel + // + if (params.chunks) { + ch_chunks = Channel + .fromList(samplesheetToList(params.chunks, "${projectDir}/assets/schema_chunks.json")) + } else { + ch_chunks = Channel.of([[],[]]) + } + + // + // Check contigs name in different meta map + // + // Collect all chromosomes names in all different inputs + chr_ref = ch_ref_gen.map { _meta, _fasta, fai_file -> [fai_file.readLines()*.split('\t').collect{it[0]}] } + chr_regions = extractChr(ch_regions) + + // Check that the chromosomes names that will be used are all present in different inputs + chr_ref_mis = checkMetaChr(chr_regions, chr_ref, "reference genome") + chr_chunks_mis = checkMetaChr(chr_regions, extractChr(ch_chunks), "chromosome chunks") + chr_map_mis = checkMetaChr(chr_regions, extractChr(ch_map), "genetic map") + chr_panel_mis = checkMetaChr(chr_regions, extractChr(ch_panel), "reference panel") + chr_posfile_mis = checkMetaChr(chr_regions, extractChr(ch_posfile), "position") + + // Compute the intersection of all 
chromosomes names + chr_all_mis = chr_ref_mis.concat(chr_chunks_mis, chr_map_mis, chr_panel_mis, chr_posfile_mis) + .unique() + .toList() + .subscribe{ chr -> + if (chr.size() > 0) { + def chr_names = chr.size() > params.max_chr_names ? chr[0..params.max_chr_names - 1] + ['...'] : chr + log.warn "The following contigs are absent from at least one file : ${chr_names} and therefore won't be used" } } + + ch_regions = ch_regions + .combine(chr_all_mis.toList()) + .filter { meta, _regions, chr_mis -> + !(meta.chr in chr_mis) + } + .map { meta, regions, _chr_mis -> [meta, regions] } + .ifEmpty { error "No regions left to process" } + + ch_regions + .map { it[1] } + .collect() + .subscribe { log.info "The following contigs will be processed: ${it}" } + + // Remove other contigs from panel and posfile files + ch_panel = ch_panel + .combine(ch_regions.collect{ it[0]["chr"]}.toList()) + .filter { meta, _vcf, _index, chrs -> + meta.chr in chrs + } + .map {meta, vcf, index, _chrs -> + [meta, vcf, index] + } + + ch_posfile = ch_posfile + .combine(ch_regions.collect{ it[0]["chr"]}.toList()) + .filter { meta, _vcf, _index, _hap, _legend, chrs -> + meta.chr in chrs + } + .map {meta, vcf, index, hap, legend, _chrs -> + [meta, vcf, index, hap, legend] + } + + // Check that all input files have the correct index + checkFileIndex(ch_input.mix(ch_input_truth, ch_ref_gen, ch_panel)) + + // Chunk model + chunk_model = params.chunk_model + + emit: + input = ch_input // [ [meta], file, index ] + input_truth = ch_input_truth // [ [meta], file, index ] + fasta = ch_ref_gen // [ [genome], fasta, fai ] + panel = ch_panel // [ [panel, chr], vcf, index ] + depth = ch_depth // [ [depth], depth ] + regions = ch_regions // [ [chr, region], region ] + gmap = ch_map // [ [map], map ] + posfile = ch_posfile // [ [panel, chr], vcf, index, hap, legend ] + chunks = ch_chunks // [ [chr], txt ] + chunk_model = chunk_model + versions = ch_versions +} + +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_report.toList() + ) + } + + completionSummary(monochrome_logs) + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() + // Check that only genome or fasta is provided + assert (params.genome == null || params.fasta == null) : "Either --genome or --fasta must be provided" + assert !(params.genome == null && params.fasta == null) : "Only one of --genome or --fasta must be provided" + + // Check that a steps is provided + assert params.steps : "A step must be provided" + + // Check that at least one tool is provided + if (params.steps.split(',').contains("impute")) { + assert params.tools : "No tools provided" + } + + // Check that input is provided for all steps, except panelprep + if (params.steps.split(',').contains("all") || params.steps.split(',').contains("impute") || params.steps.split(',').contains("simulate") || params.steps.split(',').contains("validate")) { + assert params.input : "No input provided" + } + + // Check that posfile and chunks are provided when running impute only. Steps with panelprep generate those files. 
+ if (params.steps.split(',').contains("impute") && !params.steps.split(',').find { it in ["all", "panelprep"] }) { + // Required by all tools except glimpse2 + if (!params.tools.split(',').find { it in ["glimpse2"] }) { + assert params.posfile : "No --posfile provided for --steps impute" + } + // Required by all tools except STITCH + if (params.tools != "stitch") { + assert params.chunks : "No --chunks provided for --steps impute" + } + // Required by GLIMPSE1 and GLIMPSE2 only + if (params.tools.split(',').contains("glimpse")) { + assert params.panel : "No --panel provided for imputation with GLIMPSE" + } + + // Check that input_truth is provided when running validate + if (params.steps.split(',').find { it in ["all", "validate"] } ) { + assert params.input_truth : "No --input_truth was provided for --steps validate" + } + } + + // Emit a warning if both panel and (chunks || posfile) are used as input + if (params.panel && params.chunks && params.steps.split(',').find { it in ["all", "panelprep"]} ) { + log.warn("Both `--chunks` and `--panel` have been provided. Provided `--chunks` will override `--panel` generated chunks in `--steps impute` mode.") + } + if (params.panel && params.posfile && params.steps.split(',').find { it in ["all", "panelprep"]} ) { + log.warn("Both `--posfile` and `--panel` have been provided. Provided `--posfile` will override `--panel` generated posfile in `--steps impute` mode.") + } + + // Emit an info message when using external panel and impute only + if (params.panel && params.steps.split(',').find { it in ["impute"] } && !params.steps.split(',').find { it in ["all", "panelprep"] } ) { + log.info("Provided `--panel` will be used in `--steps impute`. 
Make sure it has been previously prepared with `--steps panelprep`") + } + + // Emit an error if normalizing step is ignored but samples need to be removed from reference panel + if (params.steps.split(',').find { it in ["all", "panelprep"] } && params.remove_samples) { + if (!params.normalize) { + error("To use `--remove_samples` you need to include `--normalize`.") + } + } + + // Check that the chunk model is provided + assert params.chunk_model : "No chunk model provided" + + return null +} + +// +// Check compatibility between input files size, extension and tools +// +def validateInputBatchTools(ch_input, batch_size, extension, tools) { + ch_input + .count() + .map{ nb_input -> + if (extension ==~ "(vcf|bcf)(.gz)?") { + if (tools.contains("stitch") || tools.contains("quilt")) { + error "Stitch or Quilt software cannot run with VCF or BCF files. Please provide alignment files (i.e. BAM or CRAM)." + } + if (nb_input > 1) { + error "When using a Variant Calling Format file as input, only one file can be provided. If you have multiple single-sample VCF files, please merge them into a single multisample VCF file." + } + } + + if (nb_input > batch_size) { + if (tools.contains("glimpse2") || tools.contains("quilt")) { + log.warn("Glimpse2 or Quilt software is selected and the number of input files (${nb_input}) is less than the batch size (${batch_size}). The input files will be processed in ${Math.ceil(nb_input / batch_size) as int} batches.") + } + if (tools.contains("stitch") || tools.contains("glimpse1")) { + error "Stitch or Glimpse1 software is selected and the number of input files (${nb_input}) is less than the batch size (${batch_size}). Splitting the input files in batches would induce batch effect." 
+ } + } + } + return null +} + +// +// Check if posfile is compatible with tools and steps selected +// +def validatePosfileTools(ch_posfile, tools, steps){ + ch_posfile + .map{ _meta, vcf, index, hap, legend -> + if (tools.contains("glimpse1")) { + assert legend : "Glimpse1 tool needs a legend file provided in the posfile. This file can be created through the panelprep step." + } + if (tools.contains("stitch")) { + assert legend : "Stitch tool needs a legend file provided in the posfile. This file can be created through the panelprep step." + } + if (tools.contains("quilt")) { + assert legend : "Quilt tool needs a legend file provided in the posfile. This file can be created through the panelprep step." + assert hap : "Quilt tool needs a hap file provided in the posfile. This file can be created through the panelprep step." + } + if (steps.contains("validate")) { + assert vcf : "Validation step needs a vcf file provided in the posfile for the allele frequency. This file can be created through the panelprep step." + assert index : "Validation step needs an index file provided in the posfile for the allele frequency. This file can be created through the panelprep step." + } + } + return null +} + +// +// Extract contig names from channel meta map +// +def extractChr(ch_input) { + ch_input.map { [it[0].chr] } + .collect() + .toList() +} + +// +// Check if all contigs in a are present in b +// Give back the intersection of a and b +// +def checkMetaChr(chr_a, chr_b, name){ + def intersect = chr_a + .combine(chr_b) + .map{ + a, b -> + if (b != [[]] && !(a - b).isEmpty()) { + def chr_names = (a - b).size() > params.max_chr_names ? (a - b)[0..params.max_chr_names - 1] + ['...'] : (a - b) + def verb = (a - b).size() == 1 ? 
"is" : "are" + log.warn "Chr : ${chr_names} ${verb} missing from ${name}" + return (a-b) + } + return [] + } + .flatten() + return intersect +} + +// +// Get file extension +// +def getFileExtension(file) { + def file_name = "" + + if (file instanceof Path || file instanceof nextflow.file.http.XPath) { + file_name = file.name + } else if (file instanceof CharSequence) { + file_name = file.toString() + } else if (file instanceof List) { + return file.collect { getFileExtension(it) } + } else { + error "Type not supported: ${file.getClass()}" + } + + // Remove .gz if present and get the last part after splitting by "." + return file_name.replace(".gz", "").split("\\.").last() +} + +// +// Check if all input files have the same extension +// +def getFilesSameExt(ch_input) { + return ch_input + .map { getFileExtension(it[1]) } // Extract files extensions + .toList() // Collect extensions into a list + .map { extensions -> + if (extensions.unique().size() > 1) { + error "All input files must have the same extension: ${extensions.unique()}" + } + return extensions[0] + } +} + +// +// Check correspondance file / index +// +def checkFileIndex(ch_input) { + ch_input.map { + meta, file, index -> + def file_ext = getFileExtension(file) + def index_ext = getFileExtension(index) + if (file_ext in ["vcf", "bcf"] && !(index_ext in ["tbi", "csi"]) ) { + log.info("File: ${file} ${file_ext}, Index: ${index} ${index_ext}") + error "${meta}: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]" + } + if (file_ext == "bam" && index_ext != "bai") { + error "${meta}: Index file for .bam must have the extension .bai" + } + if (file_ext == "cram" && index_ext != "crai") { + error "${meta}: Index file for .cram must have the extension .crai" + } + if (file_ext in ["fa", "fasta"] && index_ext != "fai") { + error "${meta}: Index file for [fa, fasta] must have the extension .fai" + } + } + return null +} + +// +// Export a channel to a CSV file with correct paths +// +def 
exportCsv(ch_files, metas, header, name, outdir) { + ch_files.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/${outdir}") { it -> + def meta = "" + def file = "" + metas.each { i -> + meta += "${it[0][i]}," + } + it[1].each { i -> + file += "${params.outdir}/${i.value}/${it[i.key].fileName}," + } + file = file.substring(0, file.length() - 1) // remove last comma + ["${name}", "${header}\n${meta}${file}\n"] + } + return null +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (meta, bam, bai) = input + // Check that individual IDs are unique + // No check for the moment + + return [meta, bam, bai] +} + +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "BCFtools (Danecek et al. 
2021),", + params.tools ? params.tools.split(',').contains("glimpse") ? "GLIMPSE (Rubinacci et al. 2020)," : "" : "", + params.tools ? params.tools.split(',').contains("glimpse2") ? "GLIMPSE2 (Rubinacci et al. 2023)," : "": "", + params.tools ? params.tools.split(',').contains("quilt") ? "QUILT (Davies et al. 2021)," : "": "", + "SAMtools (Li et al. 2009),", + params.tools ? params.phase ? "SHAPEIT5 (Hofmeister et al. 2023)," : "": "", + params.tools ? params.phase ? "BEDtools (Quinlan and Hall 2010)," : "": "", + params.tools ? params.tools.split(',').contains("stitch") ? "STITCH (Davies et al. 2016)," : "": "", + "Tabix (Li et al. 2011),", + params.tools ? params.compute_freq ? "VCFlib (Garrison et al. 2022)," : "": "", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + params.phase ? "
  • Quinlan AR, Hall IM (2010). BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi:10.1093/bioinformatics/btq033.
  • ": "", + "
  • Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi:10.1093/bioinformatics/btp352.
  • ", + "
  • Li H. (2011). Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics. 2011 Mar 1;27(5):718-9. doi:10.1093/bioinformatics/btq671.
  • ", + params.tools ? params.tools.split(',').contains("quilt") ? "
  • Davies RW, Kucka M, Su D, Shi S, Flanagan M, Cunniff CM, Chan YF, & Myers S. (2021). Rapid genotype imputation from sequence with reference panels. Nature Genetics. doi:10.1038/s41588-021-00877-0.
  • " : "": "", + params.tools ? params.tools.split(',').contains("glimpse") ? "
  • Rubinacci S, Ribeiro DM, Hofmeister RJ, & Delaneau O. (2021). Efficient phasing and imputation of low-coverage sequencing data using large reference panels. Nature Genetics. doi:10.1038/s41588-020-00756-0.
  • " : "": "", + params.tools ? params.tools.split(',').contains("glimpse2") ? "
  • Rubinacci S, Hofmeister RJ, Sousa da Mota B, & Delaneau O. (2023). Imputation of low-coverage sequencing data from 150,119 UK Biobank genomes. Nature Genetics. doi:10.1038/s41588-023-01438-3.
  • " : "": "", + params.phase ? "
  • Hofmeister RJ, Ribeiro DM, Rubinacci S, Delaneau O. (2023). Accurate rare variant phasing of whole-genome and whole-exome sequencing data in the UK Biobank. Nat Genet. 2023 Jul;55(7):1243-1249. doi:10.1038/s41588-023-01415-w.
  • " : "", + params.tools ? params.tools.split(',').contains("stitch") ? "
  • Davies RW, Flint J, Myers S, & Mott R. (2016). Rapid genotype imputation from sequence without reference panels. Nature Genetics.
  • " : "": "", + params.compute_freq ? "
  • Garrison E, Kronenberg ZN, Dawson ET, Pedersen BS, Prins P. (2022). A spectrum of free software tools for processing the VCF variant call format: vcflib, bio-vcf, cyvcf2, hts-nim and slivar. PLoS Comput Biol 18(5).
  • " : "", + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test new file mode 100644 index 00000000..bf8d7b2b --- /dev/null +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test @@ -0,0 +1,407 @@ +nextflow_function { + + name "Test function phaseimpute" + script "../main.nf" + tag "function" + + test ("Test getFileExtension") { + function "getFileExtension" + tag "getFileExtension" + when { + function { + """ + input[0] = file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExists: true) + """ + } + } + then { + assertAll( + { assert function.success }, + { assert function.result == "vcf" } + ) + } + } + + test ("Test getFileExtension non empty list") { + function "getFileExtension" + tag "getFileExtension" + when { + function { + """ + input[0] = [ + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExists: true), + "test.myfile.txt.gz" + ] + """ + } + } + then { + assertAll( + { assert function.success }, + { assert function.result == ['vcf', 'txt'] }, + { assert snapshot(function.result).match() } + ) + } + } + + test ("Test getFileExtension empty list") { + function "getFileExtension" + tag "getFileExtension" + when { + function { + """ + input[0] = [] + """ + } + } + then { + assertAll( + { assert function.success }, + { assert function.result == [] } + 
) + } + } + + test("Test checkFileIndex no error with empty channel") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[], [], []], + [[id: "input"], file("input.vcf"), file("input.csi")], + [[], [], []], + [[id: "input3"], file("input3.bcf"), file("input3.csi")], + [[], [], []], + [[id: "input5"], file("input5.vcf.gz"), file("input5.csi")], + [[], [], []], + [[id: "input7"], file("input7.bam"), file("input5.bai")], + [[], [], []], + [[id: "input9"], file("input9.fa"), file("input9.fai")], + [[], [], []] + ]) + """ + } + } + then { + assert function.success + assert snapshot(function.result).match() + } + } + + test("Test checkFileIndex no error") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input"], file("input.vcf"), file("input.csi")], + [[id: "input2"], file("input2.vcf"), file("input2.tbi")], + [[id: "input3"], file("input3.bcf"), file("input3.csi")], + [[id: "input4"], file("input4.bcf"), file("input4.tbi")], + [[id: "input5"], file("input5.vcf.gz"), file("input5.csi")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input7"], file("input7.bam"), file("input5.bai")], + [[id: "input8"], file("input8.cram"), file("input6.crai")], + [[id: "input9"], file("input9.fa"), file("input9.fai")], + [[id: "input10"], file("input10.fa"), file("input10.fai")] + ]) + """ + } + } + then { + assert function.success + } + } + + test("Test checkFileIndex bam bai") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input7"], file("input7.bam"), file("input5.csi")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input8"], file("input8.bam"), file("input8.bai")] + ]) + """ + } + } + then { + assert function.failed + assert function.stdout.contains("[id:input7]: Index file for .bam must have the extension .bai") + } + } + 
+ test("Test checkFileIndex cram crai") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input7"], file("input7.cram"), file("input7.tbi")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input8"], file("input8.bam"), file("input8.bai")] + ]) + """ + } + } + then { + assert function.failed + assert function.stdout.contains("[id:input7]: Index file for .cram must have the extension .crai") + } + } + + test("Test checkFileIndex bcf csi") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input7"], file("input7.bcf"), file("input7.txt")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input8"], file("input8.bam"), file("input8.bai")] + ]) + """ + } + } + then { + assert function.failed + assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]") + } + } + + test("Test checkFileIndex vcf csi") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input7"], file("input7.vcf"), file("input7.bai")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input8"], file("input8.bam"), file("input8.bai")] + ]) + """ + } + } + then { + assert function.failed + assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]") + } + } + + test("Test checkFileIndex vcf.gz csi") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input7"], file("input7.vcf.gz"), file("input7.bai")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input8"], file("input8.bam"), file("input8.bai")] + ]) + """ + } + } + then { + assert function.failed + assert function.stdout.contains("[id:input7]: Index file for [.vcf, 
.vcf.gz, bcf] must have the extension [.tbi, .csi]") + } + } + + test("Test checkFileIndex fa fai") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input7"], file("input7.fa"), file("input7.tbi")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input8"], file("input8.bam"), file("input8.bai")] + ]) + """ + } + } + then { + assert function.failed + assert function.stdout.contains("[id:input7]: Index file for [fa, fasta] must have the extension .fai") + } + } + + test("Test checkFileIndex fasta fai") { + function "checkFileIndex" + tag "checkFileIndex" + when { + function { + """ + input[0] = Channel.fromList([ + [[id: "input7"], file("input7.fasta"), file("input6.fia")], + [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")], + [[id: "input8"], file("input8.bam"), file("input8.bai")] + ]) + """ + } + } + then { + assert function.failed + assert function.stdout.contains("[id:input7]: Index file for [fa, fasta] must have the extension .fai") + } + } + + test("Test Function exportCsv") { + function "exportCsv" + tag "exportCsv" + when { + params { + outdir = "results" + } + function { + """ + // define inputs of the function here. 
Example: + input[0] = Channel.of( [ [id:'test'], [2:"vcf",3:"index"], file("test1.vcf"), file("test2.csi") ] ) + input[1] = ["id"] + input[2] = "id,vcf,index" + input[3] = "impute.csv" + input[4] = "imputation/csv" + """ + } + } + + then { + with(function) { + assert success + assert snapshot(result).match() + } + } + } + + test ("Test checkMetaChr") { + function "checkMetaChr" + tag "checkMetaChr" + when { + function { + """ + input[0] = Channel.of([["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chrX" ]]) + input[1] = Channel.of([["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chrX" ]]) + input[2] = "test" + """ + } + } + then { + assert function.success + } + } + + test ("Test checkMetaChr warning") { + function "checkMetaChr" + tag "checkMetaChr" + when { + function { + """ + input[0] = Channel.of([["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chrX", "chrY" ]]) + input[1] = Channel.of([["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chrX" ]]) + input[2] = "test" + """ + } + } + then { + assertAll ( + { assert function.success }, + { assert snapshot(function.stdout).match() }, + { assert function.stdout.contains("WARN: Chr : [chrY] is missing from test") } + ) + } + } + + test ("Test validateInputBatchTools vcf only for glimpse") { + function "validateInputBatchTools" + tag "validateInputBatchTools" + when { + function { + """ + input[0] = Channel.of("A") + input[1] = 60 + input[2] = "vcf.gz" + input[3] = ["glimpse2", "quilt"] + """ + } + } + then { + assertAll ( + { assert function.failed }, + { assert snapshot(function.stdout).match() } + ) + } + } + + test ("Test validateInputBatchTools only one vcf") { + function "validateInputBatchTools" + tag "validateInputBatchTools" + when { + function { + """ + input[0] = Channel.of("A", "B", "C", "D", "E") + input[1] = 60 + input[2] = "vcf.gz" + input[3] = ["glimpse2", 
"glimpse1"] + """ + } + } + then { + assertAll ( + { assert function.failed }, + { assert snapshot(function.stdout).match() } + ) + } + } + + test ("Test validateInputBatchTools error batch") { + function "validateInputBatchTools" + tag "validateInputBatchTools" + when { + function { + """ + input[0] = Channel.of(0..150) + input[1] = 60 + input[2] = "cram" + input[3] = ["glimpse1", "quilt"] + """ + } + } + then { + assertAll ( + { assert function.failed }, + { assert snapshot(function.stdout).match() } + ) + } + } + + test ("Test validateInputBatchTools success batch") { + function "validateInputBatchTools" + tag "validateInputBatchTools" + when { + function { + """ + input[0] = Channel.of(0..150) + input[1] = 60 + input[2] = "cram" + input[3] = ["quilt", "glimpse2"] + """ + } + } + then { + assertAll ( + { assert function.success }, + { assert snapshot(function.stdout).match() } + ) + } + } +} diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap new file mode 100644 index 00000000..a0aaa167 --- /dev/null +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap @@ -0,0 +1,136 @@ +{ + "Test checkMetaChr error": { + "content": [ + [ + "N E X T F L O W ~ version 24.04.3", + "Launching `/mnt/s/Users/Main/Documents/Louis/Travail/IGDR/Repositories/phaseimpute/.nf-test/tests/ed90b74f1c9354d03a69767c5df96bbc/meta/mock.nf` [cheesy_golick] DSL2 - revision: 184c64ccc5", + "WARN: Chr : [chrY] is missing from test" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T15:31:57.920377701" + }, + "Test getFileExtension": { + "content": [ + "vcf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-12T17:12:47.079086374" + }, + "Test checkFileIndex no error with empty channel": { + "content": null, + "meta": { + "nf-test": "0.9.2", + "nextflow": 
"24.10.0" + }, + "timestamp": "2024-11-06T15:32:04.356171769" + }, + "Test validateInputBatchTools only one vcf": { + "content": [ + [ + "N E X T F L O W ~ version 24.10.0", + "Launching `/mnt/c/Users/llenezet/Documents/Repositories/phaseimpute/.nf-test-d46c3ae4355f57c428b5399cb25385a3.nf` [deadly_lamarck] DSL2 - revision: 89cd7edbc5", + "When using a Variant Calling Format file as input, only one file can be provided. If you have multiple single-sample VCF files, please merge them into a single multisample VCF file." + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T16:40:38.015152671" + }, + "Test checkMetaChr warning": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T14:03:45.047353204" + }, + "Test Function exportCsv": { + "content": null, + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T15:32:58.039856651" + }, + "Test validateInputBatchTools vcf only for glimpse": { + "content": [ + [ + "N E X T F L O W ~ version 24.10.0", + "Launching `/mnt/c/Users/llenezet/Documents/Repositories/phaseimpute/.nf-test-ee1562fb7668b97a4ee2fa78f0a93342.nf` [stupefied_gautier] DSL2 - revision: 33e883fb97", + "Stitch or Quilt software cannot run with VCF or BCF files. Please provide alignment files (i.e. BAM or CRAM)." + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T16:40:24.343659957" + }, + "Test validateInputBatchTools success batch": { + "content": [ + [ + "N E X T F L O W ~ version 24.10.0", + "Launching `/mnt/c/Users/llenezet/Documents/Repositories/phaseimpute/.nf-test-a882e6f88e44c954568a3552c82f8d3c.nf` [compassionate_keller] DSL2 - revision: edfeedf8f1", + "WARN: Glimpse2 or Quilt software is selected and the number of input files (151) is less than the batch size (60). The input files will be processed in 3 batches." 
+ ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T16:41:05.86605167" + }, + "Test getFileExtension non empty list": { + "content": [ + [ + "vcf", + "txt" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T13:15:08.856706046" + }, + "Test getFileExtension empty list": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T20:00:55.32420517" + }, + "Test validateInputBatchTools error batch": { + "content": [ + [ + "N E X T F L O W ~ version 24.10.0", + "Launching `/mnt/c/Users/llenezet/Documents/Repositories/phaseimpute/.nf-test-87470b2a8c60f98b5dc561cea5e9bc6f.nf` [prickly_euclid] DSL2 - revision: 45ae3df33f", + "WARN: Glimpse2 or Quilt software is selected and the number of input files (151) is less than the batch size (60). The input files will be processed in 3 batches.", + "Stitch or Glimpse1 software is selected and the number of input files (151) is less than the batch size (60). Splitting the input files in batches would induce batch effect." 
+ ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T16:40:50.034843333" + } +} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test new file mode 100644 index 00000000..9760477c --- /dev/null +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_workflow { + + name "Test Workflow PIPELINE_INITIALISATION" + script "../main.nf" + workflow "PIPELINE_INITIALISATION" + tag 'subworkflows' + tag 'utils_nfcore_phaseimpute_pipeline' + tag 'subworkflows/utils_nfcore_phaseimpute_pipeline' + + test("Should run with no error") { + when { + params { + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz" + fasta_fai = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz.fai" + steps = "panelprep" + map = "../../../tests/csv/map.csv" + chunks = "../../../tests/csv/chunks.csv" + posfile = "../../../tests/csv/posfile.csv" + panel = "../../../tests/csv/panel.csv" + chunk_model = "recursive" + } + workflow { + """ + input[0] = false + input[1] = false + input[2] = false + input[3] = [] + input[4] = "results" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should give a warning all file given") { + when { + params { + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz" + fasta_fai = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz.fai" + steps = "panelprep" + map = "../../../tests/csv/map.csv" + chunks = "../../../tests/csv/chunks.csv" + posfile = "../../../tests/csv/posfile.csv" + panel = "../../../tests/csv/panel.csv" + input_region = "$moduleTestDir/region.csv" + 
chunk_model = "sequential" + } + workflow { + """ + input[0] = false + input[1] = false + input[2] = false + input[3] = [] + input[4] = "results" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("WARN: The following contigs are absent from at least one file : [chr2] and therefore won't be used")} + ) + } + } + + test("Should give a warning with missing files") { + when { + params { + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz" + fasta_fai = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz.fai" + steps = "panelprep" + chunks = "../../../tests/csv/chunks.csv" + panel = "../../../tests/csv/panel.csv" + input_region = "$moduleTestDir/region.csv" + chunk_model = "recursive" + } + workflow { + """ + input[0] = false + input[1] = false + input[2] = false + input[3] = [] + input[4] = "results" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("WARN: The following contigs are absent from at least one file : [chr2] and therefore won't be used")} + ) + } + } +} diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/region.csv b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/region.csv new file mode 100644 index 00000000..425887e3 --- /dev/null +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/region.csv @@ -0,0 +1,4 @@ +chr,start,end +chr21,16570000,16610000 +chr22,16570000,16610000 +chr2,16570000,16610000 diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/tags.yml b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/tags.yml new file mode 100644 index 00000000..1cd8d673 --- /dev/null +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_phaseimpute_pipeline: + - 
subworkflows/local/utils_nfcore_phaseimpute_pipeline/** diff --git a/subworkflows/local/vcf_chr_rename_bcftools/main.nf b/subworkflows/local/vcf_chr_rename_bcftools/main.nf new file mode 100644 index 00000000..e10351c6 --- /dev/null +++ b/subworkflows/local/vcf_chr_rename_bcftools/main.nf @@ -0,0 +1,52 @@ +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate' + +workflow VCF_CHR_RENAME_BCFTOOLS { + take: + ch_vcf // channel: [ [id], vcf, index, diff, prefix ] + + main: + + ch_versions = Channel.empty() + + // Check that prefix is either "chr" or "nochr" + ch_vcf = ch_vcf.map{ + meta, vcf, index, diff, prefix -> + if (prefix != "chr" && prefix != "nochr") { + error "Invalid chr_prefix: ${prefix}" + } + [meta, vcf, index, diff, prefix] + } + + // Generate the chromosome renaming file + ch_rename_file = ch_vcf + .collectFile{ meta, _vcf, _index, diff, prefix -> + def chr = diff.collect { i -> + prefix == "chr" ? "${i} chr${i}" : + "${i} ${i.replace('chr', '')}" + }.join('\n') + ["${meta.id}.txt", "${chr}\n"] + } + .map{ file -> [[id: file.getBaseName()], file] } + + // Add the chromosome renaming file to the input channel + ch_annotate_input = ch_vcf.map { + meta, vcf, index, _diff, _prefix -> + [[id: meta.id], meta, vcf, index] + } // channel: [ [id], vcf, index ] + .combine(ch_rename_file, by:0) + .map { + _metaI, meta, vcf, index, rename_file -> + [meta, vcf, index, [], [], [], rename_file] + } + + // Rename the chromosome without prefix + BCFTOOLS_ANNOTATE(ch_annotate_input) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) + + ch_vcf_renamed = BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_ANNOTATE.out.tbi) + + emit: + vcf_renamed = ch_vcf_renamed // [ [id], vcf, csi ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_chr_rename_bcftools/tests/main.nf.test b/subworkflows/local/vcf_chr_rename_bcftools/tests/main.nf.test new file mode 100644 index 00000000..e2784f52 --- /dev/null 
+++ b/subworkflows/local/vcf_chr_rename_bcftools/tests/main.nf.test @@ -0,0 +1,98 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_CHR_RENAME_BCFTOOLS" + script "../main.nf" + + config "./nextflow.config" + + workflow "VCF_CHR_RENAME_BCFTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_chr_rename_bcftools" + tag "vcf_chr_rename_bcftools" + + tag "bcftools" + tag "bcftools/annotate" + tag "bcftools/index" + tag "gawk" + + test("Should run without error") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "multi"], + file( params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz",checkIfExist:true), + file( params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi",checkIfExist:true), + ["21", "22"], + "chr" + ], + [ + [id: "21"], + file( params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file( params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true), + ["21"], + "nochr" + ], + [ + [id: "chr21"], + file( params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file( params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true), + ["chr21"], + "nochr" + ] + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + { assert snapshot(workflow.out.vcf_renamed.collect{ + path(it[1]).vcf.getVariantsAsStrings(2) + }).match("headerwithchr") + } + ) + } + } + + test("Should run with same id") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "input1", chr: "21"], + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true), + ["21"], + "nochr" + ], + [ + [id: "input1", chr: "22"], + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz",checkIfExist:true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi",checkIfExist:true), + ["22"], + "nochr" + ] + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + { assert snapshot(workflow.out.vcf_renamed.collect{ + path(it[1]).vcf.getVariantsAsStrings(2) + }).match("header21and22") + } + ) + } + } +} diff --git a/subworkflows/local/vcf_chr_rename_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_chr_rename_bcftools/tests/main.nf.test.snap new file mode 100644 index 00000000..1050bd1e --- /dev/null +++ b/subworkflows/local/vcf_chr_rename_bcftools/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "header21and22": { + "content": [ + [ + [ + "chr21\t6257460\tDUP3SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6258270;SVLEN=810\tGT\t1/1", + "chr21\t6279548\tDUP135SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6280162;SVLEN=614\tGT\t1/1" + ], + [ + "chr21\t6257460\tDUP3SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6258270;SVLEN=810\tGT\t1/1", + "chr21\t6279548\tDUP135SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6280162;SVLEN=614\tGT\t1/1" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-07-17T11:54:12.209449942" + }, + "Should run with same id": { + "content": [ + { + "0": [ + [ + { + "id": "input1", + "chr": "21" + }, + "input1_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "input1_chrrename.vcf.gz.tbi:md5,8a1453dff16c43c61861c310d2ee2f10" + ], + [ + { + "id": "input1", + "chr": "22" + }, + "input1_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "input1_chrrename.vcf.gz.tbi:md5,8a1453dff16c43c61861c310d2ee2f10" + ] + ], + "1": [ + "versions.yml:md5,0e44bc472b61396e89549eaa606c6205" + ], + "vcf_renamed": [ + [ + { + "id": "input1", + "chr": "21" + }, + "input1_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "input1_chrrename.vcf.gz.tbi:md5,8a1453dff16c43c61861c310d2ee2f10" + ], + [ + { + "id": "input1", + "chr": "22" + }, + "input1_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "input1_chrrename.vcf.gz.tbi:md5,8a1453dff16c43c61861c310d2ee2f10" + ] + ], + "versions": [ + "versions.yml:md5,0e44bc472b61396e89549eaa606c6205" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T18:13:42.892263614" + }, + "Should run without error": { + "content": [ + { + "0": [ + [ + { + "id": "21" + }, + "21_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "21_chrrename.vcf.gz.tbi:md5,8a1453dff16c43c61861c310d2ee2f10" + ], + [ + { + "id": "chr21" + }, + "chr21_chrrename.vcf.gz:md5,22785a5d7ec1132f766efae5f8e00adf", + "chr21_chrrename.vcf.gz.tbi:md5,8659a5bb7cb0140be5e6d493d6fe226a" + ], + [ + { + "id": "multi" + }, + "multi_chrrename.vcf.gz:md5,5f6f1ca261270d55eec054368f3d9587", + "multi_chrrename.vcf.gz.tbi:md5,7194cd264e0673a740b4de06bbe62a4e" + ] + ], + "1": [ + "versions.yml:md5,0e44bc472b61396e89549eaa606c6205" + ], + "vcf_renamed": [ + [ + { + "id": "21" + }, + "21_chrrename.vcf.gz:md5,39cd8e316cd9b9282b8289d69d81260b", + "21_chrrename.vcf.gz.tbi:md5,8a1453dff16c43c61861c310d2ee2f10" + ], + [ + { + "id": "chr21" + }, + 
"chr21_chrrename.vcf.gz:md5,22785a5d7ec1132f766efae5f8e00adf", + "chr21_chrrename.vcf.gz.tbi:md5,8659a5bb7cb0140be5e6d493d6fe226a" + ], + [ + { + "id": "multi" + }, + "multi_chrrename.vcf.gz:md5,5f6f1ca261270d55eec054368f3d9587", + "multi_chrrename.vcf.gz.tbi:md5,7194cd264e0673a740b4de06bbe62a4e" + ] + ], + "versions": [ + "versions.yml:md5,0e44bc472b61396e89549eaa606c6205" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T18:13:32.768716353" + }, + "headerwithchr": { + "content": [ + [ + [ + "chr21\t6257460\tDUP3SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6258270;SVLEN=810\tGT\t1/1", + "chr21\t6279548\tDUP135SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6280162;SVLEN=614\tGT\t1/1" + ], + [ + "21\t6257460\tDUP3SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6258270;SVLEN=810\tGT\t1/1", + "21\t6279548\tDUP135SURVIVOR\tN\t\t.\tLowQual\tPRECISE;SVTYPE=DUP;SVMETHOD=SURVIVOR_sim;CHR2=chr21;END=6280162;SVLEN=614\tGT\t1/1" + ], + [ + "chr22\t16849764\tSV_845_2\tN\tN[10:38880001[\t25\tPASS\tSVTYPE=BND;REGIONA=38879850,38880055;REGIONB=16849354,16850051;LFA=7,0;LFB=7,0;LTE=6,0;CTG=.\tGT:CN:COV:DV:RV:LQ:RR:DR\t0/1:.:26,0,25:6:0:0,0.0466667:19,21:23,27", + "chr22\t16884368\tSV_846_2\tN\tN[10:38908615[\t80\tPASS\tSVTYPE=BND;REGIONA=38908615,38909107;REGIONB=16883928,16884368;LFA=15,0;LFB=15,0;LTE=14,0;CTG=.\tGT:CN:COV:DV:RV:LQ:RR:DR\t1/1:.:25,0,45:14:0:0,0.0240964:7,23:0,23" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T15:50:28.012191227" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_chr_rename_bcftools/tests/nextflow.config b/subworkflows/local/vcf_chr_rename_bcftools/tests/nextflow.config new file mode 100644 index 00000000..3c49d95b --- /dev/null +++ b/subworkflows/local/vcf_chr_rename_bcftools/tests/nextflow.config @@ -0,0 +1,14 
@@ +params { + max_memory = '7.GB' +} + +process { + withName: BCFTOOLS_ANNOTATE { + ext.args = [ + "-Oz", + "--no-version", + "--write-index=tbi" + ].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + } +} diff --git a/subworkflows/local/vcf_chr_rename_bcftools/tests/tags.yml b/subworkflows/local/vcf_chr_rename_bcftools/tests/tags.yml new file mode 100644 index 00000000..8f03fc5c --- /dev/null +++ b/subworkflows/local/vcf_chr_rename_bcftools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_chr_rename_bcftools: + - subworkflows/local/vcf_chr_rename_bcftools/** diff --git a/subworkflows/local/vcf_chunk_glimpse/main.nf b/subworkflows/local/vcf_chunk_glimpse/main.nf new file mode 100644 index 00000000..264b1b83 --- /dev/null +++ b/subworkflows/local/vcf_chunk_glimpse/main.nf @@ -0,0 +1,66 @@ +include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk' +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk' + +workflow VCF_CHUNK_GLIMPSE { + + take: + ch_reference // channel: [ [panel, chr], vcf, csi ] + ch_map // channel (optional): [ [chr], map ] + chunk_model // channel : model + + main: + + ch_versions = Channel.empty() + // Add chromosome to channel + ch_vcf_csi_chr = ch_reference + .map{metaPC, vcf, csi -> [metaPC, vcf, csi, metaPC.chr]} + + // Make chunks with Glimpse1 + GLIMPSE_CHUNK(ch_vcf_csi_chr) + ch_versions = ch_versions.mix(GLIMPSE_CHUNK.out.versions) + + // Rearrange chunks into channel for QUILT + ch_chunks_quilt = GLIMPSE_CHUNK.out.chunk_chr + .splitText() + .map { metaPC, line -> + def fields = line.split("\t") + def startEnd = fields[2].split(':')[1].split('-') + [metaPC, metaPC.chr, startEnd[0], startEnd[1]] + } + + // Rearrange chunks into channel for GLIMPSE1 and GLIMPSE2 + ch_chunks_glimpse1 = GLIMPSE_CHUNK.out.chunk_chr + .splitCsv( + header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], + sep: "\t", skip: 0 + ) + .map { metaPC, it -> [metaPC, it["RegionIn"], it["RegionOut"]]} + + ch_input_glimpse2 = 
ch_vcf_csi_chr + .map{ + metaPC, vcf, csi, chr -> [metaPC.subMap("chr"), metaPC, vcf, csi, chr] + } + .join(ch_map) + .map{ + _metaC, metaPC, vcf, csi, chr, gmap -> [metaPC, vcf, csi, chr, gmap] + } + GLIMPSE2_CHUNK ( ch_input_glimpse2, chunk_model ) + ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() ) + + // Rearrange channels + ch_chunks_glimpse2 = GLIMPSE2_CHUNK.out.chunk_chr + .splitCsv( + header: [ + 'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', + 'WindowMb', 'NbTotVariants', 'NbComVariants' + ], sep: "\t", skip: 0 + ) + .map { metaPC, it -> [metaPC, it["RegionBuf"], it["RegionCnk"]]} + + emit: + chunks = GLIMPSE_CHUNK.out.chunk_chr // channel: [ [panel, chr], txt ] + chunks_quilt = ch_chunks_quilt // channel: [ [panel, chr], chr, start, end ] + chunks_glimpse1 = ch_chunks_glimpse1 // channel: [ [panel, chr], chr, region1, region2 ] + chunks_glimpse2 = ch_chunks_glimpse2 // channel: [ [panel, chr], chr, region1, region2 ] // channel: [ [panel, chr], bin] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test new file mode 100644 index 00000000..9be51fbf --- /dev/null +++ b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test @@ -0,0 +1,154 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_CHUNK_GLIMPSE" + script "../main.nf" + + config "./nextflow.config" + + workflow "VCF_CHUNK_GLIMPSE" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_chunk_glimpse" + tag "vcf_chunk_glimpse" + + tag "glimpse" + tag "glimpse/chunk" + tag "glimpse2" + tag "glimpse2/chunk" + + test("Chunks with Map") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path 
+ "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + input[1] = Channel.of( + [ + [chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_22.map", checkIfExist:true) + ], + [ + [chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_21.map", checkIfExist:true) + ] + ) + input[2] = "recursive" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() + } + ) + } + } + + test("Chunks without Map") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + input[1] = Channel.of( + [[chr: "chr22"], []], + [[chr: "chr21"], []] + ) + input[2] = "recursive" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() + } + ) + } + } + + test("Chunks with sequential model") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ 
+ input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + input[1] = Channel.of( + [[chr: "chr22"], []], + [[chr: "chr21"], []] + ) + input[2] = "sequential" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.chunks.collect{ + path(it[1]).readLines() + } + ).match() + } + ) + } + } +} diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap new file mode 100644 index 00000000..90a62932 --- /dev/null +++ b/subworkflows/local/vcf_chunk_glimpse/tests/main.nf.test.snap @@ -0,0 +1,837 @@ +{ + "Chunks without Map": { + "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + 
"chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "3": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16609998", + "chr21:16570070-16590517" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16609998", + "chr21:16590518-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16609999", + "chr22:16570065-16592222" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16609999", + "chr22:16592223-16609999" + ] + ], + "4": [ + [ + [ + + ] + ] + ], + "5": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ], + "binary": [ + [ + [ + + ] + ] + ], + "chunks": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "chunks_glimpse1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "chunks_glimpse2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16609998", + "chr21:16570070-16590517" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16609998", + "chr21:16590518-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + 
"chr22:16570065-16609999", + "chr22:16570065-16592222" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16609999", + "chr22:16592223-16609999" + ] + ], + "chunks_quilt": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "versions": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ] + }, + [ + [ + "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", + "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" + ], + [ + "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", + "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-10T18:36:03.086912114" + }, + "Chunks with Map": { + "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + 
"chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "3": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16609998", + "chr21:16570070-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16609999", + "chr22:16570065-16609999" + ] + ], + "4": [ + [ + [ + + ] + ] + ], + "5": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ], + "binary": [ + [ + [ + + ] + ] + ], + "chunks": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "chunks_glimpse1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "chunks_glimpse2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16609998", + "chr21:16570070-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16609999", + "chr22:16570065-16609999" + ] + ], + "chunks_quilt": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + 
"chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "versions": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ] + }, + [ + [ + "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", + "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" + ], + [ + "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", + "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-10T18:35:46.374947355" + }, + "Chunks with sequential model": { + "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "3": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:1-16609998", + "chr21:1-16590145" + ], + 
[ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-1248956422", + "chr21:16590146-1248956422" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:1-16609999", + "chr22:1-16590520" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-1248956422", + "chr22:16590521-1248956422" + ] + ], + "4": [ + [ + [ + + ] + ] + ], + "5": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ], + "binary": [ + [ + [ + + ] + ] + ], + "chunks": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21.txt:md5,64ca4a1655363e4a4e558836e5ac12f9" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22.txt:md5,3344e171251722cf58ae31136da223ac" + ] + ], + "chunks_glimpse1": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-16595525", + "chr21:16570070-16590513" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16585483-16609998", + "chr21:16590521-16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-16597215", + "chr22:16570065-16592216" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16587172-16609999", + "chr22:16592229-16609999" + ] + ], + "chunks_glimpse2": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:1-16609998", + "chr21:1-16590145" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21:16570070-1248956422", + "chr21:16590146-1248956422" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:1-16609999", + "chr22:1-16590520" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22:16570065-1248956422", + "chr22:16590521-1248956422" + ] + ], + "chunks_quilt": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16570070", + "16595525" + ], + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "chr21", + "16585483", + "16609998" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + 
"16570065", + "16597215" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "chr22", + "16587172", + "16609999" + ] + ], + "versions": [ + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,1ae67fb38ed979f02f47d5ecb8a85f0d", + "versions.yml:md5,7d277747b107043dd31d3aef18045eef" + ] + }, + [ + [ + "0\tchr21\tchr21:16570070-16595525\tchr21:16570070-16590513\t20444\t419", + "1\tchr21\tchr21:16585483-16609998\tchr21:16590521-16609998\t19478\t417" + ], + [ + "0\tchr22\tchr22:16570065-16597215\tchr22:16570065-16592216\t22152\t452", + "1\tchr22\tchr22:16587172-16609999\tchr22:16592229-16609999\t17771\t451" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-10T18:36:16.974727308" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config b/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config new file mode 100644 index 00000000..5a412491 --- /dev/null +++ b/subworkflows/local/vcf_chunk_glimpse/tests/nextflow.config @@ -0,0 +1,15 @@ +process { + resourceLimits = [ + memory : "2.GB", + cpus : 2, + time : "1h" + ] + withName: GLIMPSE2_CHUNK { + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}" } + } + withName: GLIMPSE_CHUNK { + ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}" } + } +} diff --git a/subworkflows/local/vcf_chunk_glimpse/tests/tags.yml b/subworkflows/local/vcf_chunk_glimpse/tests/tags.yml new file mode 100644 index 00000000..202e2c2c --- /dev/null +++ b/subworkflows/local/vcf_chunk_glimpse/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_chunk_glimpse: + - subworkflows/local/vcf_chunk_glimpse/** diff --git a/subworkflows/local/vcf_concatenate_bcftools/main.nf 
b/subworkflows/local/vcf_concatenate_bcftools/main.nf new file mode 100644 index 00000000..812481e9 --- /dev/null +++ b/subworkflows/local/vcf_concatenate_bcftools/main.nf @@ -0,0 +1,37 @@ +include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat' + +workflow VCF_CONCATENATE_BCFTOOLS { + + take: + ch_vcf_tbi // channel: [ [id, panel, chr, tools], vcf, tbi ] + + main: + + ch_versions = Channel.empty() + + // Keep only id from meta + ch_vcf_tbi_grouped = ch_vcf_tbi + .map{ metaIPTC, vcf, tbi -> [metaIPTC.subMap("id", "tools", "panel", "batch") + ["chr": "all"], vcf, tbi] } + .groupTuple( by:0 ) + .map{ metaIPTC, vcf, tbi -> [metaIPTC, vcf, tbi, vcf.size() ] } // Compute number of records + .branch{ + one: it[3] == 1 + more: it[3] > 1 + } + + // Ligate and concatenate chunks + BCFTOOLS_CONCAT(ch_vcf_tbi_grouped.more.map{ [it[0], it[1], it[2]] }) + ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions.first()) + + // Join VCFs and TBIs + ch_vcf_tbi_concat = BCFTOOLS_CONCAT.out.vcf + .join(BCFTOOLS_CONCAT.out.tbi) + + ch_vcf_tbi_join = ch_vcf_tbi_grouped.one + .map{ [it[0], it[1][0], it[2][0]] } + .mix(ch_vcf_tbi_concat) + + emit: + vcf_tbi = ch_vcf_tbi_join // channel: [ [id], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_concatenate_bcftools/tests/main.nf.test b/subworkflows/local/vcf_concatenate_bcftools/tests/main.nf.test new file mode 100644 index 00000000..bf59d2af --- /dev/null +++ b/subworkflows/local/vcf_concatenate_bcftools/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_CONCATENATE_BCFTOOLS" + script "../main.nf" + + config "./nextflow.config" + + workflow "VCF_CONCATENATE_BCFTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_concatenate_bcftools" + tag "vcf_concatenate_bcftools" + + tag "bcftools" + tag "bcftools/concatenate" + + + test("Concatenate vcf") { + when { + params { + max_cpus = 2 + max_memory = 
'2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/vcf_concatenate_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_concatenate_bcftools/tests/main.nf.test.snap new file mode 100644 index 00000000..f9977675 --- /dev/null +++ b/subworkflows/local/vcf_concatenate_bcftools/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Concatenate vcf": { + "content": [ + [ + "versions.yml:md5,0370775d4b565f1cf95dcbea437fa36a" + ], + [ + [ + { + "id": "1000GP", + "chr": "all" + }, + "1000GP_all.vcf.gz", + "1000GP_all.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3196, variantCount=1739, phased=true, phasedAutodetect=true]" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T14:07:30.535498349" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_concatenate_bcftools/tests/nextflow.config b/subworkflows/local/vcf_concatenate_bcftools/tests/nextflow.config new file mode 100644 index 00000000..daebb081 --- /dev/null +++ 
b/subworkflows/local/vcf_concatenate_bcftools/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: BCFTOOLS_CONCAT { + ext.args = "--write-index=tbi" + ext.prefix = { "${meta.id}_${meta.chr}" } + } +} diff --git a/subworkflows/local/vcf_concatenate_bcftools/tests/tags.yml b/subworkflows/local/vcf_concatenate_bcftools/tests/tags.yml new file mode 100644 index 00000000..a6fb77de --- /dev/null +++ b/subworkflows/local/vcf_concatenate_bcftools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_concatenate_bcftools: + - subworkflows/local/vcf_concatenate_bcftools/** diff --git a/subworkflows/local/vcf_concordance_glimpse2/main.nf b/subworkflows/local/vcf_concordance_glimpse2/main.nf new file mode 100644 index 00000000..8545d6a0 --- /dev/null +++ b/subworkflows/local/vcf_concordance_glimpse2/main.nf @@ -0,0 +1,76 @@ +include { GLIMPSE2_CONCORDANCE } from '../../../modules/nf-core/glimpse2/concordance' +include { GAWK } from '../../../modules/nf-core/gawk' +include { ADD_COLUMNS } from '../../../modules/local/add_columns' +include { GUNZIP } from '../../../modules/nf-core/gunzip' +include { GAWK as GAWK_ERROR_SPL } from '../../../modules/nf-core/gawk' +include { GAWK as GAWK_RSQUARE_SPL } from '../../../modules/nf-core/gawk' + +workflow VCF_CONCORDANCE_GLIMPSE2 { + + take: + ch_vcf_emul // VCF file with imputed genotypes [ [id, panel, tool], vcf, csi] + ch_vcf_truth // VCF file with truth genotypes [ [id, panel], vcf, csi] + ch_vcf_freq // VCF file with panel frequencies [ [panel, chr], vcf, csi] + ch_region // Regions to process [ [chr, region], region] + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + ch_concordance = ch_vcf_emul + .map{metaIPTC, vcf, csi -> [metaIPTC.subMap("id"), metaIPTC, vcf, csi]} + .combine(ch_vcf_truth + .map{metaIPC, vcf, csi -> [ metaIPC.subMap("id"), vcf, csi ]} + , by: 0 + ) + .combine(ch_vcf_freq) + .combine(ch_region.map{[it[1]]}.collect().toList()) + .map{ _metaI, metaIPTC, emul, e_csi, truth, 
t_csi, _metaP, freq, f_csi, regions -> + [metaIPTC, emul, e_csi, truth, t_csi, freq, f_csi, [], regions] + } + + GLIMPSE2_CONCORDANCE ( + ch_concordance, + [[], [], params.bins, [], [], params.min_val_gl, params.min_val_dp] + ) + ch_versions = ch_versions.mix(GLIMPSE2_CONCORDANCE.out.versions.first()) + + GAWK_ERROR_SPL( + GLIMPSE2_CONCORDANCE.out.errors_spl, + [] + ) + ch_versions = ch_versions.mix(GAWK_ERROR_SPL.out.versions.first()) + + GAWK_RSQUARE_SPL( + GLIMPSE2_CONCORDANCE.out.rsquare_spl, + [] + ) + ch_versions = ch_versions.mix(GAWK_ERROR_SPL.out.versions.first()) + + ch_multiqc_files = ch_multiqc_files.mix(GLIMPSE2_CONCORDANCE.out.errors_cal.map{ _meta, txt -> [txt]}) + ch_multiqc_files = ch_multiqc_files.mix(GLIMPSE2_CONCORDANCE.out.errors_grp.map{ _meta, txt -> [txt]}) + ch_multiqc_files = ch_multiqc_files.mix(GAWK_ERROR_SPL.out.output.map{ _meta, txt -> [txt]}) + ch_multiqc_files = ch_multiqc_files.mix(GLIMPSE2_CONCORDANCE.out.rsquare_grp.map{ _meta, txt -> [txt]}) + ch_multiqc_files = ch_multiqc_files.mix(GAWK_RSQUARE_SPL.out.output.map{ _meta, txt -> [txt]}) + ch_multiqc_files = ch_multiqc_files.mix(GLIMPSE2_CONCORDANCE.out.rsquare_per_site.map{ _meta, txt -> [txt]}) + + GUNZIP(GLIMPSE2_CONCORDANCE.out.errors_grp) + ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) + + ADD_COLUMNS(GUNZIP.out.gunzip) + ch_versions = ch_versions.mix(ADD_COLUMNS.out.versions.first()) + + GAWK( + ADD_COLUMNS.out.txt + .map{ _meta, txt -> [["id":"AllSamples"], txt]} + .groupTuple(), + [] + ) + ch_versions = ch_versions.mix(GAWK.out.versions.first()) + + emit: + stats = GAWK.out.output // [ [all], txt ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files +} diff --git a/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test b/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test new file mode 100644 index 00000000..f348898c --- /dev/null +++ b/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test @@ 
-0,0 +1,64 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_CONCORDANCE_GLIMPSE2" + script "../main.nf" + config "./nextflow.config" + + workflow "VCF_CONCORDANCE_GLIMPSE2" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_concordance_glimpse2" + tag "vcf_concordance_glimpse2" + + tag "bcftools" + tag "bcftools/index" + tag "glimpse" + tag "glimpse/phase" + tag "glimpse/concordance" + + test("vcf_concordance_glimpse2") { + when { + workflow { + """ + allele_freq = Channel.fromList([ + [ + [panel:'1000GP', chr:'21'], // meta map + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.sites.vcf.gz",checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.sites.vcf.gz.csi",checkIfExists:true) + ] + ]) + truth = Channel.fromList([ + [[id:'NA12878'], // meta map + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi",checkIfExists:true)], + [[id:'NA19401'], // meta map + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bcf.csi",checkIfExists:true)], + ]) + estimate = Channel.fromList([ + [[id:'NA12878'], // meta map + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi",checkIfExists:true)], + [[id:'NA19401'], // meta map + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s_imputed.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s_imputed.bcf.csi",checkIfExists:true)] + ]) + region = Channel.of([ [chr:'21', 
region:'chr21:16570000-16610000'], "chr21:16570000-16610000"]) + input[0] = estimate + input[1] = truth + input[2] = allele_freq + input[3] = region + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + + } +} diff --git a/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test.snap b/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test.snap new file mode 100644 index 00000000..7fdea09f --- /dev/null +++ b/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test.snap @@ -0,0 +1,105 @@ +{ + "vcf_concordance_glimpse2": { + "content": [ + { + "0": [ + [ + { + "id": "TestQuality" + }, + "TestQuality.txt:md5,0e199fabdd479db15849394e148bd1e1" + ] + ], + "1": [ + "versions.yml:md5,11b3f37c4921334a4e4881b5fb466802", + "versions.yml:md5,185cbca93dea56aa1c36636716a6ac89", + "versions.yml:md5,8bc5d870dd6bf1b13e84535e0b71ce39", + "versions.yml:md5,cb85a5c4bc22951d41b21bc660e89637" + ], + "2": [ + [ + "NA12878.error.cal.txt.gz:md5,33c10b18d109bc6fd53fc2594e18a844" + ], + [ + "NA12878.error.grp.txt.gz:md5,36f3fc826367dc6e89e95b58d90e94eb" + ], + [ + "NA12878.error.spl.txt.gz:md5,e7b0730fba0dc331de80ed911fcb6dcf" + ], + [ + "NA12878.rsquare.grp.txt.gz:md5,4e2d341610d908d3f78685388f7d41b8" + ], + [ + "NA12878.rsquare.spl.txt.gz:md5,b9c53f1ad2c5fc5243ccbddb4a8b4260" + ], + [ + "NA19401.error.cal.txt.gz:md5,46a6d42d469328096a66b8b172bce6d7" + ], + [ + "NA19401.error.grp.txt.gz:md5,0a37ae2f1a5ce339a798f0438d80fd82" + ], + [ + "NA19401.error.spl.txt.gz:md5,efd6bd3bf0c5530f0cf9b5ad055abe49" + ], + [ + "NA19401.rsquare.grp.txt.gz:md5,8161d852ad5a77d5b83236960497cfe1" + ], + [ + "NA19401.rsquare.spl.txt.gz:md5,fb9ff57181626f06a1947b62ad18f97d" + ] + ], + "multiqc_files": [ + [ + "NA12878.error.cal.txt.gz:md5,33c10b18d109bc6fd53fc2594e18a844" + ], + [ + "NA12878.error.grp.txt.gz:md5,36f3fc826367dc6e89e95b58d90e94eb" + ], + [ + "NA12878.error.spl.txt.gz:md5,e7b0730fba0dc331de80ed911fcb6dcf" + ], + [ 
+ "NA12878.rsquare.grp.txt.gz:md5,4e2d341610d908d3f78685388f7d41b8" + ], + [ + "NA12878.rsquare.spl.txt.gz:md5,b9c53f1ad2c5fc5243ccbddb4a8b4260" + ], + [ + "NA19401.error.cal.txt.gz:md5,46a6d42d469328096a66b8b172bce6d7" + ], + [ + "NA19401.error.grp.txt.gz:md5,0a37ae2f1a5ce339a798f0438d80fd82" + ], + [ + "NA19401.error.spl.txt.gz:md5,efd6bd3bf0c5530f0cf9b5ad055abe49" + ], + [ + "NA19401.rsquare.grp.txt.gz:md5,8161d852ad5a77d5b83236960497cfe1" + ], + [ + "NA19401.rsquare.spl.txt.gz:md5,fb9ff57181626f06a1947b62ad18f97d" + ] + ], + "stats": [ + [ + { + "id": "TestQuality" + }, + "TestQuality.txt:md5,0e199fabdd479db15849394e148bd1e1" + ] + ], + "versions": [ + "versions.yml:md5,11b3f37c4921334a4e4881b5fb466802", + "versions.yml:md5,185cbca93dea56aa1c36636716a6ac89", + "versions.yml:md5,8bc5d870dd6bf1b13e84535e0b71ce39", + "versions.yml:md5,cb85a5c4bc22951d41b21bc660e89637" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T14:08:13.256185601" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_concordance_glimpse2/tests/nextflow.config b/subworkflows/local/vcf_concordance_glimpse2/tests/nextflow.config new file mode 100644 index 00000000..2d7ac3cf --- /dev/null +++ b/subworkflows/local/vcf_concordance_glimpse2/tests/nextflow.config @@ -0,0 +1,10 @@ +params { + max_memory = '7.GB' +} + +process { + withName: 'VCF_CONCORDANCE_GLIMPSE2:GAWK' { + ext.args2 = "'(NR == 1) || (FNR > 1)'" // Skip header line + ext.suffix = "txt" + } +} diff --git a/subworkflows/local/vcf_concordance_glimpse2/tests/tags.yml b/subworkflows/local/vcf_concordance_glimpse2/tests/tags.yml new file mode 100644 index 00000000..35cfc8a3 --- /dev/null +++ b/subworkflows/local/vcf_concordance_glimpse2/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_concordance_glimpse2: + - subworkflows/local/vcf_concordance_glimpse2/** diff --git a/subworkflows/local/vcf_impute_glimpse1/main.nf b/subworkflows/local/vcf_impute_glimpse1/main.nf new 
file mode 100644 index 00000000..25177676 --- /dev/null +++ b/subworkflows/local/vcf_impute_glimpse1/main.nf @@ -0,0 +1,67 @@ + +include { GLIMPSE_PHASE } from '../../../modules/nf-core/glimpse/phase' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { GLIMPSE_LIGATE } from '../../../modules/nf-core/glimpse/ligate' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' + +workflow VCF_IMPUTE_GLIMPSE1 { + + take: + ch_input // channel (mandatory): [ [id], vcf, tbi ] + ch_panel // channel (mandatory): [ [panel, chr], vcf, tbi ] + ch_chunks // channel (optional): [ [panel, chr], region1, region2 ] + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + samples_file = Channel.of([[]]).collect() + gmap_file = Channel.of([[]]).collect() + + // Combine chunks with panel + ch_chunks_panel = ch_chunks + .combine(ch_panel, by:0) + .map{ metaPC, regionin, regionout, panel, index -> + [["panel": metaPC.id, "chr": metaPC.chr], regionin, regionout, panel, index] + } + + // Join input and chunks reference + ch_phase_input = ch_input + .map{ metaIPC, vcf, index -> [metaIPC.subMap("panel", "chr"), metaIPC, vcf, index] } + .combine(samples_file) + .combine(ch_chunks_panel, by: 0) + .combine(gmap_file) + .map{ _metaPC, metaIPC, bam, bai, samples, regionin, regionout, panel, panel_index, gmap -> + [metaIPC + ["chunk": regionout], + bam, bai, samples, regionin, regionout, panel, panel_index, gmap] + } + + GLIMPSE_PHASE ( ch_phase_input ) // [meta, vcf, index, sample, regionin, regionout, ref, ref_index, map] + ch_versions = ch_versions.mix(GLIMPSE_PHASE.out.versions ) + + BCFTOOLS_INDEX_1 ( GLIMPSE_PHASE.out.phased_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions ) + + // Ligate all phased files in one and index it + ligate_input = GLIMPSE_PHASE.out.phased_variants + .join( BCFTOOLS_INDEX_1.out.csi ) + .map{ metaIPCR, vcf, index -> 
[metaIPCR.subMap("id", "panel", "chr", "batch"), vcf, index] } + .groupTuple() + + GLIMPSE_LIGATE ( ligate_input ) + ch_versions = ch_versions.mix(GLIMPSE_LIGATE.out.versions ) + + BCFTOOLS_INDEX_2 ( GLIMPSE_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions ) + + // Join imputed and index files + ch_imputed_vcf_tbi = GLIMPSE_LIGATE.out.merged_variants + .join(BCFTOOLS_INDEX_2.out.tbi) + .map{ metaIPC, vcf, index -> [metaIPC + [tools: "glimpse1"], vcf, index] } + + emit: + vcf_tbi = ch_imputed_vcf_tbi // channel: [ [id, panel, chr, tool], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files // channel: [ multiqc_files.yml ] +} diff --git a/subworkflows/local/vcf_impute_glimpse1/tests/main.nf.test b/subworkflows/local/vcf_impute_glimpse1/tests/main.nf.test new file mode 100644 index 00000000..a9a626d5 --- /dev/null +++ b/subworkflows/local/vcf_impute_glimpse1/tests/main.nf.test @@ -0,0 +1,107 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_IMPUTE_GLIMPSE1" + script "../main.nf" + config "./nextflow.config" + + workflow "VCF_IMPUTE_GLIMPSE1" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_impute_glimpse1" + tag "vcf_impute_glimpse1" + tag "bam_gl_bcftools" + + tag "glimpse" + tag "glimpse/phase" + tag "glimpse/ligate" + tag "bcftools" + tag "bcftools/index" + + test("Impute with glimpse1 two vcf") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878", panel: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true), + ], + [ + [id: "NA19401", panel: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + 
"hum_data/individuals/NA19401/NA19401.s.bcf.csi", checkIfExist:true), + ], + ]) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ]).collect() + input[2] = Channel.of( + [[chr: "22", id: "1000GP"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[chr: "22", id: "1000GP"], "chr22:16587172-16609999", "chr22:16592229-16609999"] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } + + test("Impute with glimpse1 one vcf") { + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "NA12878", panel: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s.bcf.csi", checkIfExist:true), + ] + ]) + input[1] = Channel.of([ + [id: "1000GP", chr: "22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ]).collect() + input[2] = Channel.of( + [[chr: "22", id: "1000GP"], "chr22:16570065-16597215", "chr22:16570065-16592216"], + [[chr: "22", id: "1000GP"], "chr22:16587172-16609999", "chr22:16592229-16609999"] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + 
workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.summary }, + workflow.out.vcf_tbi.collect{ path(it[1]).vcf.header.getGenotypeSamples().sort() } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/vcf_impute_glimpse1/tests/main.nf.test.snap b/subworkflows/local/vcf_impute_glimpse1/tests/main.nf.test.snap new file mode 100644 index 00000000..f6481644 --- /dev/null +++ b/subworkflows/local/vcf_impute_glimpse1/tests/main.nf.test.snap @@ -0,0 +1,96 @@ +{ + "Impute with glimpse1 one vcf": { + "content": [ + [ + "versions.yml:md5,2f8b6f8bb3b3972bb0375abf1de08807", + "versions.yml:md5,2f8b6f8bb3b3972bb0375abf1de08807", + "versions.yml:md5,8c79d92b93fe75a9094499ce5d5eaca2", + "versions.yml:md5,9d78fceae32ec42b1818bb5890e9a6ba", + "versions.yml:md5,bf4a479eb73467775a4afeebd25841aa", + "versions.yml:md5,bf4a479eb73467775a4afeebd25841aa" + ], + [ + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22", + "tools": "glimpse1" + }, + "NA12878_22_ligate.vcf.gz", + "NA12878_22_ligate.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=false, phasedAutodetect=false]" + ], + [ + [ + "NA12878" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T12:31:45.012960487" + }, + "Impute with glimpse1 two vcf": { + "content": [ + [ + "versions.yml:md5,2f8b6f8bb3b3972bb0375abf1de08807", + "versions.yml:md5,2f8b6f8bb3b3972bb0375abf1de08807", + "versions.yml:md5,2f8b6f8bb3b3972bb0375abf1de08807", + "versions.yml:md5,2f8b6f8bb3b3972bb0375abf1de08807", + "versions.yml:md5,8c79d92b93fe75a9094499ce5d5eaca2", + "versions.yml:md5,8c79d92b93fe75a9094499ce5d5eaca2", + "versions.yml:md5,9d78fceae32ec42b1818bb5890e9a6ba", + "versions.yml:md5,9d78fceae32ec42b1818bb5890e9a6ba", + "versions.yml:md5,bf4a479eb73467775a4afeebd25841aa", + 
"versions.yml:md5,bf4a479eb73467775a4afeebd25841aa", + "versions.yml:md5,bf4a479eb73467775a4afeebd25841aa", + "versions.yml:md5,bf4a479eb73467775a4afeebd25841aa" + ], + [ + [ + { + "id": "NA12878", + "panel": "1000GP", + "chr": "22", + "tools": "glimpse1" + }, + "NA12878_22_ligate.vcf.gz", + "NA12878_22_ligate.vcf.gz.tbi" + ], + [ + { + "id": "NA19401", + "panel": "1000GP", + "chr": "22", + "tools": "glimpse1" + }, + "NA19401_22_ligate.vcf.gz", + "NA19401_22_ligate.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=false, phasedAutodetect=false]", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=false, phasedAutodetect=false]" + ], + [ + [ + "NA12878" + ], + [ + "NA19401" + ] + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T12:31:10.854492318" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_impute_glimpse1/tests/nextflow.config b/subworkflows/local/vcf_impute_glimpse1/tests/nextflow.config new file mode 100644 index 00000000..cbf35847 --- /dev/null +++ b/subworkflows/local/vcf_impute_glimpse1/tests/nextflow.config @@ -0,0 +1,60 @@ +process { + + resourceLimits = [ + cpus: 4, + memory: '2.GB', + time: '6.h' + ] + + withName: GAWK { + ext.args2 = "'NR>1 { split(\$1, a, \"[:-_]\"); print a[1], \$2, \$3 \",\" \$4 }'" + ext.suffix = "txt" + } + + withName: BCFTOOLS_MPILEUP { + ext.args = [ + "-I", + "-E", + "-a 'FORMAT/DP'" + ].join(' ') + ext.args2 = [ + "-Aim", + "-C alleles" + ].join(' ') + ext.prefix = { "${meta.id}" } + } + + withName: BCFTOOLS_MERGE { + ext.args = [ + "--write-index=tbi", + ].join(' ') + ext.prefix = { "${meta.id}" } + } + + withName: BCFTOOLS_ANNOTATE { + ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}.annotate" } + } + + withName: GLIMPSE_PHASE { + ext.args = ["--impute-reference-only-variants"].join(' ') + ext.prefix = { 
"${meta.id}_${meta.chunk.replace(':','_')}_phase" } + ext.suffix = "bcf" + publishDir = [ enabled: false ] + } + + withName: BCFTOOLS_INDEX_1 { + ext.args = "--csi" + publishDir = [ enabled: false ] + } + + withName: GLIMPSE_LIGATE { + ext.prefix = { "${meta.id}_${meta.chr}_ligate" } + publishDir = [ enabled: false ] + } + + withName: BCFTOOLS_INDEX_2 { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } +} diff --git a/subworkflows/local/vcf_impute_glimpse1/tests/tags.yml b/subworkflows/local/vcf_impute_glimpse1/tests/tags.yml new file mode 100644 index 00000000..5fdd8262 --- /dev/null +++ b/subworkflows/local/vcf_impute_glimpse1/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_impute_glimpse1: + - subworkflows/local/bam_impute_glimpse1/** diff --git a/subworkflows/local/vcf_normalize_bcftools/main.nf b/subworkflows/local/vcf_normalize_bcftools/main.nf new file mode 100644 index 00000000..b90cb6aa --- /dev/null +++ b/subworkflows/local/vcf_normalize_bcftools/main.nf @@ -0,0 +1,51 @@ +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' +include { VCFLIB_VCFFIXUP } from '../../../modules/nf-core/vcflib/vcffixup/main' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' + + +workflow VCF_NORMALIZE_BCFTOOLS { + take: + ch_vcf_tbi // channel: [ [id, chr], vcf, index ] + ch_fasta // channel: [ [genome], fasta, fai ] + + main: + + ch_versions = Channel.empty() + ch_fasta = ch_fasta.map { meta, fasta, _fai -> [meta, fasta] } + + // Join duplicated biallelic sites into multiallelic records + if (params.normalize) { + BCFTOOLS_NORM(ch_vcf_tbi, ch_fasta) + ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions) + + // Join multiallelic VCF and TBI + ch_multiallelic_vcf_tbi = BCFTOOLS_NORM.out.vcf + .join(BCFTOOLS_NORM.out.tbi) + + // Remove all multiallelic records and samples specified in the `--remove_samples` command: + 
BCFTOOLS_VIEW(ch_multiallelic_vcf_tbi, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions) + + // Join biallelic VCF and TBI + ch_vcf_tbi = BCFTOOLS_VIEW.out.vcf + .join(BCFTOOLS_VIEW.out.tbi) + } + + // (Optional) Fix panel (When AC/AN INFO fields in VCF are inconsistent with GT field) + if (params.compute_freq == true) { + VCFLIB_VCFFIXUP(ch_vcf_tbi) + ch_versions = ch_versions.mix(VCFLIB_VCFFIXUP.out.versions) + + // Index fixed panel + BCFTOOLS_INDEX(VCFLIB_VCFFIXUP.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions) + + // Join fixed vcf and tbi + ch_vcf_tbi = VCFLIB_VCFFIXUP.out.vcf + .join(BCFTOOLS_INDEX.out.tbi) + } + emit: + vcf_tbi = ch_vcf_tbi // channel: [ [id, chr], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_normalize_bcftools/meta.yml b/subworkflows/local/vcf_normalize_bcftools/meta.yml new file mode 100644 index 00000000..30d793f9 --- /dev/null +++ b/subworkflows/local/vcf_normalize_bcftools/meta.yml @@ -0,0 +1,44 @@ +name: "vcf_normalize_bcftools" +description: Normalize VCF files (bcftools norm -m +any). Combine records spanning different lines into a single line. + Keep only biallelic SNPs and remove multiallelic records (bcftools view -m 2 -M 2 -v snps). + Convert to hap/legend format (bcftools convert --haplegendsample). + Optionally, remove samples from the reference panel (bcftools view -s ^SAMPLENAME). +keywords: + - bcftools + - norm + - view +components: + - bcftools/norm + - bcftools/view + - bcftools/index + - bcftools/convert +input: + - ch_vcf: + type: file + description: | + Reference panel of haplotypes in VCF/BCF format. + Index file of the Reference panel file. + Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). + The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). 
+ Structure: [ meta, vcf, csi, region ] + - ch_fasta: + type: file + description: | + Reference genome in fasta format. + Reference genome index in fai format + Structure: [ meta, fasta, fai ] +output: + - vcf_tbi: + type: file + description: | + Output VCF/BCF file for the normalized, only biallelic SNPs. + Structure: [meta, vcf, tbi] + - hap_legend: + type: file + description: | + Output Hap/Legend files for the normalized, only biallelic SNPs. + Structure: [meta, hap, legend] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" diff --git a/subworkflows/local/vcf_normalize_bcftools/tests/main.nf.test b/subworkflows/local/vcf_normalize_bcftools/tests/main.nf.test new file mode 100644 index 00000000..6fa38fbd --- /dev/null +++ b/subworkflows/local/vcf_normalize_bcftools/tests/main.nf.test @@ -0,0 +1,129 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_NORMALIZE_BCFTOOLS" + script "../main.nf" + + config "./nextflow.config" + + workflow "VCF_NORMALIZE_BCFTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_normalize_bcftools" + tag "vcf_normalize_bcftools" + + tag "bcftools" + tag "bcftools/norm" + tag "bcftools/view" + tag "bcftools/index" + tag "vcflib" + tag "vcflib/vcffixup" + + + test("Normalize vcf without computing frequencies nor removing samples") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + compute_freq = false + remove_samples = null + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + 
"hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + input[1] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.header.getGenotypeSamples().size() + } + ).match() }, + { workflow.out.vcf_tbi.collect{ + assert path(it[1]).vcf.variantCount == path(it[1]).vcf.noSnps + }} + ) + } + } + + test("Normalize vcf with computing frequencies after removing samples") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + compute_freq = true + remove_samples = "HG00109,HG00110,HG00111,HG00112,HG00113,HG00114" + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + input[1] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + 
]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.header.getGenotypeSamples().size() + } + ).match() } + ) + } + } +} diff --git a/subworkflows/local/vcf_normalize_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_normalize_bcftools/tests/main.nf.test.snap new file mode 100644 index 00000000..ae68d16e --- /dev/null +++ b/subworkflows/local/vcf_normalize_bcftools/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "Normalize vcf with computing frequencies after removing samples": { + "content": [ + [ + "versions.yml:md5,3bba75c8e94eb2271ad38e3a1ee06ad8", + "versions.yml:md5,3bba75c8e94eb2271ad38e3a1ee06ad8", + "versions.yml:md5,bdf2c1ea1cace63b301158243f3fd12f", + "versions.yml:md5,bdf2c1ea1cace63b301158243f3fd12f", + "versions.yml:md5,c1918a7ebe19e200e8981e12c8fd52d4", + "versions.yml:md5,c1918a7ebe19e200e8981e12c8fd52d4", + "versions.yml:md5,da9510d939fc9cb45b897127f0d5e490", + "versions.yml:md5,da9510d939fc9cb45b897127f0d5e490" + ], + [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21_fixed.vcf.gz", + "1000GP_chr21_fixed.vcf.gz.tbi" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22_fixed.vcf.gz", + "1000GP_chr22_fixed.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr21], sampleCount=3190, variantCount=836, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=3190, variantCount=903, phased=true, phasedAutodetect=true]" + ], + [ + 3190, + 3190 + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T14:11:08.9049564" + }, + "Normalize vcf without computing frequencies nor removing samples": { + "content": [ + [ + 
"versions.yml:md5,c1918a7ebe19e200e8981e12c8fd52d4", + "versions.yml:md5,c1918a7ebe19e200e8981e12c8fd52d4", + "versions.yml:md5,da9510d939fc9cb45b897127f0d5e490", + "versions.yml:md5,da9510d939fc9cb45b897127f0d5e490" + ], + [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21_biallelic_snps.vcf.gz", + "1000GP_chr21_biallelic_snps.vcf.gz.tbi" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22_biallelic_snps.vcf.gz", + "1000GP_chr22_biallelic_snps.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr21], sampleCount=3196, variantCount=836, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=3196, variantCount=903, phased=true, phasedAutodetect=true]" + ], + [ + 3196, + 3196 + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T14:10:11.989583075" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_normalize_bcftools/tests/nextflow.config b/subworkflows/local/vcf_normalize_bcftools/tests/nextflow.config new file mode 100644 index 00000000..a22fcf04 --- /dev/null +++ b/subworkflows/local/vcf_normalize_bcftools/tests/nextflow.config @@ -0,0 +1,23 @@ +process { + withName: BCFTOOLS_NORM { + ext.args = ["-m +any", "--no-version", "--output-type z", "--write-index=tbi"].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } + } + + withName: BCFTOOLS_VIEW { + ext.args = [ + "-v snps", "-m 2", "-M 2", + params.remove_samples ? 
"-s^${params.remove_samples}" : '', + "--output-type z", "--write-index=tbi" + ].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_biallelic_snps" } + } + + withName: VCFLIB_VCFFIXUP { + ext.prefix = { "${meta.id}_${meta.chr}" } + } + + withName: BCFTOOLS_INDEX { + ext.args = "--tbi" + } +} diff --git a/subworkflows/local/vcf_normalize_bcftools/tests/tags.yml b/subworkflows/local/vcf_normalize_bcftools/tests/tags.yml new file mode 100644 index 00000000..1d7fc677 --- /dev/null +++ b/subworkflows/local/vcf_normalize_bcftools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_normalize_bcftools: + - subworkflows/local/vcf_normalize_bcftools/** diff --git a/subworkflows/local/vcf_phase_shapeit5/main.nf b/subworkflows/local/vcf_phase_shapeit5/main.nf new file mode 100644 index 00000000..a8950adb --- /dev/null +++ b/subworkflows/local/vcf_phase_shapeit5/main.nf @@ -0,0 +1,91 @@ +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk' +include { SHAPEIT5_PHASECOMMON } from '../../../modules/nf-core/shapeit5/phasecommon' +include { SHAPEIT5_LIGATE } from '../../../modules/nf-core/shapeit5/ligate' +include { BCFTOOLS_INDEX as VCF_BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as VCF_BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' + +workflow VCF_PHASE_SHAPEIT5 { + + take: + ch_vcf // channel (mandatory) : [ [id, chr], vcf, csi, pedigree ] + ch_region // channel (mandatory) : [ [chr, region], region ] + ch_ref // channel (optional) : [ [id, chr], ref, csi ] + ch_scaffold // channel (optional) : [ [id, chr], scaffold, csi ] + ch_map // channel (mandatory) : [ [chr], map] + chunk_model // channel (mandatory) : [ model ] + + main: + + ch_versions = Channel.empty() + + // Chunk with Glimpse2 + ch_input_glimpse2 = ch_vcf + .map{ + metaIC, vcf, csi, _pedigree -> [metaIC.subMap("chr"), metaIC, vcf, csi] + } + .combine(ch_region.map{ metaCR, region -> [metaCR.subMap("chr"), region]}, by:0) + 
.join(ch_map) + .map{ + _metaC, metaIC, vcf, csi, region, gmap -> [metaIC, vcf, csi, region, gmap] + } + GLIMPSE2_CHUNK ( ch_input_glimpse2, chunk_model ) + ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() ) + + // Rearrange channels + ch_chunks_glimpse2 = GLIMPSE2_CHUNK.out.chunk_chr + .splitCsv( + header: [ + 'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', + 'WindowMb', 'NbTotVariants', 'NbComVariants' + ], sep: "\t", skip: 0 + ) + .map { metaIC, it -> [metaIC, it["RegionBuf"], it["RegionCnk"]]} + + ch_phase_input = ch_vcf + .combine(ch_chunks_glimpse2, by:0) + .map{ + metaIC, vcf, csi, pedigree, regionbuf, regioncnk -> [metaIC.subMap("chr"), metaIC, vcf, csi, pedigree, regionbuf, regioncnk] + } + .combine(ch_map, by:0) + .map { _metaC, metaIC, vcf, index, pedigree, regionbuf, regioncnk, gmap -> + [metaIC + [chunk: regioncnk], vcf, index, pedigree, regionbuf, gmap] + } + + SHAPEIT5_PHASECOMMON ( + ch_phase_input, ch_ref, + ch_scaffold + ) + ch_versions = ch_versions.mix(SHAPEIT5_PHASECOMMON.out.versions.first()) + + VCF_BCFTOOLS_INDEX_1(SHAPEIT5_PHASECOMMON.out.phased_variant) + ch_versions = ch_versions.mix(VCF_BCFTOOLS_INDEX_1.out.versions.first()) + + ch_ligate_input = SHAPEIT5_PHASECOMMON.out.phased_variant + .join(VCF_BCFTOOLS_INDEX_1.out.csi, failOnMismatch:true, failOnDuplicate:true) + .map{ meta, vcf, csi -> [meta.subMap("id", "chr"), [vcf, meta.chunk], csi]} + .groupTuple() + .map{ meta, vcf, csi -> + [ meta, + vcf + .sort { a, b -> + def aStart = a.last().split("-")[-1].toInteger() + def bStart = b.last().split("-")[-1].toInteger() + aStart <=> bStart + } + .collect{it.first()}, + csi]} + + SHAPEIT5_LIGATE(ch_ligate_input) + ch_versions = ch_versions.mix(SHAPEIT5_LIGATE.out.versions.first()) + + VCF_BCFTOOLS_INDEX_2(SHAPEIT5_LIGATE.out.merged_variants) + ch_versions = ch_versions.mix(VCF_BCFTOOLS_INDEX_2.out.versions.first()) + + ch_vcf_tbi_join = SHAPEIT5_LIGATE.out.merged_variants + .join(VCF_BCFTOOLS_INDEX_2.out.csi) + + emit: + 
vcf_tbi = ch_vcf_tbi_join // channel: [ [id, chr], vcf, csi ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test new file mode 100644 index 00000000..622331e8 --- /dev/null +++ b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test @@ -0,0 +1,188 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_PHASE_SHAPEIT5" + script "../main.nf" + + config "./nextflow.config" + + workflow "VCF_PHASE_SHAPEIT5" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_phase_shapeit5" + tag "vcf_phase_shapeit5" + + tag "glimpse2" + tag "glimpse2/chunk" + tag "shapeit5" + tag "shapeit5/phasecommon" + tag "shapeit5/ligate" + tag "bcftools" + tag "bcftools/index" + + test("Phase vcf with regions, no map, no ref, no scaffold, recursive model") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ] + ) + input[1] = Channel.of( + [[chr: "chr22", region:"chr22:16570000-16610000"], "chr22:16570000-16610000"], + [[chr: "chr21", region:"chr21:16570000-16610000"], "chr21:16570000-16610000"] + ) + input[2] = Channel.of([[],[],[]]).collect() + input[3] = Channel.of([[],[],[]]).collect() + input[4] = Channel.of( + [[chr: "chr22"],[]], + [[chr: "chr21"], []] + ) + input[5] = "recursive" + """ + } + } + + then { + assertAll( + { 
assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + } + ).match() }, + { workflow.out.vcf_tbi.collect{ + assert path(it[1]).vcf.phased + }} + ) + } + } + + test("Phase vcf with regions, no map, no ref, no scaffold, sequential model") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ] + ) + input[1] = Channel.of( + [[chr: "chr22", region:"chr22:16570000-16610000"], "chr22:16570000-16610000"], + [[chr: "chr21", region:"chr21:16570000-16610000"], "chr21:16570000-16610000"] + ) + input[2] = Channel.of([[],[],[]]).collect() + input[3] = Channel.of([[],[],[]]).collect() + input[4] = Channel.of( + [[chr: "chr22"],[]], + [[chr: "chr21"], []] + ) + input[5] = "sequential" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + } + ).match() }, + { workflow.out.vcf_tbi.collect{ + assert path(it[1]).vcf.phased + }} + ) + } + } + + /* TODO: Fix this test with https://github.com/odelaneau/shapeit5/issues/96 + 
test("Phase vcf with regions, with map, no ref, no scaffold") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + [] + ] + ) + input[1] = Channel.of( + [[chr: "chr22", region:"chr22:16570000-16610000"], "chr22:16570000-16610000"], + [[chr: "chr21", region:"chr21:16570000-16610000"], "chr21:16570000-16610000"] + ) + input[2] = Channel.of([[],[],[]]).collect() + input[3] = Channel.of([[],[],[]]).collect() + input[4] = Channel.of( + [ [chr: "chr22"], file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_22.map", checkIfExist:true)], + [ [chr: "chr21"], file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38_21.map", checkIfExist:true)] + ) + input[5] = "recursive" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + { assert snapshot(workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + }).match("Phasing content with map") + } + ) + } + }*/ +} diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap new file mode 100644 index 00000000..5a00a776 --- /dev/null +++ b/subworkflows/local/vcf_phase_shapeit5/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "Phase vcf with regions, no map, no ref, no scaffold, sequential model": { + "content": [ + [ + 
"versions.yml:md5,529c03b8d921c72026e91d71c0321811", + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,8ffcda8a9d22f60c90d0e4276da3e714", + "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", + "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" + ], + [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP.vcf.gz", + "1000GP.vcf.gz.csi" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP.vcf.gz", + "1000GP.vcf.gz.csi" + ] + ], + [ + "VcfFile [chromosomes=[chr21], sampleCount=3196, variantCount=836, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=3196, variantCount=903, phased=true, phasedAutodetect=true]" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-12T14:46:00.916017807" + }, + "Phase vcf with regions, no map, no ref, no scaffold, recursive model": { + "content": [ + [ + "versions.yml:md5,529c03b8d921c72026e91d71c0321811", + "versions.yml:md5,714bb0db6e2d39cf0042359a64915bc6", + "versions.yml:md5,8ffcda8a9d22f60c90d0e4276da3e714", + "versions.yml:md5,cab7592ebcb3d391afcd3191a175723b", + "versions.yml:md5,ed131d2608f28f8ada06ccc42717575e" + ], + [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP.vcf.gz", + "1000GP.vcf.gz.csi" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP.vcf.gz", + "1000GP.vcf.gz.csi" + ] + ], + [ + "VcfFile [chromosomes=[chr21], sampleCount=3196, variantCount=836, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=3196, variantCount=903, phased=true, phasedAutodetect=true]" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-12T14:45:28.783070136" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config b/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config new file mode 100644 index 00000000..c0c00d6f --- /dev/null +++ 
b/subworkflows/local/vcf_phase_shapeit5/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + withName: GLIMPSE2_CHUNK { + ext.prefix = { "${meta.id}_chunks" } + ext.args = ["--window-mb 0.01", "--window-cm 0.01", "--window-count 200", "--buffer-mb 0.005", "--buffer-cm 0.005", "--buffer-count 30"].join(' ') + } + + withName: SHAPEIT5_PHASECOMMON { + ext.prefix = { "${meta.id}_${meta.chunk.replace(':',"_")}_chunks" } + } +} diff --git a/subworkflows/local/vcf_phase_shapeit5/tests/tags.yml b/subworkflows/local/vcf_phase_shapeit5/tests/tags.yml new file mode 100644 index 00000000..7da3753c --- /dev/null +++ b/subworkflows/local/vcf_phase_shapeit5/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_phase_shapeit5: + - subworkflows/local/vcf_phase_shapeit5/** diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf new file mode 100644 index 00000000..c94dd95e --- /dev/null +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -0,0 +1,31 @@ +include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' + +workflow VCF_SITES_EXTRACT_BCFTOOLS { + take: + ch_vcf // channel: [ [id, chr], vcf, index ] + ch_fasta // channel: [ [genome], fasta, fai ] + + main: + + ch_versions = Channel.empty() + ch_fasta = ch_fasta.map { meta, fasta, _fai -> [meta, fasta] } + + // Convert VCF to Hap and Legend files + BCFTOOLS_CONVERT(ch_vcf, ch_fasta, []) + ch_versions = ch_versions.mix(BCFTOOLS_CONVERT.out.versions) + + // Extract sites positions + BCFTOOLS_VIEW(ch_vcf, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first()) + + // Join extracted sites and index + ch_posfile = BCFTOOLS_VIEW.out.vcf + .join(BCFTOOLS_VIEW.out.tbi) + .join(BCFTOOLS_CONVERT.out.hap) + .join(BCFTOOLS_CONVERT.out.legend) + + emit: + posfile = ch_posfile // channel: [ [id, chr], vcf, csi, hap, legend ] + versions = ch_versions // 
channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_sites_extract_bcftools/meta.yml b/subworkflows/local/vcf_sites_extract_bcftools/meta.yml new file mode 100644 index 00000000..5912b909 --- /dev/null +++ b/subworkflows/local/vcf_sites_extract_bcftools/meta.yml @@ -0,0 +1,38 @@ +name: "vcf_sites_extract_bcftools" +description: Extract only sites (no genotype data) (bcftools view -G -m 2 -M 2 -v). + Convert to glimpse1 TSV format (bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n'). + Convert to stitch/quilt TSV format (bcftools query -f'%CHROM\t%POS\t%REF\t%ALT\n'). +keywords: + - bcftools + - query + - view +components: + - bcftools/view + - bcftools/query +input: + - ch_vcf: + type: file + description: | + Reference panel of haplotypes in VCF/BCF format. + Index file of the Reference panel file. + Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). + The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). + Structure: [ meta, vcf, csi, region ] +output: + - vcf_tbi: + type: file + description: | + Output VCF/BCF file for the normalized, only biallelic SNPs. + Structure: [meta, vcf, tbi] + - panel_tsv: + type: file + description: | + Compressed panel TSV and index for Glimpse1. + - panel_sites: + type: file + description: | + Panel VCF and index for Glimpse1. 
+ - versions: + type: file + description: File containing software versions + pattern: "versions.yml" diff --git a/subworkflows/local/vcf_sites_extract_bcftools/tests/main.nf.test b/subworkflows/local/vcf_sites_extract_bcftools/tests/main.nf.test new file mode 100644 index 00000000..84b98163 --- /dev/null +++ b/subworkflows/local/vcf_sites_extract_bcftools/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_SITES_EXTRACT_BCFTOOLS" + script "../main.nf" + + config "./nextflow.config" + + workflow "VCF_SITES_EXTRACT_BCFTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_sites_extract_bcftools" + tag "vcf_sites_extract_bcftools" + + tag "bcftools" + tag "bcftools/convert" + tag "bcftools/view" + + + test("Extract sites") { + when { + params { + max_cpus = 2 + max_memory = '2.GB' + } + workflow { + """ + input[0] = Channel.of( + [ + [id: "1000GP", chr: "chr22"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi", checkIfExist:true), + ], + [ + [id: "1000GP", chr: "chr21"], + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi", checkIfExist:true), + ] + ) + input[1] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]).collect() + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + { assert snapshot(workflow.out.posfile.collect{ + path(it[1]).vcf.summary + }).match("Extract sites content") + }, + { assert 
snapshot(workflow.out.posfile.collect{[ + path(it[3]).linesGzip[5], + path(it[4]).linesGzip[5] + ]}).match("Hap legend content") + } + ) + } + } +} diff --git a/subworkflows/local/vcf_sites_extract_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_sites_extract_bcftools/tests/main.nf.test.snap new file mode 100644 index 00000000..f16c41b7 --- /dev/null +++ b/subworkflows/local/vcf_sites_extract_bcftools/tests/main.nf.test.snap @@ -0,0 +1,99 @@ +{ + "Extract sites content": { + "content": [ + [ + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=836, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr22], sampleCount=0, variantCount=903, phased=true, phasedAutodetect=true]" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-18T18:40:15.082501255" + }, + "Extract sites": { + "content": [ + { + "0": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21_glimpse1_sites.vcf.gz:md5,9eff24bfee06f8fe274922ad06ccaf59", + "1000GP_chr21_glimpse1_sites.vcf.gz.tbi:md5,92f4dca4b3ed85b53e88c2ce6caa1ba4", + "1000GP_chr21.hap.gz:md5,64d7fdfc96ce79848cf9c3c220e6774f", + "1000GP_chr21.legend.gz:md5,3f7edc4c153521a0871c9d51f7da2372" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22_glimpse1_sites.vcf.gz:md5,2bd01ad6efeb5e33fc9cc0fb59c5ca4d", + "1000GP_chr22_glimpse1_sites.vcf.gz.tbi:md5,89c2c5662e869750dbe0044993b66cb3", + "1000GP_chr22.hap.gz:md5,9500b831dcfee86a98577dde9e2cce73", + "1000GP_chr22.legend.gz:md5,82f6f97f61e68b4872cc14c0125ff037" + ] + ], + "1": [ + "versions.yml:md5,4200660d750728ce3b7ee1f3bce1e18e", + "versions.yml:md5,4200660d750728ce3b7ee1f3bce1e18e", + "versions.yml:md5,a68010a14b99e45c800704d0ebc37d0d" + ], + "posfile": [ + [ + { + "id": "1000GP", + "chr": "chr21" + }, + "1000GP_chr21_glimpse1_sites.vcf.gz:md5,9eff24bfee06f8fe274922ad06ccaf59", + "1000GP_chr21_glimpse1_sites.vcf.gz.tbi:md5,92f4dca4b3ed85b53e88c2ce6caa1ba4", + 
"1000GP_chr21.hap.gz:md5,64d7fdfc96ce79848cf9c3c220e6774f", + "1000GP_chr21.legend.gz:md5,3f7edc4c153521a0871c9d51f7da2372" + ], + [ + { + "id": "1000GP", + "chr": "chr22" + }, + "1000GP_chr22_glimpse1_sites.vcf.gz:md5,2bd01ad6efeb5e33fc9cc0fb59c5ca4d", + "1000GP_chr22_glimpse1_sites.vcf.gz.tbi:md5,89c2c5662e869750dbe0044993b66cb3", + "1000GP_chr22.hap.gz:md5,9500b831dcfee86a98577dde9e2cce73", + "1000GP_chr22.legend.gz:md5,82f6f97f61e68b4872cc14c0125ff037" + ] + ], + "versions": [ + "versions.yml:md5,4200660d750728ce3b7ee1f3bce1e18e", + "versions.yml:md5,4200660d750728ce3b7ee1f3bce1e18e", + "versions.yml:md5,a68010a14b99e45c800704d0ebc37d0d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-18T18:40:13.586493339" + }, + "Hap legend content": { + "content": [ + [ + [ + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + "chr21:16570144_C_T 16570144 C T" + ], + [ + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + "chr22:16570267_T_C 16570267 T C" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-18T18:40:16.758706342" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_sites_extract_bcftools/tests/nextflow.config b/subworkflows/local/vcf_sites_extract_bcftools/tests/nextflow.config new file mode 100644 index 00000000..5d69712d --- /dev/null +++ b/subworkflows/local/vcf_sites_extract_bcftools/tests/nextflow.config @@ -0,0 +1,18 @@ +process { + withName: BCFTOOLS_CONVERT { + ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} + } + + withName: BCFTOOLS_VIEW { + ext.args = [ + "-G", + "-m 2", + "-M 2", + "-v snps", + "--output-type z", + "--write-index=tbi", + "--no-version" + ].join(' ') + ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" } + } +} diff --git a/subworkflows/local/vcf_sites_extract_bcftools/tests/tags.yml b/subworkflows/local/vcf_sites_extract_bcftools/tests/tags.yml new file mode 100644 index 00000000..2787f51d --- /dev/null +++ b/subworkflows/local/vcf_sites_extract_bcftools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_sites_extract_bcftools: + - subworkflows/local/vcf_sites_extract_bcftools/** diff --git a/subworkflows/local/vcf_split_bcftools/main.nf b/subworkflows/local/vcf_split_bcftools/main.nf new file mode 100644 index 00000000..72832214 --- /dev/null +++ b/subworkflows/local/vcf_split_bcftools/main.nf @@ -0,0 +1,29 @@ +include { BCFTOOLS_PLUGINSPLIT } from '../../../modules/nf-core/bcftools/pluginsplit' + +workflow 
VCF_SPLIT_BCFTOOLS { + take: + ch_vcf // channel: [ [id, chr, tools], vcf, index, samples ] + + main: + + ch_versions = Channel.empty() + + BCFTOOLS_PLUGINSPLIT(ch_vcf, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_PLUGINSPLIT.out.versions.first()) + + ch_vcf_samples = BCFTOOLS_PLUGINSPLIT.out.vcf + .transpose() + .map{metaITC, vcf -> [metaITC + [id: vcf.getBaseName().tokenize(".")[0]], vcf]} + + ch_tbi_samples = BCFTOOLS_PLUGINSPLIT.out.tbi + .transpose() + .map{metaITC, tbi -> [metaITC + [id: tbi.getBaseName().tokenize(".")[0]], tbi]} + + ch_vcf_tbi_samples = ch_vcf_samples + .join(ch_tbi_samples) + + emit: + vcf_tbi = ch_vcf_tbi_samples // channel: [ [id, chr, tools], vcf, index ] + versions = ch_versions // channel: [ versions.yml ] + +} diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test new file mode 100644 index 00000000..b85b2be9 --- /dev/null +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test @@ -0,0 +1,130 @@ +nextflow_workflow { + + name "Test Subworkflow VCF_SPLIT_BCFTOOLS" + script "../main.nf" + + config "./nextflow.config" + + workflow "VCF_SPLIT_BCFTOOLS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/vcf_split_bcftools" + tag "vcf_split_bcftools" + + tag "bcftools" + tag "bcftools/split" + + test("Split multiple vcf file - with renaming") { + setup { + run("BCFTOOLS_MERGE") { + script "../../../../modules/nf-core/bcftools/merge/main.nf" + process { + """ + input[0] = Channel.of( + [ + [id: "allSamples.batch0"], + [file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s_imputed.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA20359/NA20359.s_imputed.bcf", checkIfExist:true)], + [file(params.pipelines_testdata_base_path + 
"hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s_imputed.bcf.csi", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA20359/NA20359.s_imputed.bcf.csi", checkIfExist:true)], + [] + ] + ) + input[1] = Channel.of([ + [id: "GRCh38"], + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz.fai", checkIfExist:true), + ]) + """ + } + } + } + when { + workflow { + """ + renaming_file = channel.of( + "NA12878\tNA12878_test NA12878.myfile", + "NA19401\t-\tNA19401", + "NA20359\tNA20359_2\tNA20359_3" + ).collectFile(name: "samples.txt", newLine: true) + input[0] = BCFTOOLS_MERGE.out.vcf.join(BCFTOOLS_MERGE.out.tbi).combine(renaming_file) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] }, + workflow.out.vcf_tbi.collect{ + path(it[1]).vcf.summary + } + ).match() }, + { workflow.out.vcf_tbi.collect{ + assert path(it[1]).vcf.sampleCount == 1 + }} + ) + } + } + + test("Split one sample vcf file") { + when { + workflow { + """ + input[0] = Channel.of([ + [id: 'NA12878'], + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf", checkIfExist:true), + file(params.pipelines_testdata_base_path + "hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi", checkIfExist:true), + [] + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.versions, + workflow.out.vcf_tbi.collect{[ + it[0], + path(it[1]).getFileName().toString(), + path(it[2]).getFileName().toString() + ] } + ).match() } + ) + } + } + + test("Split no sample 
vcf file") { + when { + workflow { + """ + input[0] = Channel.of([ + [id: 'dbsnp_146.hg38'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz", checkIfExist:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi", checkIfExist:true), + [] + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport.contains("No samples to split: input/dbsnp_146.hg38.vcf.gz") + } + ) + } + } +} diff --git a/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap new file mode 100644 index 00000000..5f1d9236 --- /dev/null +++ b/subworkflows/local/vcf_split_bcftools/tests/main.nf.test.snap @@ -0,0 +1,63 @@ +{ + "Split one sample vcf file": { + "content": [ + [ + "versions.yml:md5,6c3351d97e3a99f7a7a3231fc49f92e2" + ], + [ + [ + { + "id": "NA12878" + }, + "NA12878.vcf.gz", + "NA12878.vcf.gz.tbi" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T13:53:09.194659411" + }, + "Split multiple vcf file - with renaming": { + "content": [ + [ + "versions.yml:md5,6c3351d97e3a99f7a7a3231fc49f92e2" + ], + [ + [ + { + "id": "NA12878" + }, + "NA12878.myfile.vcf.gz", + "NA12878.myfile.vcf.gz.tbi" + ], + [ + { + "id": "NA19401" + }, + "NA19401.vcf.gz", + "NA19401.vcf.gz.tbi" + ], + [ + { + "id": "NA20359_3" + }, + "NA20359_3.vcf.gz", + "NA20359_3.vcf.gz.tbi" + ] + ], + [ + "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true, phasedAutodetect=true]", + "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true, phasedAutodetect=true]" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T13:53:01.127637055" + } +} diff --git 
a/subworkflows/local/vcf_split_bcftools/tests/nextflow.config b/subworkflows/local/vcf_split_bcftools/tests/nextflow.config new file mode 100644 index 00000000..a2282fbf --- /dev/null +++ b/subworkflows/local/vcf_split_bcftools/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + resourceLimits = [cpus: 2, memory: '2.GB'] + + withName: BCFTOOLS_MERGE { + ext.args = ["--write-index=tbi", "--output-type z"].join(' ') + } + + withName: BCFTOOLS_PLUGINSPLIT { + ext.args = ["--write-index=tbi", "--output-type z"].join(' ') + } +} diff --git a/subworkflows/local/vcf_split_bcftools/tests/tags.yml b/subworkflows/local/vcf_split_bcftools/tests/tags.yml new file mode 100644 index 00000000..e4070711 --- /dev/null +++ b/subworkflows/local/vcf_split_bcftools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/vcf_split_bcftools: + - subworkflows/local/vcf_split_bcftools/** diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..0fcbf7b3 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,124 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + 
// When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..a09572e5 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..5cb7bafe --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,462 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + def manifest_doi = workflow.manifest.doi.tokenize(",") + // Handling multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + manifest_doi.each { doi_ref -> + temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + } + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    ${group}

    \n" + summary_section += "
    \n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "
    ${param}
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? 
'' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + } + mqc_report = mqc_report[0] + } + } + } + catch (Exception all) { + if (multiqc_report) { + log.warn("[${workflow.manifest.name}] Could not attach MultiQC report to summary email") + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = 
workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception all) { + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on 
completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ 
+--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test 
new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { 
assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", 
+ "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + 
"iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 00000000..4994303e --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. This input should not be empty + // for meta pipelines + + main: + + // + // Print parameter summary to stdout. 
This will display the parameters + // that differ from the default given in the JSON schema + // + if(parameters_schema) { + log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) + } else { + log.info paramsSummaryLog(input_workflow) + } + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + if(parameters_schema) { + validateParameters(parameters_schema:parameters_schema) + } else { + validateParameters() + } + } + + emit: + dummy_emit = true +} + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 00000000..f7d9f028 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. 
+output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test new file mode 100644 index 00000000..8fb30164 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" + script "../main.nf" + workflow "UTILS_NFSCHEMA_PLUGIN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/utils_nfschema_plugin" + tag "plugin/nf-schema" + + config "./nextflow.config" + + test("Should run nothing") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } + + test("Should run nothing - custom schema") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params - custom schema") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = 
"${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config new file mode 100644 index 00000000..0907ac58 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -0,0 +1,8 @@ +plugins { + id "nf-schema@2.1.0" +} + +validation { + parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + monochromeLogs = true +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..331e0d2f --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ] +} diff --git a/tests/config/env_nf.yml b/tests/config/env_nf.yml new file mode 100644 index 00000000..e3b11408 --- /dev/null +++ b/tests/config/env_nf.yml @@ -0,0 +1,13 @@ +name: env_nf +channels: + - conda-forge + - bioconda + - anaconda + - defaults +dependencies: + - openjdk>=17.0 + - nextflow>=23.10 + - singularity>=3.8 + - nf-core>=2.13.0 + - prettier>=3.0 + - nf-test>=0.8 diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config new file mode 100644 index 00000000..deb46598 --- /dev/null +++ b/tests/config/nf-test.config @@ -0,0 +1,51 @@ +params { + publish_dir_mode = "copy" + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +} + +process { + resourceLimits = [ + cpus: 4, + memory: '3.GB', + time: '2.h' + ] +} + +profiles { + singularity { + singularity.enabled = true + singularity.autoMounts = true + } + conda { + conda.enabled = true + } + mamba { + conda.enabled = true + conda.useMamba = true + } + podman { + podman.enabled = true + podman.userEmulation = true + podman.runOptions = "--runtime crun --platform linux/x86_64 --systemd=always" + } + docker { + docker.enabled = true + docker.userEmulation = false + docker.fixOwnership = true + docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' + } +} + +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Increase time available to build Conda environment +conda { createTimeout = "120 min" } + +// Load test_data.config containing paths to test data 
+// includeConfig 'test_data.config' + +manifest { + nextflowVersion = '!>=23.04.0' +} diff --git a/tests/config/test_data.config b/tests/config/test_data.config new file mode 100644 index 00000000..d514c9c9 --- /dev/null +++ b/tests/config/test_data.config @@ -0,0 +1,729 @@ +// README: +// https://github.com/nf-core/test-datasets/blob/modules/README.md + +params { + // Base directory for test data + test_data_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules" + + test_data { + 'sarscov2' { + 'genome' { + genome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta" + genome_fasta_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.gz" + genome_fasta_fai = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.fai" + genome_fasta_txt_zst = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.fasta.txt.zst" + genome_dict = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.dict" + genome_gff3 = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gff3" + genome_gff3_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gff3.gz" + genome_gtf = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.gtf" + genome_paf = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.paf" + genome_sizes = "${params.test_data_base}/data/genomics/sarscov2/genome/genome.sizes" + transcriptome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/transcriptome.fasta" + proteome_fasta = "${params.test_data_base}/data/genomics/sarscov2/genome/proteome.fasta" + transcriptome_paf = "${params.test_data_base}/data/genomics/sarscov2/genome/transcriptome.paf" + + test_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed" + test_bed_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed.gz" + test2_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/test2.bed" + test_bed12 = 
"${params.test_data_base}/data/genomics/sarscov2/genome/bed/test.bed12" + baits_bed = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/baits.bed" + bed_autosql = "${params.test_data_base}/data/genomics/sarscov2/genome/bed/bed6alt.as" + + reference_cnn = "${params.test_data_base}/data/genomics/sarscov2/genome/cnn/reference.cnn" + + kraken2 = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2" + kraken2_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2.tar.gz" + + kraken2_bracken = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2_bracken" + kraken2_bracken_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kraken2_bracken.tar.gz" + + kaiju = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kaiju" + kaiju_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kaiju.tar.gz" + + kofamscan_profiles_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kofamscan/profiles.tar.gz" + kofamscan_ko_list_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/kofamscan/ko_list.gz" + + ncbi_taxmap_zip = "${params.test_data_base}/data/genomics/sarscov2/genome/db/maltextract/ncbi_taxmap.zip" + taxon_list_txt = "${params.test_data_base}/data/genomics/sarscov2/genome/db/maltextract/taxon_list.txt" + + mmseqs_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/mmseqs.tar.gz" + + all_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/all_sites.fas" + informative_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/informative_sites.fas" + + contigs_genome_maf_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz" + contigs_genome_par = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/contigs.genome.par" + lastdb_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/last/lastdb.tar.gz" + + 
baits_interval_list = "${params.test_data_base}/data/genomics/sarscov2/genome/picard/baits.interval_list" + targets_interval_list = "${params.test_data_base}/data/genomics/sarscov2/genome/picard/targets.interval_list" + regions_txt = "${params.test_data_base}/data/genomics/sarscov2/genome/graphtyper/regions.txt" + } + 'illumina' { + test_single_end_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.bam" + test_single_end_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam" + test_single_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai" + test_paired_end_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.bam" + test_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam" + test_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai" + test_paired_end_methylated_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.bam" + test_paired_end_methylated_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam" + test_paired_end_methylated_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam.bai" + test_unaligned_bam = "${params.test_data_base}/data/genomics/sarscov2/illumina/bam/test.unaligned.bam" + + test_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz" + test_interleaved_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz" + test_1_fastq_txt_zst = 
"${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.fastq.txt.zst" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz" + test_methylated_1_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test.methylated_1.fastq.gz" + test_methylated_2_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test.methylated_2.fastq.gz" + + test_bedgraph = "${params.test_data_base}/data/genomics/sarscov2/illumina/bedgraph/test.bedgraph" + + test_bigwig = "${params.test_data_base}/data/genomics/sarscov2/illumina/bigwig/test.bigwig" + + test_wig_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/wig/test.wig.gz" + + test_baserecalibrator_table = "${params.test_data_base}/data/genomics/sarscov2/illumina/gatk/test.baserecalibrator.table" + + test_computematrix_mat_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/deeptools/test.computeMatrix.mat.gz" + + test_bcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.bcf" + + test_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf" + test_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf.gz" + test_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi" + test2_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf" + test2_vcf_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz" + test2_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" + test2_vcf_targets_tsv_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz" + test3_vcf = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf" + test3_vcf_gz = 
"${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf.gz" + test3_vcf_gz_tbi = "${params.test_data_base}/data/genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi" + + contigs_fasta = "${params.test_data_base}/data/genomics/sarscov2/illumina/fasta/contigs.fasta" + scaffolds_fasta = "${params.test_data_base}/data/genomics/sarscov2/illumina/fasta/scaffolds.fasta" + + assembly_gfa = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa" + assembly_gfa_bgz = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.bgz" + assembly_gfa_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.gz" + assembly_gfa_zst = "${params.test_data_base}/data/genomics/sarscov2/illumina/gfa/assembly.gfa.zst" + + test_single_end_bam_readlist_txt = "${params.test_data_base}/data/genomics/sarscov2/illumina/picard/test.single_end.bam.readlist.txt" + + SRR13255544_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/sra/SRR13255544.tar.gz" + SRR11140744_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/sra/SRR11140744.tar.gz" + } + 'nanopore' { + test_sorted_bam = "${params.test_data_base}/data/genomics/sarscov2/nanopore/bam/test.sorted.bam" + test_sorted_bam_bai = "${params.test_data_base}/data/genomics/sarscov2/nanopore/bam/test.sorted.bam.bai" + + fast5_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/nanopore/fast5/fast5.tar.gz" + + test_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test.fastq.gz" + + test_sequencing_summary = "${params.test_data_base}/data/genomics/sarscov2/nanopore/sequencing_summary/test.sequencing_summary.txt" + } + 'metagenome' { + classified_reads_assignment = "${params.test_data_base}/data/genomics/sarscov2/metagenome/test_1.kraken2.reads.txt" + kraken_report = "${params.test_data_base}/data/genomics/sarscov2/metagenome/test_1.kraken2.report.txt" + krona_taxonomy = 
"${params.test_data_base}/data/genomics/sarscov2/metagenome/krona_taxonomy.tab" + seqid2taxid_map = "${params.test_data_base}/data/genomics/sarscov2/metagenome/seqid2taxid.map" + nodes_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/nodes.dmp" + names_dmp = "${params.test_data_base}/data/genomics/sarscov2/metagenome/names.dmp" + } + } + 'mus_musculus' { + 'genome' { + rnaseq_samplesheet = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv" + rnaseq_genemeta = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.gene_meta.tsv" + rnaseq_contrasts = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.contrasts.csv" + rnaseq_matrix = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv" + deseq_results = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.deseq2.results.tsv" + } + 'illumina' { + test_1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376998.small.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376999.small.fastq.gz" + } + 'csv' { + count_table = "${params.test_data_base}/data/genomics/mus_musculus/mageck/count_table.csv" + library = "${params.test_data_base}/data/genomics/mus_musculus/mageck/yusa_library.csv" + } + 'txt' { + design_matrix = "${params.test_data_base}/data/genomics/mus_musculus/mageck/design_matrix.txt" + } + } + 'homo_sapiens' { + '10xgenomics' { + cellranger { + test_10x_10k_pbmc_5fb_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_5fb_fastq_2_gz = 
"${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5fb/subsampled_sc5p_v2_hs_PBMC_10k_5fb_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_5gex_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_5gex_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/5gex/5gex/subsampled_sc5p_v2_hs_PBMC_10k_5gex_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_b_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_b_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/bcell/subsampled_sc5p_v2_hs_PBMC_10k_b_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_t_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/tcell/subsampled_sc5p_v2_hs_PBMC_10k_t_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_t_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/fastqs/tcell/subsampled_sc5p_v2_hs_PBMC_10k_t_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc/sc5p_v2_hs_PBMC_10k_multi_5gex_5fb_b_t_feature_ref.csv" + + test_10x_10k_pbmc_cmo_cmo_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_cmo_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/cmo/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_multiplexing_capture_S1_L001_R2_001.fastq.gz" + 
test_10x_10k_pbmc_cmo_gex1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex2_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_2/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_2_gex_S1_L001_R1_001.fastq.gz" + test_10x_10k_pbmc_cmo_gex2_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_2/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_2_gex_S1_L001_R2_001.fastq.gz" + test_10x_10k_pbmc_cmo_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/10k_pbmc_cmo_count_feature_reference.csv" + + test_10x_5k_cmvpos_tcells_ab_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_ab_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/ab/subsampled_5k_human_antiCMV_T_TBNK_connect_AB_S2_L004_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_gex1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_gex1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/gex_1/subsampled_5k_human_antiCMV_T_TBNK_connect_GEX_1_S1_L001_R2_001.fastq.gz" + 
test_10x_5k_cmvpos_tcells_vdj_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R1_001.fastq.gz" + test_10x_5k_cmvpos_tcells_vdj_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/fastqs/vdj/subsampled_5k_human_antiCMV_T_TBNK_connect_VDJ_S1_L001_R2_001.fastq.gz" + test_10x_5k_cmvpos_tcells_feature_ref_csv = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/5k_cmvpos_tcells/5k_human_antiCMV_T_TBNK_connect_Multiplex_count_feature_reference.csv" + + test_10x_vdj_ref_json = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/reference.json" + test_10x_vdj_ref_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/fasta/regions.fa" + test_10x_vdj_ref_suppfasta = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger/references/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0/fasta/supp_regions.fa" + + test_scATAC_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_R1_001.fastq.gz" + test_scATAC_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_R2_001.fastq.gz" + test_scATAC_3_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_R3_001.fastq.gz" + test_scATAC_I_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/cellranger-atac/test_scATAC_S1_L001_I1_001.fastq.gz" + } + spaceranger { + test_10x_ffpe_cytassist_fastq_1_gz = 
"${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_2_S1_L001_R1_001.fastq.gz" + test_10x_ffpe_cytassist_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_2_S1_L001_R2_001.fastq.gz" + test_10x_ffpe_cytassist_image = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_image.tif" + test_10x_ffpe_cytassist_probeset = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/CytAssist_11mm_FFPE_Human_Glioblastoma_probe_set.csv" + + test_10x_ffpe_v1_fastq_1_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_S1_L001_R1_001.fastq.gz" + test_10x_ffpe_v1_fastq_2_gz = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_S1_L001_R2_001.fastq.gz" + test_10x_ffpe_v1_image = "${params.test_data_base}/data/genomics/homo_sapiens/10xgenomics/spaceranger/human-ovarian-cancer-1-standard_v1_ffpe/Visium_FFPE_Human_Ovarian_Cancer_image.jpg" + } + } + 'genome' { + genome_elfasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.elfasta" + genome_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta" + genome_fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.fai" + genome_fasta_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz" + genome_fasta_gz_fai = 
"${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz.fai" + genome_fasta_gz_gzi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.fasta.gz.gzi" + genome_strtablefile = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome_strtablefile.zip" + genome_dict = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.dict" + genome_gff3 = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.gff3" + genome_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.gtf" + genome_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.interval_list" + genome_multi_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.multi_intervals.bed" + genome_blacklist_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed" + genome_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.sizes" + genome_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed" + genome_header = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.header" + genome_bed_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed.gz" + genome_bed_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.bed.gz.tbi" + genome_elsites = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.elsites" + transcriptome_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/transcriptome.fasta" + genome2_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome2.fasta" + genome_chain_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.chain.gz" + genome_annotated_interval_tsv = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.annotated_intervals.tsv" + genome_mt_gb = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.NC_012920_1.gb" + 
genome_preprocessed_count_tsv = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.preprocessed_intervals.counts.tsv" + genome_preprocessed_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.preprocessed_intervals.interval_list" + genome_ploidy_model = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.ploidy_model.tar.gz" + genome_ploidy_calls = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.ploidy_calls.tar.gz" + genome_germline_cnv_model = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.germline_cnv_model.tar.gz" + genome_germline_cnv_calls = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome.germline_cnv_calls.tar.gz" + genome_motifs = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome_motifs.txt" + genome_config = "${params.test_data_base}/data/genomics/homo_sapiens/genome/genome_config.json" + + genome_1_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr1/genome.fasta.gz" + genome_1_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr1/genome.gtf" + + genome_21_sdf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome_sdf.tar.gz" + genome_21_fasta = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + genome_21_fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + genome_21_gencode_gtf = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chr21_gencode.gtf" + genome_21_dict = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + genome_21_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.sizes" + genome_21_interval_list = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list" + genome_21_annotated_bed = 
"${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/annotated.bed" + genome_21_multi_interval_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + genome_21_multi_interval_antitarget_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.antitarget.bed" + genome_21_multi_interval_bed_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" + genome_21_multi_interval_bed_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" + genome_21_chromosomes_dir = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + genome_21_reference_cnn = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn" + genome_21_eigenstrat_snp = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chr_21.snp" + genome_21_stitch_posfile = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/sequence/dbsnp_138.hg38.first_10_biallelic_sites.tsv" + + dbsnp_146_hg38_elsites = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites" + dbsnp_146_hg38_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + dbsnp_146_hg38_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" + gnomad_r2_1_1_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" + gnomad_r2_1_1_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" + mills_and_1000g_indels_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" + mills_and_1000g_indels_vcf_gz_tbi = 
"${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" + syntheticvcf_short_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz" + syntheticvcf_short_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz.tbi" + syntheticvcf_short_score = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.score" + gnomad_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz" + gnomad2_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD2.r2.1.1-sv.vcf.gz" + + hapmap_3_3_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz" + hapmap_3_3_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi" + res_1000g_omni2_5_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz" + res_1000g_omni2_5_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz.tbi" + res_1000g_phase1_snps_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz" + res_1000g_phase1_snps_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz.tbi" + dbsnp_138_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" + dbsnp_138_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi" + gnomad_r2_1_1_21_vcf_gz = 
"${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" + gnomad_r2_1_1_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi" + mills_and_1000g_indels_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + mills_and_1000g_indels_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi" + haplotype_map = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/haplotype_map.txt" + dbNSFP_4_1a_21_hg38_txt_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbNSFP4.1a.21.txt.gz" + dbNSFP_4_1a_21_hg38_txt_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbNSFP4.1a.21.txt.gz.tbi" + + index_salmon = "${params.test_data_base}/data/genomics/homo_sapiens/genome/index/salmon" + repeat_expansions = "${params.test_data_base}/data/genomics/homo_sapiens/genome/loci/repeat_expansions.json" + justhusky_ped = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped" + justhusky_minimal_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz" + justhusky_minimal_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi" + + vcfanno_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz" + vcfanno_toml = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml" + updsites_bed = "${params.test_data_base}/data/genomics/homo_sapiens/genome/updsites.bed" + + prg_input = "${params.test_data_base}/data/genomics/homo_sapiens/genome/PRG_test.zip" + crispr_functional_counts = 
"${params.test_data_base}/data/genomics/homo_sapiens/genome/tsv/functional_genomics_counts.tsv" + crispr_functional_library = "${params.test_data_base}/data/genomics/homo_sapiens/genome/tsv/library_functional_genomics.tsv" + + vep_cache = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vep.tar.gz" + affy_array_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/array_expression/GSE38751.csv" + affy_array_celfiles_tar = "${params.test_data_base}/data/genomics/homo_sapiens/array_expression/GSE38751_RAW.tar" + + } + 'pangenome' { + pangenome_fa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa" + pangenome_fa_bgzip = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz" + pangenome_fa_bgzip_fai = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz.fai" + pangenome_fa_bgzip_gzi = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.fa.gz.gzi" + pangenome_paf = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.paf" + pangenome_paf_gz = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.paf.gz" + pangenome_seqwish_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.seqwish.gfa" + pangenome_smoothxg_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.smoothxg.gfa" + pangenome_gfaffix_gfa = "${params.test_data_base}/data/pangenomics/homo_sapiens/pangenome.gfaffix.gfa" + 'odgi' { + pangenome_og = "${params.test_data_base}/data/pangenomics/homo_sapiens/odgi/pangenome.og" + pangenome_lay = "${params.test_data_base}/data/pangenomics/homo_sapiens/odgi/pangenome.lay" + } + } + 'illumina' { + test_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam" + test_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai" + test_paired_end_name_sorted_bam = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam" + test_paired_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam" + test_paired_end_markduplicates_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai" + test_paired_end_markduplicates_sorted_referencesn_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.referencesn.txt" + test_paired_end_recalibrated_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam" + test_paired_end_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai" + test_paired_end_umi_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_consensus.bam" + test_paired_end_umi_converted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_converted.bam" + test_paired_end_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_grouped.bam" + test_paired_end_umi_histogram_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_histogram.txt" + test_paired_end_umi_unsorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_unsorted.bam" + test_paired_end_umi_unsorted_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.unsorted_tagged.bam" + test_paired_end_hla = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.bam" + test_paired_end_hla_sorted_bam = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam" + test_paired_end_hla_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/example_hla_pe.sorted.bam.bai" + test_rna_paired_end_sorted_chr6_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.chr6.bam" + test_rna_paired_end_sorted_chr6_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.chr6.bam.bai" + + test2_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam" + test2_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai" + test2_paired_end_name_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam" + test2_paired_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam" + test2_paired_end_markduplicates_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai" + test2_paired_end_recalibrated_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam" + test2_paired_end_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai" + test2_paired_end_umi_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_consensus.bam" + test2_paired_end_umi_converted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_converted.bam" + test2_paired_end_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_grouped.bam" + 
test2_paired_end_umi_histogram_txt = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_histogram.txt" + test2_paired_end_umi_unsorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam" + test2_paired_end_umi_unsorted_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam" + test_paired_end_duplex_umi_unmapped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_unmapped.bam" + test_paired_end_duplex_umi_mapped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped.bam" + test_paired_end_duplex_umi_mapped_tagged_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_mapped_tagged.bam" + test_paired_end_duplex_umi_grouped_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam" + test_paired_end_duplex_umi_duplex_consensus_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_duplex_consensus.bam" + + mitochon_standin_recalibrated_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" + mitochon_standin_recalibrated_sorted_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" + test_illumina_mt_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam" + test_illumina_mt_bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test_illumina_mt.bam.bai" + + test3_single_end_markduplicates_sorted_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam" + + read_group_settings_txt = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bam/read_group_settings.txt" + + test_paired_end_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram" + test_paired_end_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai" + test_paired_end_markduplicates_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram" + test_paired_end_markduplicates_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai" + test_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram" + test_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai" + + test2_paired_end_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram" + test2_paired_end_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai" + test2_paired_end_markduplicates_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram" + test2_paired_end_markduplicates_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai" + test2_paired_end_recalibrated_sorted_cram = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram" + test2_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai" + test3_paired_end_recalibrated_sorted_cram = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram" + test3_paired_end_recalibrated_sorted_cram_crai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai" + + test_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + test_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz" + test_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz" + test2_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz" + test2_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz" + test2_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz" + test_rnaseq_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz" + test_rnaseq_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz" + test_paired_end_duplex_umi_1_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_duplex_umi_1.fastq.gz" + test_paired_end_duplex_umi_2_fastq_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/fastq/test_duplex_umi_2.fastq.gz" + + test_baserecalibrator_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table" + test2_baserecalibrator_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" + test_pileups_table = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test.pileups.table" + test2_pileups_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + + test_paired_end_sorted_dragstrmodel = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt" + + test_genomicsdb_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" + test_pon_genomicsdb_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" + + test2_haplotc_ann_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" + test2_haplotc_ann_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" + test_haplotc_cnn_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz" + test_haplotc_cnn_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi" + + test2_haplotc_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz" + test2_haplotc_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi" + + test2_recal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal" + test2_recal_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx" + test2_tranches = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches" + test2_allele_specific_recal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal" + 
test2_allele_specific_recal_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal.idx" + test2_allele_specific_tranches = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.tranches" + + test_test2_paired_mutect2_calls_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz" + test_test2_paired_mutect2_calls_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi" + test_test2_paired_mutect2_calls_vcf_gz_stats = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.stats" + test_test2_paired_mutect2_calls_f1r2_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.f1r2.tar.gz" + test_test2_paired_mutect2_calls_artifact_prior_tar_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired_mutect2_calls.artifact-prior.tar.gz" + test_test2_paired_segmentation_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired.segmentation.table" + test_test2_paired_contamination_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/test_test2_paired.contamination.table" + + test_genome_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf" + test_genome_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz" + test_genome_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi" + test_genome_vcf_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx" + + test_genome_vcf_ud = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.UD" + test_genome_vcf_mu = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.mu" + test_genome_vcf_bed = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/svd/test.genome.vcf.bed" + + test2_genome_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf" + test2_genome_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz" + test2_genome_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi" + test2_genome_vcf_idx = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx" + + test_genome21_indels_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz" + test_genome21_indels_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz.tbi" + + test_mpileup = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/mpileup/test.mpileup.gz" + test2_mpileup = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/mpileup/test2.mpileup.gz" + + test_broadpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak" + test2_broadpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak" + + test_narrowpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak" + test2_narrowpeak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak" + + test_yak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/yak/test.yak" + test2_yak = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/yak/test2.yak" + + cutandrun_bedgraph_test_1 = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph" + cutandrun_bedgraph_test_2 = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph" + + empty_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz" + empty_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz.tbi" + + simulated_sv = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz" + simulated_sv_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv.vcf.gz.tbi" + simulated_sv2 = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz" + simulated_sv2_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/chr21/simulated_sv2.vcf.gz.tbi" + + test_rnaseq_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf" + test_sv_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" + test_sv_vcf_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz.tbi" + genmod_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/genmod.vcf.gz" + genmod_annotate_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_annotate.vcf.gz" + genmod_models_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_models.vcf.gz" + genmod_score_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/test_score.vcf.gz" + + test_mito_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz" + + test_pytor = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/pytor/test.pytor" + rank_model = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/genmod/svrank_model_-v1.8-.ini" + + test_flowcell = 
"${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" + test_flowcell_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv" + + varlociraptor_scenario = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/varlociraptor/scenario.yml" + + contig_ploidy_priors_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + + purecn_ex1_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam" + purecn_ex1_bai = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam.bai" + purecn_ex1_interval = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1_intervals.txt" + purecn_ex1_normal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1_normal.txt.gz" + purecn_ex2_normal = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex2_normal.txt.gz" + purecn_normalpanel_vcf = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_normalpanel.vcf.gz" + purecn_normalpanel_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_normalpanel.vcf.gz.tbi" + } + 'pacbio' { + primers = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/primers.fasta" + alz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.bam" + alzpbi = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.bam.pbi" + ccs = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.bam" + ccs_fa = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta" + ccs_fa_gz = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fasta/alz.ccs.fasta.gz" + ccs_fq = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq" + ccs_fq_gz = 
"${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq.gz" + ccs_xml = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/xml/alz.ccs.consensusreadset.xml" + hifi = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz" + lima = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.bam" + refine = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.bam" + cluster = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.bam" + singletons = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.bam" + aligned = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam" + alignedbai = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bam/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned.bam.bai" + genemodel1 = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed" + genemodel2 = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed" + filelist = "${params.test_data_base}/data/genomics/homo_sapiens/pacbio/txt/filelist.txt" + } + 'scramble' { + fasta = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.fa" + fasta_fai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.fa.fai" + bam = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bam" + bam_bai = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bam.bai" + cram = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.cram" + cram_crai = 
"${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.cram.crai" + bed = "${params.test_data_base}/data/genomics/homo_sapiens/scramble/test.bed" + } + 'gene_set_analysis' { + gct = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/P53_6samples_collapsed_symbols.gct" + cls = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/P53_6samples.cls" + gmx = "${params.test_data_base}/data/genomics/homo_sapiens/gene_set_analysis/c1.symbols.reduced.gmx" + } + 'cnvkit' { + amplicon_cnr = "https://raw.githubusercontent.com/etal/cnvkit/v0.9.9/test/formats/amplicon.cnr" + amplicon_cns = "https://raw.githubusercontent.com/etal/cnvkit/v0.9.9/test/formats/amplicon.cns" + } + } + 'bacteroides_fragilis' { + 'genome' { + genome_fna_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz" + genome_gbff_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gbff.gz" + genome_paf = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.paf" + genome_gff_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.gff.gz" + + } + 'hamronization' { + genome_abricate_tsv = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.abricate.tsv" + genome_mapping_potential_arg = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/hamronization/genome.mapping.potential.ARG" + } + 'illumina' { + test1_contigs_fa_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz" + test1_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz" + test1_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz" + test2_1_fastq_gz = 
"${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_1.fastq.gz" + test2_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_2.fastq.gz" + test1_paired_end_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.bam" + test1_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam" + test1_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam.bai" + test2_paired_end_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.bam" + test2_paired_end_sorted_bam = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.sorted.bam" + test2_paired_end_sorted_bam_bai = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test2.sorted.bam.bai" + } + 'nanopore' { + test_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/fastq/test.fastq.gz" + overlap_paf = "${params.test_data_base}/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/overlap.paf" + } + } + 'candidatus_portiera_aleyrodidarum' { + 'genome' { + genome_fasta = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta" + genome_sizes = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.sizes" + genome_aln_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.aln.gz" + genome_aln_nwk = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.aln.nwk" + proteome_fasta = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta" + test1_gff = 
"${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff" + test2_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test2.gff" + test3_gff = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test3.gff" + } + 'illumina' { + test_1_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_1.fastq.gz" + test_2_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_2.fastq.gz" + test_se_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/illumina/fastq/test_se.fastq.gz" + } + 'nanopore' { + test_fastq_gz = "${params.test_data_base}/data/genomics/prokaryotes/candidatus_portiera_aleyrodidarum/nanopore/fastq/test.fastq.gz" + } + } + 'haemophilus_influenzae' { + 'genome' { + genome_fna_gz = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.fna.gz" + genome_aln_gz = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.aln.gz" + genome_aln_nwk = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.aln.nwk" + } + } + 'generic' { + 'csv' { + test_csv = "${params.test_data_base}/data/generic/csv/test.csv" + } + 'notebooks' { + rmarkdown = "${params.test_data_base}/data/generic/notebooks/rmarkdown/rmarkdown_notebook.Rmd" + ipython_md = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.md" + ipython_ipynb = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.ipynb" + } + 'tar' { + tar_gz = "${params.test_data_base}/data/generic/tar/hello.tar.gz" + } + 'tsv' { + test_tsv = "${params.test_data_base}/data/generic/tsv/test.tsv" + } + 'txt' { + hello = "${params.test_data_base}/data/generic/txt/hello.txt" + } + 'cooler'{ 
+ test_pairix_pair_gz = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" + test_pairix_pair_gz_px2 = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" + test_pairs_pair = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.sample1.pairs" + test_tabix_pair_gz = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz" + test_tabix_pair_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.sorted.possrt.txt.gz.tbi" + hg19_chrom_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/cload/hg19/hg19.chrom.sizes" + test_merge_cool = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cool" + test_merge_cool_cp2 = "${params.test_data_base}/data/genomics/homo_sapiens/cooler/merge/toy/toy.symm.upper.2.cp2.cool" + + } + 'pairtools' { + mock_4dedup_pairsam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.4dedup.pairsam" + mock_4flip_pairs = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.4flip.pairs" + mock_chrom_sizes = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.chrom.sizes" + mock_pairsam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.pairsam" + mock_sam = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/mock.sam" + frag_bed = "${params.test_data_base}/data/genomics/homo_sapiens/pairtools/frag.bed" + } + 'config' { + ncbi_user_settings = "${params.test_data_base}/data/generic/config/ncbi_user_settings.mkfg" + } + 'unsorted_data' { + 'unsorted_text' { + genome_file = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.genome" + intervals = 
"${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.bed" + numbers_csv = "${params.test_data_base}/data/generic/unsorted_data/unsorted_text/test.csv" + } + } + } + 'proteomics' { + 'msspectra' { + ups_file1 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_12.raw" + ups_file2 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_14.raw" + } + 'database' { + yeast_ups = "${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta" + } + 'maxquant' { + mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv" + mq_proteingroups = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_proteinGroups.txt" + mq_samplesheet = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_samplesheet.tsv" + mq_proteus_mat = "${params.test_data_base}/data/proteomics/maxquant/proteus.raw_MaxQuant_proteingroups_tab.tsv" + } + 'parameter' { + maxquant = "${params.test_data_base}/data/proteomics/parameter/mqpar.xml" + } + 'idfile' { + openms_idxml = "${params.test_data_base}/data/proteomics/openms_idxml/BSA_QC_file.idXML" + } + } + 'galaxea_fascicularis' { + hic { + pretext = "${params.test_data_base}/data/genomics/eukaryotes/galaxea_fascicularis/hic/jaGalFasc40_2.pretext" + } + } + 'deilephila_porcellus' { + 'mito' { + ref_fa = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.fasta" + ref_gb = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/MW539688.1.gb" + hifi_reads = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.HiFi.reads.fa" + contigs = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.contigs.fa" + } + } + 'imaging' { + 'h5' { + plant_wga = "${params.test_data_base}/data/imaging/h5/plant_wga.h5" + plant_wga_prob = "${params.test_data_base}/data/imaging/h5/plant_wga_probabilities.h5" + } + 'ilp' { + plant_wga_multicut = 
"${params.test_data_base}/data/imaging/ilp/plant_wga.multicut.ilp" + plant_wga_pixel_class = "${params.test_data_base}/data/imaging/ilp/plant_wga.pixel_prob.ilp" + } + 'tiff' { + mouse_heart_wga = "${params.test_data_base}/data/imaging/tiff/mindagap.mouse_heart.wga.tiff" + } + 'ome-tiff' { + cycif_tonsil_channels = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-channels.csv" + cycif_tonsil_cycle1 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif" + cycif_tonsil_cycle2 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + cycif_tonsil_cycle3 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle3.ome.tif" + cycif_tonsil_dfp = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-dfp.ome.tif" + cycif_tonsil_ffp = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-ffp.ome.tif" + } + 'registration' { + markers = "${params.test_data_base}/data/imaging/registration/markers.csv" + cycle1 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle1.ome.tif" + cycle2 = "${params.test_data_base}/data/imaging/ome-tiff/cycif-tonsil-cycle2.ome.tif" + } + 'segmentation' { + markers = "${params.test_data_base}/data/imaging/segmentation/markers.csv" + image = "${params.test_data_base}/data/imaging/segmentation/cycif_tonsil_registered.ome.tif" + } + 'quantification' { + markers = "${params.test_data_base}/data/imaging/quantification/markers.csv" + image = "${params.test_data_base}/data/imaging/quantification/cycif_tonsil_registered.ome.tif" + mask = "${params.test_data_base}/data/imaging/quantification/cell.ome.tif" + } + 'downstream' { + markers = "${params.test_data_base}/data/imaging/downstream/markers.csv" + cell_feature_array = "${params.test_data_base}/data/imaging/downstream/cycif_tonsil_cell.csv" + } + 'background_subtraction' { + markers = "${params.test_data_base}/data/imaging/background_subtraction/markers.csv" + image = 
"${params.test_data_base}/data/imaging/background_subtraction/cycif_tonsil_registered.ome.tif" + } + 'core_detection' { + image = "${params.test_data_base}/data/imaging/core_detection/single_core_dapi.tif" + } + } + } +} diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv new file mode 100644 index 00000000..87f39e83 --- /dev/null +++ b/tests/csv/chunks.csv @@ -0,0 +1,3 @@ +panel,chr,file +1000GP,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/hum_data/panel/chr22/1000GP.chr22_chunks.txt" +1000GP,chr21,"https://github.com/nf-core/test-datasets/raw/phaseimpute/hum_data/panel/chr21/1000GP.chr21_chunks.txt" diff --git a/tests/csv/map.csv b/tests/csv/map.csv new file mode 100644 index 00000000..96257bff --- /dev/null +++ b/tests/csv/map.csv @@ -0,0 +1,3 @@ +chr,map +chr21,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38_21.map +chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38_22.map diff --git a/tests/csv/panel.csv b/tests/csv/panel.csv new file mode 100644 index 00000000..190330af --- /dev/null +++ b/tests/csv/panel.csv @@ -0,0 +1,3 @@ +panel,chr,vcf,index +1000GP,chr21,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.s.norel.vcf.gz.csi +1000GP,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.s.norel.vcf.gz.csi diff --git a/tests/csv/panel_dog.csv b/tests/csv/panel_dog.csv new file mode 100644 index 00000000..59739626 --- /dev/null +++ b/tests/csv/panel_dog.csv @@ -0,0 +1,3 @@ +panel,chr,vcf,index 
+658Dog,chr21,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/dog_data/panel/21/658_dog.21.s.norel.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/dog_data/panel/21/658_dog.21.s.norel.vcf.gz.csi +658Dog,chr22,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/dog_data/panel/22/658_dog.22.s.norel.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/dog_data/panel/22/658_dog.22.s.norel.vcf.gz.csi diff --git a/tests/csv/panel_full.csv b/tests/csv/panel_full.csv new file mode 100644 index 00000000..a2a18ee0 --- /dev/null +++ b/tests/csv/panel_full.csv @@ -0,0 +1,23 @@ +panel,chr,vcf,index +1000GP,chr1,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr1.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr1.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr2,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr2.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr2.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr3,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr3.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr3.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr4,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr4.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr4.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr5,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr5.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr5.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr6,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr6.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr6.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr7,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr7.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr7.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr8,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr8.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr8.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr9,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr9.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr9.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr10,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr10.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr10.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr11,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr11.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr11.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr12,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr12.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr12.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr13,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr13.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr13.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr14,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr14.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr14.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr15,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr15.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr15.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr16,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr16.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr16.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr17,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr17.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr17.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr18,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr18.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr18.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr19,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr19.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr19.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr20,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr20.filtered.shapeit2-duohmm-phased.vcf.gz.tbi 
+1000GP,chr21,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr22,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz.tbi diff --git a/tests/csv/panel_fullchr.csv b/tests/csv/panel_fullchr.csv new file mode 100644 index 00000000..a74ec309 --- /dev/null +++ b/tests/csv/panel_fullchr.csv @@ -0,0 +1,3 @@ +panel,chr,vcf,index +1000GP,chr21,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.filtered.shapeit2-duohmm-phased.vcf.gz.tbi +1000GP,chr22,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz,http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/working/20201028_3202_phased/CCDG_14151_B01_GRM_WGS_2020-08-05_chr22.filtered.shapeit2-duohmm-phased.vcf.gz.tbi diff --git a/tests/csv/posfile.csv b/tests/csv/posfile.csv new file mode 100644 index 00000000..3f4a6474 --- /dev/null +++ b/tests/csv/posfile.csv @@ -0,0 +1,3 @@ 
+panel,chr,vcf,index,hap,legend +1000GP,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.legend.gz" +1000GP,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.sites.vcf.gz.csi","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.legend.gz" diff --git a/tests/csv/posfile_hap_legend.csv b/tests/csv/posfile_hap_legend.csv new file mode 100644 index 00000000..55e0028f --- /dev/null +++ b/tests/csv/posfile_hap_legend.csv @@ -0,0 +1,3 @@ +panel,chr,hap,legend +1000GP,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.legend.gz" +1000GP,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.hap.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.legend.gz" diff --git a/tests/csv/posfile_legend.csv b/tests/csv/posfile_legend.csv new file mode 100644 index 00000000..d83b9c88 --- /dev/null +++ b/tests/csv/posfile_legend.csv @@ -0,0 +1,3 @@ +panel,chr,legend 
+1000GP,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.legend.gz" +1000GP,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.legend.gz" diff --git a/tests/csv/posfile_vcf_index.csv b/tests/csv/posfile_vcf_index.csv new file mode 100644 index 00000000..2f233b1d --- /dev/null +++ b/tests/csv/posfile_vcf_index.csv @@ -0,0 +1,3 @@ +panel,chr,vcf,index +1000GP,chr21,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr21/1000GP.chr21.sites.vcf.gz.csi" +1000GP,chr22,"https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.sites.vcf.gz","https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/panel/chr22/1000GP.chr22.sites.vcf.gz.csi" diff --git a/tests/csv/region.csv b/tests/csv/region.csv new file mode 100644 index 00000000..7ef04608 --- /dev/null +++ b/tests/csv/region.csv @@ -0,0 +1,3 @@ +chr,start,end +chr21,16570000,16610000 +chr22,16570000,16610000 diff --git a/tests/csv/region_full.csv b/tests/csv/region_full.csv new file mode 100644 index 00000000..26a6b080 --- /dev/null +++ b/tests/csv/region_full.csv @@ -0,0 +1,3 @@ +chr,start,end +chr21,0,46709983 +chr22,0,50818468 diff --git a/tests/csv/sample_bam.csv b/tests/csv/sample_bam.csv new file mode 100644 index 00000000..36ee6398 --- /dev/null +++ b/tests/csv/sample_bam.csv @@ -0,0 +1,4 @@ +sample,file,index +NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.1x.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.1x.bam.bai 
+NA19401,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.1x.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.1x.bam.bai +NA20359,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.1x.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.1x.bam.bai diff --git a/tests/csv/sample_sim.csv b/tests/csv/sample_sim.csv new file mode 100644 index 00000000..23fd5c21 --- /dev/null +++ b/tests/csv/sample_sim.csv @@ -0,0 +1,4 @@ +sample,file,index +NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam.bai +NA19401,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam.bai +NA20359,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.bam,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.bam.bai diff --git a/tests/csv/sample_sim_full.csv b/tests/csv/sample_sim_full.csv new file mode 100644 index 00000000..5100bfa1 --- /dev/null +++ b/tests/csv/sample_sim_full.csv @@ -0,0 +1,2 @@ +sample,file,index +HG001,ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR323/ERR3239334/NA12878.final.cram,ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR323/ERR3239334/NA12878.final.cram.crai diff --git a/tests/csv/sample_sim_full_truth.csv b/tests/csv/sample_sim_full_truth.csv new file mode 100644 index 00000000..e65884d9 --- /dev/null +++ b/tests/csv/sample_sim_full_truth.csv @@ -0,0 +1,2 @@ +sample,file,index 
+HG001,https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/NA12878_HG001/NISTv4.2.1/GRCh38/HG001_GRCh38_1_22_v4.2.1_benchmark.vcf.gz,https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/NA12878_HG001/NISTv4.2.1/GRCh38/HG001_GRCh38_1_22_v4.2.1_benchmark.vcf.gz.tbi diff --git a/tests/csv/sample_validate_imputed.csv b/tests/csv/sample_validate_imputed.csv new file mode 100644 index 00000000..77d03817 --- /dev/null +++ b/tests/csv/sample_validate_imputed.csv @@ -0,0 +1,4 @@ +sample,file,index +NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s_imputed.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s_imputed.bcf.csi +NA19401,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s_imputed.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s_imputed.bcf.csi +NA20359,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s_imputed.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s_imputed.bcf.csi diff --git a/tests/csv/sample_validate_truth.csv b/tests/csv/sample_validate_truth.csv new file mode 100644 index 00000000..12090dd3 --- /dev/null +++ b/tests/csv/sample_validate_truth.csv @@ -0,0 +1,4 @@ +sample,file,index +NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bcf.csi +NA19401,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bcf.csi 
+NA20359,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.bcf.csi diff --git a/tests/csv/sample_vcf.csv b/tests/csv/sample_vcf.csv new file mode 100644 index 00000000..e9ccb044 --- /dev/null +++ b/tests/csv/sample_vcf.csv @@ -0,0 +1,4 @@ +sample,file,index +NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.1x.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.1x.bcf.csi +NA19401,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.1x.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.1x.bcf.csi +NA20359,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.1x.bcf,https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA20359/NA20359.s.1x.bcf.csi diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..787aedfe --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/chrcheck/main.nf b/workflows/chrcheck/main.nf new file mode 100644 index 00000000..8ab2a0d4 --- /dev/null +++ b/workflows/chrcheck/main.nf @@ -0,0 +1,79 @@ +include { VCF_CHR_EXTRACT } from '../../modules/local/vcf_chr_extract' +include { BAM_CHR_EXTRACT } from '../../modules/local/bam_chr_extract' +include { BAM_CHR_RENAME_SAMTOOLS } from '../../subworkflows/local/bam_chr_rename_samtools' +include { VCF_CHR_RENAME_BCFTOOLS } from '../../subworkflows/local/vcf_chr_rename_bcftools' +include { checkChr } from 
'../../subworkflows/local/utils_nfcore_chrcheck_pipeline' +include { diffChr } from '../../subworkflows/local/utils_nfcore_chrcheck_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow CHRCHECK { + take: + ch_input // [[id], file, index, [chr]] + + main: + ch_versions = Channel.empty() + // Split the input between VCF and BAM files + ch_input = ch_input.branch{ + bam: it[1] =~ 'bam|cram' + vcf: it[1] =~ 'vcf|bcf' + other: it[1].size() > 0 + empty: true + } + + ch_input.other.map { + error "File: ${it[1]} is not a VCF, BCFT or BAM, CRAM file." + } + + // Check if channel is empty + ch_vcf_split = Channel.empty() + // Extract the contig names from the VCF files + VCF_CHR_EXTRACT(ch_input.vcf.map{ meta, file, _index, _chr -> [meta, file] }) + ch_versions = ch_versions.mix(VCF_CHR_EXTRACT.out.versions) + ch_vcf_split = checkChr(VCF_CHR_EXTRACT.out.chr, ch_input.vcf) + + ch_bam_split = Channel.empty() + // Extract the contig names from the BAM files + BAM_CHR_EXTRACT(ch_input.bam.map{ meta, file, _index, _chr -> [meta, file] }) + ch_versions = ch_versions.mix(BAM_CHR_EXTRACT.out.versions) + ch_bam_split = checkChr(BAM_CHR_EXTRACT.out.chr, ch_input.bam) + + if (params.rename_chr == true) { + ch_bam_renamed = Channel.empty() + // Rename the contigs in the BAM files + BAM_CHR_RENAME_SAMTOOLS( + ch_bam_split.to_rename.map{meta, bam, csi, _diff, prefix -> [meta, bam, csi, prefix]} + ) + ch_versions = ch_versions.mix(BAM_CHR_RENAME_SAMTOOLS.out.versions) + ch_bam_renamed = BAM_CHR_RENAME_SAMTOOLS.out.bam_renamed + + ch_vcf_renamed = Channel.empty() + // Rename the contigs in the VCF files + VCF_CHR_RENAME_BCFTOOLS(ch_vcf_split.to_rename) + ch_versions = ch_versions.mix(VCF_CHR_RENAME_BCFTOOLS.out.versions) + ch_vcf_renamed = VCF_CHR_RENAME_BCFTOOLS.out.vcf_renamed + } else { + 
ch_vcf_split.to_rename.map { + def chr_names = it[3].size() > params.max_chr_names ? it[3][0..params.max_chr_names - 1] + ['...'] : it[3] + error "Contig names: ${chr_names} in VCF: ${it[1]} are not present in reference genome with same writing. Please set `rename_chr` to `true` to rename the contigs." + } + ch_bam_split.to_rename.map { + def chr_names = it[3].size() > params.max_chr_names ? it[3][0..params.max_chr_names - 1] + ['...'] : it[3] + error "Contig names: ${chr_names} in BAM: ${it[1]} are not present in reference genome with same writing. Please set `rename_chr` to `true` to rename the contigs." + } + ch_vcf_renamed = Channel.empty() + ch_bam_renamed = Channel.empty() + } + + ch_output = ch_bam_split.no_rename + .mix(ch_vcf_split.no_rename) + .mix(ch_bam_renamed) + .mix(ch_vcf_renamed) + emit: + output = ch_output // [ [id], file, index ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/chrcheck/meta.yml b/workflows/chrcheck/meta.yml new file mode 100644 index 00000000..ad4431e7 --- /dev/null +++ b/workflows/chrcheck/meta.yml @@ -0,0 +1,34 @@ +name: "chrcheck" +description: Check the consistency of the chromosome names between the input and a set of references contigs +keywords: + - chr + - VCF + - BAM +components: + - samtools/reheader + - samtools/index + - bcftools/annotate + - bcftools/index +input: + - ch_input: + type: file + description: | + Meta map. + Target dataset in CRAM, BAM or VCF/BCF format. + Index file of the input file. + Array of reference contigs. + Structure: [ meta, file, index, [chr] ] +output: + - output: + type: file + description: | + Output file with the updated chromosome names. 
+ Structure: [meta, file, index] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@LouisLeNezet" +maintainers: + - "@LouisLeNezet" diff --git a/workflows/chrcheck/tests/main.nf.test b/workflows/chrcheck/tests/main.nf.test new file mode 100644 index 00000000..c6e063af --- /dev/null +++ b/workflows/chrcheck/tests/main.nf.test @@ -0,0 +1,230 @@ +nextflow_workflow { + + name "Test workflow CHRCHECK" + script "../main.nf" + + workflow "CHRCHECK" + + tag "workflows" + tag "workflows_local" + tag "workflows/chrcheck" + tag "chrcheck" + + tag "bcftools" + tag "bcftools/annotate" + tag "bcftools/index" + tag "gawk" + tag "samtools" + tag "samtools/reheader" + tag "samtools/index" + + test("Rename: VCF BAM chr + fasta no chr") { + config "./nextflow_rename.config" + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "VCF_chr22"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz',checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi',checkIfExist:true), + ["22"] + ], + [ + [id: "BAM_chr22"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExist:true), + ["22"] + ] + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.output + .collect{ + if (it[1].endsWith("vcf.gz")) { + path(it[1]).vcf.summary + } else { + bam(it[1]).getHeader().findAll { it.startsWith ("@SQ") } + } + }, + workflow.out.output.collect{ + file(it[1]).getName() + file(it[2]).getName() + }, + workflow.out.versions + ).match() + } + ) + } + } + + test("Rename: VCF no chr + fasta chr") { + config "./nextflow_rename.config" + tag "test" + setup { + 
run("BAM_CHR_RENAME_SAMTOOLS", alias: "PREPROCESS") { + script "../../../subworkflows/local/bam_chr_rename_samtools/main.nf" + process { + """ + input[0] = Channel.fromList([ + [ + [id: "BAM_22"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExist:true), + "nochr" + ] + ]) + """ + } + } + } + when { + workflow { + """ + input[0] = PREPROCESS.out.bam_renamed + .mix( + Channel.fromList([ + [ + [id: "VCF_AllNoChr"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz',checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi',checkIfExist:true), + ], + ]) + ) + .combine(Channel.of(["chr22"]).collect().toList()) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.output.collect{ + if (it[1].endsWith("vcf.gz")) { + path(it[1]).vcf.summary + } else { + bam(it[1]).getHeader().findAll { it.startsWith ("@SQ") } + } + }, + workflow.out.output.collect{ + file(it[1]).getName() + file(it[2]).getName() + }, + workflow.out.versions + ).match() + } + ) + } + } + + test("Error : missing renaming VCF") { + config "./nextflow.config" + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "VCF_chr22"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz',checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi',checkIfExist:true), + ["22"] + ] + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport + .contains("Contig names: [chr22] in VCF: /nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz are not present in 
reference genome with same writing. Please set `rename_chr` to `true` to rename the contigs.") + } + ) + } + } + + test("Error : missing renaming BAM") { + config "./nextflow.config" + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "BAM_chr22"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExist:true), + ["22"] + ] + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport + .contains("Contig names: [chr22] in BAM: /nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam are not present in reference genome with same writing. Please set `rename_chr` to `true` to rename the contigs.") + } + ) + } + } + + test("Error : still difference after renaming VCF"){ + config "./nextflow_rename.config" + when { + workflow { + """ + lst_chr = ["chr22", "chr34", "GL000207.1", "chr45", "chr46", "chr47", "chr48", "chr49"] + input[0] = Channel.fromList([ + [ + [id: "VCF_AllNoChr"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz',checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi',checkIfExist:true), + lst_chr + ], + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport.contains("Contig names: [chr34, GL000207.1, chr45, chr46, ...] 
absent from file: /nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz and cannot be solved by adding or removing the `chr` prefix.")} + ) + } + } + + test("Error : still difference after renaming BAM"){ + config "./nextflow_rename.config" + when { + workflow { + """ + input[0] = Channel.fromList([ + [ + [id: "BAM_chr22"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExist:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExist:true), + ["chr1"] + ], + ]) + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.errorReport.contains("Contig names: [chr1] absent from file: /nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam and cannot be solved by adding or removing the `chr` prefix.")} + ) + } + } +} diff --git a/workflows/chrcheck/tests/main.nf.test.snap b/workflows/chrcheck/tests/main.nf.test.snap new file mode 100644 index 00000000..0e085501 --- /dev/null +++ b/workflows/chrcheck/tests/main.nf.test.snap @@ -0,0 +1,54 @@ +{ + "Rename: VCF BAM chr + fasta no chr": { + "content": [ + [ + [ + "@SQ\tSN:22\tLN:40001" + ], + "VcfFile [chromosomes=[22], sampleCount=0, variantCount=2174, phased=true, phasedAutodetect=true]" + ], + [ + "BAM_chr22.bam.bai", + "VCF_chr22_chrrename.vcf.gz.tbi" + ], + [ + "versions.yml:md5,3ef072b2911c4ec69e8577fbc6cc26ab", + "versions.yml:md5,4ed3594603719fe5e9dd7b9f41ec1424", + "versions.yml:md5,7f7a21902e7f8f2e02152817c324e63f", + "versions.yml:md5,a36d2316798f70758d1fa92fa3b05cd3", + "versions.yml:md5,db9d56c857a695f01c1f9bc9440637f6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-28T23:39:02.415897354" + }, + "Rename: VCF no chr + fasta chr": { + "content": [ + [ + [ + "@SQ\tSN:chr22\tLN:40001" + ], + "VcfFile [chromosomes=[chr22], 
sampleCount=1, variantCount=4, phased=false, phasedAutodetect=false]" + ], + [ + "BAM_22.bam.bai", + "VCF_AllNoChr_chrrename.vcf.gz.tbi" + ], + [ + "versions.yml:md5,3ef072b2911c4ec69e8577fbc6cc26ab", + "versions.yml:md5,4ed3594603719fe5e9dd7b9f41ec1424", + "versions.yml:md5,7f7a21902e7f8f2e02152817c324e63f", + "versions.yml:md5,a36d2316798f70758d1fa92fa3b05cd3", + "versions.yml:md5,db9d56c857a695f01c1f9bc9440637f6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-28T23:39:22.12563119" + } +} \ No newline at end of file diff --git a/workflows/chrcheck/tests/nextflow.config b/workflows/chrcheck/tests/nextflow.config new file mode 100644 index 00000000..43b74e31 --- /dev/null +++ b/workflows/chrcheck/tests/nextflow.config @@ -0,0 +1,23 @@ +process { + resourceLimits = [ + cpus: 2, + memory: '7.GB', + time: '1.h' + ] +} + +params { + rename_chr = false + max_chr_names = 4 +} + +process { + withName: BCFTOOLS_ANNOTATE { + ext.args = [ + "-Oz", + "--no-version", + "--write-index=tbi" + ].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + } +} diff --git a/workflows/chrcheck/tests/nextflow_rename.config b/workflows/chrcheck/tests/nextflow_rename.config new file mode 100644 index 00000000..bd0e1f3c --- /dev/null +++ b/workflows/chrcheck/tests/nextflow_rename.config @@ -0,0 +1,26 @@ +process { + resourceLimits = [ + cpus: 2, + memory: '7.GB', + time: '1.h' + ] +} + +params { + rename_chr = true + max_chr_names = 4 +} + +process { + withName: BCFTOOLS_ANNOTATE { + ext.args = [ + "-Oz", + "--no-version", + "--write-index=tbi" + ].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + } + withName: 'PREPROCESS:SAMTOOLS_REHEADER' { + ext.prefix = { "${meta.id}_nochr" } + } +} diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf new file mode 100644 index 00000000..111fc315 --- /dev/null +++ b/workflows/phaseimpute/main.nf @@ -0,0 +1,557 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../../modules/nf-core/multiqc' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { getFilesSameExt } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { getFileExtension } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' +include { exportCsv } from '../../subworkflows/local/utils_nfcore_phaseimpute_pipeline' + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// + +// Simulate subworkflows +include { BAM_EXTRACT_REGION_SAMTOOLS } from '../../subworkflows/local/bam_extract_region_samtools' +include { BAM_DOWNSAMPLE_SAMTOOLS } from '../../subworkflows/local/bam_downsample_samtools' +include { SAMTOOLS_COVERAGE as SAMTOOLS_COVERAGE_INP } from '../../modules/nf-core/samtools/coverage' +include { SAMTOOLS_COVERAGE as SAMTOOLS_COVERAGE_DWN } from '../../modules/nf-core/samtools/coverage' +include { GAWK as FILTER_CHR_INP } from '../../modules/nf-core/gawk' +include { GAWK as FILTER_CHR_DWN } from '../../modules/nf-core/gawk' + +// Panelprep subworkflows +include { VCF_NORMALIZE_BCFTOOLS } from '../../subworkflows/local/vcf_normalize_bcftools' +include { VCF_SITES_EXTRACT_BCFTOOLS } from '../../subworkflows/local/vcf_sites_extract_bcftools' +include { VCF_PHASE_SHAPEIT5 } from '../../subworkflows/local/vcf_phase_shapeit5' +include { CHUNK_PREPARE_CHANNEL } from 
'../../subworkflows/local/chunk_prepare_channel' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } from '../../subworkflows/local/vcf_concatenate_bcftools' +include { BCFTOOLS_STATS as BCFTOOLS_STATS_PANEL } from '../../modules/nf-core/bcftools/stats' + +// Imputation +include { LIST_TO_FILE } from '../../modules/local/list_to_file' +include { BCFTOOLS_QUERY as BCFTOOLS_QUERY_IMPUTED } from '../../modules/nf-core/bcftools/query' +include { GAWK as GAWK_IMPUTED } from '../../modules/nf-core/gawk' +include { VCF_SPLIT_BCFTOOLS as SPLIT_IMPUTED } from '../../subworkflows/local/vcf_split_bcftools' + +// GLIMPSE1 subworkflows +include { BAM_GL_BCFTOOLS as GL_GLIMPSE1 } from '../../subworkflows/local/bam_gl_bcftools' +include { VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/local/vcf_impute_glimpse1' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' + +// GLIMPSE2 subworkflows +include { BAM_IMPUTE_GLIMPSE2 } from '../../subworkflows/local/bam_impute_glimpse2' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE2} from '../../subworkflows/local/vcf_concatenate_bcftools' + +// QUILT subworkflows +include { VCF_CHUNK_GLIMPSE } from '../../subworkflows/local/vcf_chunk_glimpse' +include { BAM_IMPUTE_QUILT } from '../../subworkflows/local/bam_impute_quilt' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/local/vcf_concatenate_bcftools' + +// STITCH subworkflows +include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' + +// Imputation stats +include { BCFTOOLS_STATS as BCFTOOLS_STATS_TOOLS } from '../../modules/nf-core/bcftools/stats' + +// Concordance subworkflows +include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' +include { BCFTOOLS_QUERY as BCFTOOLS_QUERY_TRUTH } from '../../modules/nf-core/bcftools/query' 
+include { GAWK as GAWK_TRUTH } from '../../modules/nf-core/gawk' +include { VCF_SPLIT_BCFTOOLS as SPLIT_TRUTH } from '../../subworkflows/local/vcf_split_bcftools' +include { BCFTOOLS_STATS as BCFTOOLS_STATS_TRUTH } from '../../modules/nf-core/bcftools/stats' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from '../../subworkflows/local/vcf_concatenate_bcftools' +include { VCF_CONCORDANCE_GLIMPSE2 } from '../../subworkflows/local/vcf_concordance_glimpse2' + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PHASEIMPUTE { + + take: + ch_input_impute // channel: input file [ [id], file, index ] + ch_input_sim // channel: input file [ [id], file, index ] + ch_input_validate // channel: input file [ [id], file, index ] + ch_input_truth // channel: truth file [ [id], file, index ] + ch_fasta // channel: fasta file [ [genome], fasta, fai ] + ch_panel // channel: panel file [ [id, chr], vcf, index ] + ch_region // channel: region to use [ [chr, region], region] + ch_depth // channel: depth select [ [depth], depth ] + ch_map // channel: genetic map [ [chr], map] + ch_posfile // channel: posfile [ [id, chr], vcf, index, hap, legend] + ch_chunks // channel: chunks [ [chr], txt] + chunk_model // parameter: chunk model + ch_versions // channel: versions of software used + + main: + + ch_multiqc_files = Channel.empty() + + // + // Simulate data if asked + // + if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { + // Test if the input are all bam files + getFilesSameExt(ch_input_sim) + .map{ if (it != "bam" & it != "cram") { + error "All input files must be in the same format, either BAM or CRAM, to perform simulation: ${it}" + } } + + if (params.input_region) { + // Split the bam into the regions specified + BAM_EXTRACT_REGION_SAMTOOLS(ch_input_sim, ch_region, 
ch_fasta) + ch_versions = ch_versions.mix(BAM_EXTRACT_REGION_SAMTOOLS.out.versions) + ch_input_sim = BAM_EXTRACT_REGION_SAMTOOLS.out.bam_region + } + + // Use input for simulation as truth for validation step + // if no truth is provided + if (!params.input_truth) { + ch_input_truth = ch_input_sim + } + + // Program to filter chromosomes + filter_chr_program = ch_region + .collect{ meta, region -> meta.chr } + .map { chr -> + "BEGIN { FS=\"\t\";\nsplit(\"" + chr.join(" ") + '", chr, " ");\n' + + 'for (i in chr) {\nchr_map[chr[i]] = 1;\n}\n}\n' + + 'NR == 1 || (\$1 in chr_map){\nprint \$0;\n}' + } + .collectFile(name:"program.txt") + .collect() + + // Compute coverage of input files + SAMTOOLS_COVERAGE_INP(ch_input_sim, ch_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_INP.out.versions) + ch_coverage = SAMTOOLS_COVERAGE_INP.out.coverage + + FILTER_CHR_INP( + SAMTOOLS_COVERAGE_INP.out.coverage, + filter_chr_program + ) + ch_versions = ch_versions.mix(FILTER_CHR_INP.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FILTER_CHR_INP.out.output.map{ it[1] }) + + if (params.depth) { + // Downsample input to desired depth + BAM_DOWNSAMPLE_SAMTOOLS(ch_input_sim, ch_depth, ch_fasta) + ch_versions = ch_versions.mix(BAM_DOWNSAMPLE_SAMTOOLS.out.versions) + ch_input_impute = BAM_DOWNSAMPLE_SAMTOOLS.out.bam_emul + + // Compute coverage of input files + SAMTOOLS_COVERAGE_DWN(BAM_DOWNSAMPLE_SAMTOOLS.out.bam_emul, ch_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_COVERAGE_DWN.out.versions) + + FILTER_CHR_DWN( + SAMTOOLS_COVERAGE_DWN.out.coverage, + filter_chr_program + ) + ch_versions = ch_versions.mix(FILTER_CHR_DWN.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FILTER_CHR_DWN.out.output.map{ it[1] }) + } + + if (params.genotype) { + error "Genotype simulation not yet implemented" + } + + // Create CSV from simulate step + exportCsv( + ch_input_impute.map{ meta, file, index -> + [meta, [2:"simulation/samples", 3:"simulation/samples"], file, index] + }, + 
["id"], "sample,file,index", + "simulate.csv", "simulation/csv" + ) + } + + // + // Prepare panel + // + if (params.steps.split(',').contains("panelprep") || params.steps.split(',').contains("all")) { + // Normalize indels in panel + VCF_NORMALIZE_BCFTOOLS(ch_panel, ch_fasta) + ch_panel_phased = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi + ch_versions = ch_versions.mix(VCF_NORMALIZE_BCFTOOLS.out.versions) + + // Extract sites from normalized vcf + VCF_SITES_EXTRACT_BCFTOOLS(ch_panel_phased, ch_fasta) + ch_versions = ch_versions.mix(VCF_SITES_EXTRACT_BCFTOOLS.out.versions) + + // Generate all necessary channels + ch_posfile = VCF_SITES_EXTRACT_BCFTOOLS.out.posfile + + // Phase panel with Shapeit5 + if (params.phase == true) { + VCF_PHASE_SHAPEIT5( + VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi.combine(Channel.of([[]])), + ch_region, + [[],[],[]], + [[],[],[]], + ch_map, + chunk_model + ) + ch_panel_phased = VCF_PHASE_SHAPEIT5.out.vcf_tbi + ch_versions = ch_versions.mix(VCF_PHASE_SHAPEIT5.out.versions) + } + + // Create chunks from reference VCF + VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map, chunk_model) + ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) + + // Assign chunks channels + ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 + ch_chunks_glimpse2 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse2 + ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt + + // Create CSVs from panelprep step + // Phased panel + exportCsv( + ch_panel_phased.map{ meta, vcf, index -> + [meta, [2:"prep_panel/panel", 3:"prep_panel/panel"], vcf, index] + }, + ["id", "chr"], "panel,chr,vcf,index", + "panel.csv", "prep_panel/csv" + ) + // Posfile + exportCsv( + ch_posfile.map{ meta, vcf, index, hap, legend -> + [meta, [2:"prep_panel/sites", 3:"prep_panel/haplegend", 4:"prep_panel/haplegend"], vcf, index, hap, legend] + }, + ["id", "chr"], "panel,chr,vcf,index,hap,legend", + "posfile.csv", "prep_panel/csv" + ) + // Chunks + exportCsv( + VCF_CHUNK_GLIMPSE.out.chunks.map{ meta, file -> + [meta, 
[2:"prep_panel/chunks"], file] + }, + ["id", "chr"], "panel,chr,file", + "chunks.csv", "prep_panel/csv" + ) + } + + // + // Impute target files + // + if (params.steps.split(',').contains("impute") || params.steps.split(',').contains("all")) { + // Split input files into BAMs and VCFs + ch_input_type = ch_input_impute + .branch { + bam: it[1] =~ 'bam|cram' + vcf: it[1] =~ '(vcf|bcf)(.gz)*' + other: true + } + + // Check if input files are only BAM/CRAM or VCF/BCF + ch_input_type.other + .map{ error "Input files must be either BAM/CRAM or VCF/BCF" } + + // Group BAMs by batch size + def nb_batch = -1 + ch_input_bams = ch_input_type.bam + .toSortedList { it1, it2 -> it1[0]["id"] <=> it2[0]["id"] } + .map { list -> list.collate(params.batch_size) + .collect{ nb_batch += 1; [[id: "all", batch: nb_batch], it] } } + .map { list -> [list.collect{ it[0] }, list.collect{ it[1] }] } + .transpose() + .map { metaI, filestuples-> [ + metaI + [metas: filestuples.collect{it[0].findAll{it.key != "batch"}}], + filestuples.collect{it[1]}, filestuples.collect{it[2]} + ] } + + LIST_TO_FILE( + ch_input_bams.map{ meta, file, _index -> [ + meta, file, meta.metas.collect { it.id } + ] } + ) + + ch_input_bams_withlist = ch_input_bams + .join(LIST_TO_FILE.out.txt) + + // Use panel from parameters if provided + if (params.panel && !params.steps.split(',').find { it in ["all", "panelprep"] }) { + ch_panel_phased = ch_panel + } + + if (params.tools.split(',').contains("glimpse1")) { + log.info("Impute with GLIMPSE1") + + // Use chunks from parameters if provided or use previous chunks from panelprep + if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL.out.chunks + } + + // Glimpse1 subworkflow + // Compute GL from BAM files and merge them + GL_GLIMPSE1( + ch_input_type.bam, + ch_posfile.map{ [it[0], it[4]] }, + ch_fasta + ) + ch_multiqc_files = ch_multiqc_files.mix(GL_GLIMPSE1.out.multiqc_files) + ch_versions = 
ch_versions.mix(GL_GLIMPSE1.out.versions) + + // Combine vcf and processed bam + ch_input_glimpse1 = ch_input_type.vcf + .mix(GL_GLIMPSE1.out.vcf_tbi) + + // Run imputation + VCF_IMPUTE_GLIMPSE1( + ch_input_glimpse1, + ch_panel_phased, + ch_chunks_glimpse1 + ) + ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE1.out.versions) + + // Concatenate by chromosomes + CONCAT_GLIMPSE1(VCF_IMPUTE_GLIMPSE1.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE1.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE1.out.vcf_tbi) + + } + + if (params.tools.split(',').contains("glimpse2")) { + log.info("Impute with GLIMPSE2") + + if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse") + ch_chunks_glimpse2 = CHUNK_PREPARE_CHANNEL.out.chunks + } + + // Run imputation + BAM_IMPUTE_GLIMPSE2( + ch_input_bams_withlist + .map{ [it[0], it[1], it[2], it[3]] } + .mix(ch_input_type.vcf.combine([])), + ch_panel_phased, + ch_chunks_glimpse2, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_IMPUTE_GLIMPSE2.out.versions) + // Concatenate by chromosomes + CONCAT_GLIMPSE2(BAM_IMPUTE_GLIMPSE2.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE2.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE2.out.vcf_tbi) + } + + if (params.tools.split(',').contains("stitch")) { + log.info("Impute with STITCH") + + // Impute with STITCH + BAM_IMPUTE_STITCH ( + ch_input_bams_withlist.map{ [it[0], it[1], it[2], it[4]] }, + ch_posfile.map{ [it[0], it[4]] }, + ch_region, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_IMPUTE_STITCH.out.versions) + + // Concatenate by chromosomes + CONCAT_STITCH(BAM_IMPUTE_STITCH.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_STITCH.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_STITCH.out.vcf_tbi) + + } + + if (params.tools.split(',').contains("quilt")) { + log.info("Impute with 
QUILT") + + // Use provided chunks if --chunks + if (params.chunks) { + CHUNK_PREPARE_CHANNEL(ch_chunks, "quilt") + ch_chunks_quilt = CHUNK_PREPARE_CHANNEL.out.chunks + } + + // Impute BAMs with QUILT + BAM_IMPUTE_QUILT( + ch_input_bams_withlist.map{ [it[0], it[1], it[2], it[4]] }, + ch_posfile.map{ [it[0], it[3], it[4]] }, + ch_chunks_quilt, + ch_fasta.map{ [it[0], it[1]] } + ) + ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) + + // Concatenate by chromosomes + CONCAT_QUILT(BAM_IMPUTE_QUILT.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_QUILT.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_QUILT.out.vcf_tbi) + } + + // Prepare renaming file + BCFTOOLS_QUERY_IMPUTED(ch_input_validate, [], [], []) + GAWK_IMPUTED(BCFTOOLS_QUERY_IMPUTED.out.output, []) + ch_split_imputed = ch_input_validate.join(GAWK_IMPUTED.out.output) + + // Split result by samples + SPLIT_IMPUTED(ch_split_imputed) + ch_versions = ch_versions.mix(SPLIT_IMPUTED.out.versions) + ch_input_validate = SPLIT_IMPUTED.out.vcf_tbi + + // Compute stats on imputed files + BCFTOOLS_STATS_TOOLS( + ch_input_validate, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_TOOLS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_TOOLS.out.stats.map{ [it[1]] }) + + // Export all files to csv + exportCsv( + ch_input_validate.map{ meta, file, index -> + [meta, [2:"imputation/${meta.tools}/samples/", 3:"imputation/${meta.tools}/samples/"], file, index] + }, + ["id", "tools"], "sample,tools,vcf,index", + "impute.csv", "imputation/csv" + ) + } + + if (params.steps.split(',').contains("validate") || params.steps.split(',').contains("all")) { + // Concatenate all sites into a single VCF (for GLIMPSE concordance) + CONCAT_PANEL(ch_posfile.map{ [it[0], it[1], it[2]] }) + ch_versions = ch_versions.mix(CONCAT_PANEL.out.versions) + ch_panel_sites = 
CONCAT_PANEL.out.vcf_tbi + + // Compute stats on panel + BCFTOOLS_STATS_PANEL( + ch_panel_sites, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] }) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_PANEL.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_PANEL.out.stats.map{ [it[1]] }) + + ch_truth_vcf = Channel.empty() + + // Channels for branching + ch_truth = ch_input_truth + .map { [it[0], it[1], it[2], getFileExtension(it[1])] } + .branch { + bam: it[3] =~ 'bam|cram' + vcf: it[3] =~ '(vcf|bcf)(.gz)*' + other: true + } + + ch_truth.other + .map{ error "Input files must be either BAM/CRAM or VCF/BCF" } + + GL_TRUTH( + ch_truth.bam.map { [it[0], it[1], it[2]] }, + ch_posfile.map{ [it[0], it[4]] }, + ch_fasta + ) + ch_versions = ch_versions.mix(GL_TRUTH.out.versions) + + // Mix the original vcf and the computed vcf + ch_truth_vcf = ch_truth.vcf + .map { [it[0], it[1], it[2]] } + .mix(GL_TRUTH.out.vcf_tbi) + + // Concatenate truth vcf by chromosomes + CONCAT_TRUTH(ch_truth_vcf) + ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + + // Prepare renaming file + BCFTOOLS_QUERY_TRUTH(CONCAT_TRUTH.out.vcf_tbi, [], [], []) + GAWK_TRUTH(BCFTOOLS_QUERY_TRUTH.out.output, []) + ch_split_truth = CONCAT_TRUTH.out.vcf_tbi.join(GAWK_TRUTH.out.output) + + // Split truth vcf by samples + SPLIT_TRUTH(ch_split_truth) + ch_versions = ch_versions.mix(SPLIT_TRUTH.out.versions) + + // Compute stats on truth files + BCFTOOLS_STATS_TRUTH( + SPLIT_TRUTH.out.vcf_tbi, + [[],[]], + [[],[]], + [[],[]], + [[],[]], + ch_fasta.map{ [it[0], it[1]] } + ) + ch_versions = ch_versions.mix(BCFTOOLS_STATS_TRUTH.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS_TRUTH.out.stats.map{ [it[1]] }) + + // Compute concordance analysis + VCF_CONCORDANCE_GLIMPSE2( + ch_input_validate, + SPLIT_TRUTH.out.vcf_tbi, + ch_panel_sites, + ch_region + ) + ch_multiqc_files = ch_multiqc_files.mix(VCF_CONCORDANCE_GLIMPSE2.out.multiqc_files) + ch_versions 
= ch_versions.mix(VCF_CONCORDANCE_GLIMPSE2.out.versions) + } + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', + sort: true, newLine: true + ).set { ch_collated_versions } + + // + // MODULE: MultiQC + // + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_replace_names = params.multiqc_replace_names ? Channel.fromPath(params.multiqc_replace_names, checkIfExists: true) : Channel.empty() + ch_multiqc_sample_names = params.multiqc_sample_names ? 
Channel.fromPath(params.multiqc_sample_names, checkIfExists: true) : Channel.empty() + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + ch_multiqc_replace_names.toList(), + ch_multiqc_sample_names.toList() + ) + + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/phaseimpute/tests/main.nf.test b/workflows/phaseimpute/tests/main.nf.test new file mode 100644 index 00000000..6d9eeca1 --- /dev/null +++ b/workflows/phaseimpute/tests/main.nf.test @@ -0,0 +1,301 @@ +nextflow_pipeline { + + name "Test phaseimpute workflow" + script "main.nf" + tag "pipeline" + tag "pipeline/phaseimpute" + + + test("Check test") { + tag "test" + config "../../../conf/test.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("$outputDir/imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz").vcf.header.getGenotypeSamples().sort() + ).match() + } + ) + } + + } + + test("Check test_glimpse2") { + tag "test_glimpse2" + config "../../../conf/test_glimpse2.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { 
assert workflow.success }, + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("$outputDir/imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz").vcf.header.getGenotypeSamples().sort() + ).match() + } + ) + } + } + + test("Check test_quilt") { + tag "test_quilt" + config "../../../conf/test_quilt.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/quilt/concat/all.batch0.quilt.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("$outputDir/imputation/quilt/concat/all.batch0.quilt.vcf.gz").vcf.header.getGenotypeSamples().sort() + ).match() + } + ) + } + + } + + test("Check test_stitch") { + tag "test_stitch" + config "../../../conf/test_stitch.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/stitch/concat/all.batch0.stitch.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("$outputDir/imputation/stitch/concat/all.batch0.stitch.vcf.gz").vcf.header.getGenotypeSamples().sort() + ).match() + } + ) + } + + } + + test("Check test_sim") { + tag "test_sim" + config "../../../conf/test_sim.config" + when { + params { + pipelines_testdata_base_path = 
'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/simulation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + ).match() } + ) + } + } + + test("Check test_validate") { + tag "test_validate" + config "../../../conf/test_validate.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/validation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten() + ).match() } + ) + } + + } + + test("Check test_all") { + tag "test_all" + config "../../../conf/test_all.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/simulation") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/imputation") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/prep_panel") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/validation") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("${outputDir}/imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz").vcf.header.getGenotypeSamples().sort(), + path("${outputDir}/imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + 
path("${outputDir}/imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz").vcf.header.getGenotypeSamples().sort(), + path("${outputDir}/imputation/stitch/concat/all.batch0.stitch.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("${outputDir}/imputation/stitch/concat/all.batch0.stitch.vcf.gz").vcf.header.getGenotypeSamples().sort(), + path("${outputDir}/imputation/quilt/concat/all.batch0.quilt.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("${outputDir}/imputation/quilt/concat/all.batch0.quilt.vcf.gz").vcf.header.getGenotypeSamples().sort() + ).match() + } + ) + } + } + + test("Check test_dog") { + tag "test_dog" + config "../../../conf/test_dog.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/prep_panel") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/imputation") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + ).match() + } + ) + } + } + + test("Check test_panelprep") { + tag "test_panelprep" + config "../../../conf/test_panelprep.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/prep_panel") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + ).match() + } + ) + } + } + + test("Check test_batch") { + tag "test_batch" + config "../../../conf/test_batch.config" + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/' + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("${outputDir}/imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz").vcf.header.getGenotypeSamples().sort(), + path("${outputDir}/imputation/quilt/concat/all.batch0.quilt.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("${outputDir}/imputation/quilt/concat/all.batch0.quilt.vcf.gz").vcf.header.getGenotypeSamples().sort(), + path("${outputDir}/imputation/glimpse2/concat/all.batch1.glimpse2.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("${outputDir}/imputation/glimpse2/concat/all.batch1.glimpse2.vcf.gz").vcf.header.getGenotypeSamples().sort(), + path("${outputDir}/imputation/quilt/concat/all.batch1.quilt.vcf.gz").vcf.summary.replaceAll(", phasedAutodetect=(false|true)", ""), + path("${outputDir}/imputation/quilt/concat/all.batch1.quilt.vcf.gz").vcf.header.getGenotypeSamples().sort(), + path("${outputDir}/validation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten() + ).match() + } + ) + } + } +} + +def getRecursiveFileNames(fileOrDir, outputDir) { + if(file(fileOrDir.toString()).isDirectory()) { + return fileOrDir.list().collect { getRecursiveFileNames(it, outputDir) } + } + return fileOrDir.toString().replace("${outputDir}/", "") +} diff --git a/workflows/phaseimpute/tests/main.nf.test.snap b/workflows/phaseimpute/tests/main.nf.test.snap new file mode 100644 index 00000000..43ece590 --- /dev/null +++ b/workflows/phaseimpute/tests/main.nf.test.snap @@ -0,0 +1,471 @@ +{ + "Check test_stitch": { + "content": [ + [ + "imputation/batch/all.batch0.id.txt", + "imputation/batch/all.batch0.noid.txt", + "imputation/csv/impute.csv", + "imputation/stats/NA12878.stitch.bcftools_stats.txt", + 
"imputation/stats/NA19401.stitch.bcftools_stats.txt", + "imputation/stats/NA20359.stitch.bcftools_stats.txt", + "imputation/stitch/concat/all.batch0.stitch.vcf.gz", + "imputation/stitch/concat/all.batch0.stitch.vcf.gz.tbi", + "imputation/stitch/samples/NA12878.stitch.vcf.gz", + "imputation/stitch/samples/NA12878.stitch.vcf.gz.tbi", + "imputation/stitch/samples/NA19401.stitch.vcf.gz", + "imputation/stitch/samples/NA19401.stitch.vcf.gz.tbi", + "imputation/stitch/samples/NA20359.stitch.vcf.gz", + "imputation/stitch/samples/NA20359.stitch.vcf.gz.tbi" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=false]", + [ + "NA12878", + "NA19401", + "NA20359" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T18:17:15.103980096" + }, + "Check test_all": { + "content": [ + [ + "simulation/csv/simulate.csv", + "simulation/samples/NA12878.depth_1x.bam", + "simulation/samples/NA12878.depth_1x.bam.csi", + "simulation/samples/NA19401.depth_1x.bam", + "simulation/samples/NA19401.depth_1x.bam.csi", + "simulation/samples/NA20359.depth_1x.bam", + "simulation/samples/NA20359.depth_1x.bam.csi" + ], + [ + "imputation/batch/all.batch0.id.txt", + "imputation/batch/all.batch0.noid.txt", + "imputation/csv/impute.csv", + "imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz", + "imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/samples/NA12878.glimpse1.vcf.gz", + "imputation/glimpse1/samples/NA12878.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/samples/NA19401.glimpse1.vcf.gz", + "imputation/glimpse1/samples/NA19401.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/samples/NA20359.glimpse1.vcf.gz", + "imputation/glimpse1/samples/NA20359.glimpse1.vcf.gz.tbi", + "imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz", + "imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA12878.glimpse2.vcf.gz", + 
"imputation/glimpse2/samples/NA12878.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA19401.glimpse2.vcf.gz", + "imputation/glimpse2/samples/NA19401.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA20359.glimpse2.vcf.gz", + "imputation/glimpse2/samples/NA20359.glimpse2.vcf.gz.tbi", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA12878.quilt.vcf.gz", + "imputation/quilt/samples/NA12878.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA19401.quilt.vcf.gz", + "imputation/quilt/samples/NA19401.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA20359.quilt.vcf.gz", + "imputation/quilt/samples/NA20359.quilt.vcf.gz.tbi", + "imputation/stats/NA12878.glimpse1.bcftools_stats.txt", + "imputation/stats/NA12878.glimpse2.bcftools_stats.txt", + "imputation/stats/NA12878.quilt.bcftools_stats.txt", + "imputation/stats/NA12878.stitch.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse1.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse2.bcftools_stats.txt", + "imputation/stats/NA19401.quilt.bcftools_stats.txt", + "imputation/stats/NA19401.stitch.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse1.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse2.bcftools_stats.txt", + "imputation/stats/NA20359.quilt.bcftools_stats.txt", + "imputation/stats/NA20359.stitch.bcftools_stats.txt", + "imputation/stitch/concat/all.batch0.stitch.vcf.gz", + "imputation/stitch/concat/all.batch0.stitch.vcf.gz.tbi", + "imputation/stitch/samples/NA12878.stitch.vcf.gz", + "imputation/stitch/samples/NA12878.stitch.vcf.gz.tbi", + "imputation/stitch/samples/NA19401.stitch.vcf.gz", + "imputation/stitch/samples/NA19401.stitch.vcf.gz.tbi", + "imputation/stitch/samples/NA20359.stitch.vcf.gz", + "imputation/stitch/samples/NA20359.stitch.vcf.gz.tbi" + ], + [ + "prep_panel/chunks/glimpse1/1000GP_chr21_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse1/1000GP_chr22_chunks_glimpse1.txt", + 
"prep_panel/chunks/glimpse2/1000GP_chr21_chunks_glimpse2.txt", + "prep_panel/chunks/glimpse2/1000GP_chr22_chunks_glimpse2.txt", + "prep_panel/csv/chunks.csv", + "prep_panel/csv/panel.csv", + "prep_panel/csv/posfile.csv", + "prep_panel/haplegend/1000GP_chr21.hap.gz", + "prep_panel/haplegend/1000GP_chr21.legend.gz", + "prep_panel/haplegend/1000GP_chr21.samples", + "prep_panel/haplegend/1000GP_chr22.hap.gz", + "prep_panel/haplegend/1000GP_chr22.legend.gz", + "prep_panel/haplegend/1000GP_chr22.samples", + "prep_panel/panel/1000GP_chr21_phased.vcf.gz", + "prep_panel/panel/1000GP_chr21_phased.vcf.gz.csi", + "prep_panel/panel/1000GP_chr22_phased.vcf.gz", + "prep_panel/panel/1000GP_chr22_phased.vcf.gz.csi", + "prep_panel/sites/1000GP_chr21_glimpse1_sites.vcf.gz", + "prep_panel/sites/1000GP_chr21_glimpse1_sites.vcf.gz.tbi", + "prep_panel/sites/1000GP_chr22_glimpse1_sites.vcf.gz", + "prep_panel/sites/1000GP_chr22_glimpse1_sites.vcf.gz.tbi", + "prep_panel/stats/1000GP.panel.bcftools_stats.txt" + ], + [ + "validation/concat/all.batch0.truth.vcf.gz", + "validation/concat/all.batch0.truth.vcf.gz.tbi", + "validation/samples/NA12878.truth.vcf.gz", + "validation/samples/NA12878.truth.vcf.gz.tbi", + "validation/samples/NA19401.truth.vcf.gz", + "validation/samples/NA19401.truth.vcf.gz.tbi", + "validation/samples/NA20359.truth.vcf.gz", + "validation/samples/NA20359.truth.vcf.gz.tbi", + "validation/stats/AllSamples.txt", + "validation/stats/NA12878.truth.bcftools_stats.txt", + "validation/stats/NA12878_P1000GP_Tglimpse1_SNP.txt", + "validation/stats/NA12878_P1000GP_Tglimpse2_SNP.txt", + "validation/stats/NA12878_P1000GP_Tquilt_SNP.txt", + "validation/stats/NA12878_P1000GP_Tstitch_SNP.txt", + "validation/stats/NA19401.truth.bcftools_stats.txt", + "validation/stats/NA19401_P1000GP_Tglimpse1_SNP.txt", + "validation/stats/NA19401_P1000GP_Tglimpse2_SNP.txt", + "validation/stats/NA19401_P1000GP_Tquilt_SNP.txt", + "validation/stats/NA19401_P1000GP_Tstitch_SNP.txt", + 
"validation/stats/NA20359.truth.bcftools_stats.txt", + "validation/stats/NA20359_P1000GP_Tglimpse1_SNP.txt", + "validation/stats/NA20359_P1000GP_Tglimpse2_SNP.txt", + "validation/stats/NA20359_P1000GP_Tquilt_SNP.txt", + "validation/stats/NA20359_P1000GP_Tstitch_SNP.txt" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=false]", + [ + "NA12878", + "NA19401", + "NA20359" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=true]", + [ + "NA12878", + "NA19401", + "NA20359" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=false]", + [ + "NA12878", + "NA19401", + "NA20359" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=true]", + [ + "NA12878", + "NA19401", + "NA20359" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T18:23:38.85114929" + }, + "Check test_validate": { + "content": [ + [ + "validation/samples/NA12878.truth.vcf.gz", + "validation/samples/NA12878.truth.vcf.gz.tbi", + "validation/samples/NA19401.truth.vcf.gz", + "validation/samples/NA19401.truth.vcf.gz.tbi", + "validation/samples/NA20359.truth.vcf.gz", + "validation/samples/NA20359.truth.vcf.gz.tbi", + "validation/stats/AllSamples.txt", + "validation/stats/NA12878.truth.bcftools_stats.txt", + "validation/stats/NA12878_SNP.txt", + "validation/stats/NA19401.truth.bcftools_stats.txt", + "validation/stats/NA19401_SNP.txt", + "validation/stats/NA20359.truth.bcftools_stats.txt", + "validation/stats/NA20359_SNP.txt" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T13:54:09.435194577" + }, + "Check test_batch": { + "content": [ + [ + "imputation/batch/all.batch0.id.txt", + "imputation/batch/all.batch0.noid.txt", + "imputation/batch/all.batch1.id.txt", + "imputation/batch/all.batch1.noid.txt", + "imputation/csv/impute.csv", + 
"imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz", + "imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/all.batch1.glimpse2.vcf.gz", + "imputation/glimpse2/concat/all.batch1.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA12878.glimpse2.vcf.gz", + "imputation/glimpse2/samples/NA12878.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA19401.glimpse2.vcf.gz", + "imputation/glimpse2/samples/NA19401.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA20359.glimpse2.vcf.gz", + "imputation/glimpse2/samples/NA20359.glimpse2.vcf.gz.tbi", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz.tbi", + "imputation/quilt/concat/all.batch1.quilt.vcf.gz", + "imputation/quilt/concat/all.batch1.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA12878.quilt.vcf.gz", + "imputation/quilt/samples/NA12878.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA19401.quilt.vcf.gz", + "imputation/quilt/samples/NA19401.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA20359.quilt.vcf.gz", + "imputation/quilt/samples/NA20359.quilt.vcf.gz.tbi", + "imputation/stats/NA12878.glimpse2.bcftools_stats.txt", + "imputation/stats/NA12878.quilt.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse2.bcftools_stats.txt", + "imputation/stats/NA19401.quilt.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse2.bcftools_stats.txt", + "imputation/stats/NA20359.quilt.bcftools_stats.txt" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=2, variantCount=1739, phased=true]", + [ + "NA12878", + "NA19401" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=2, variantCount=1739, phased=true]", + [ + "NA12878", + "NA19401" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true]", + [ + "NA20359" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=1, variantCount=1739, phased=true]", + [ + "NA20359" + ], + [ + "validation/samples/NA12878.truth.vcf.gz", + 
"validation/samples/NA12878.truth.vcf.gz.tbi", + "validation/samples/NA19401.truth.vcf.gz", + "validation/samples/NA19401.truth.vcf.gz.tbi", + "validation/samples/NA20359.truth.vcf.gz", + "validation/samples/NA20359.truth.vcf.gz.tbi", + "validation/stats/AllSamples.txt", + "validation/stats/NA12878.truth.bcftools_stats.txt", + "validation/stats/NA12878_P1000GP_Tglimpse2_SNP.txt", + "validation/stats/NA12878_P1000GP_Tquilt_SNP.txt", + "validation/stats/NA19401.truth.bcftools_stats.txt", + "validation/stats/NA19401_P1000GP_Tglimpse2_SNP.txt", + "validation/stats/NA19401_P1000GP_Tquilt_SNP.txt", + "validation/stats/NA20359.truth.bcftools_stats.txt", + "validation/stats/NA20359_P1000GP_Tglimpse2_SNP.txt", + "validation/stats/NA20359_P1000GP_Tquilt_SNP.txt" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T18:30:51.51345352" + }, + "Check test_quilt": { + "content": [ + [ + "imputation/batch/all.batch0.id.txt", + "imputation/batch/all.batch0.noid.txt", + "imputation/csv/impute.csv", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA12878.quilt.vcf.gz", + "imputation/quilt/samples/NA12878.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA19401.quilt.vcf.gz", + "imputation/quilt/samples/NA19401.quilt.vcf.gz.tbi", + "imputation/quilt/samples/NA20359.quilt.vcf.gz", + "imputation/quilt/samples/NA20359.quilt.vcf.gz.tbi", + "imputation/stats/NA12878.quilt.bcftools_stats.txt", + "imputation/stats/NA19401.quilt.bcftools_stats.txt", + "imputation/stats/NA20359.quilt.bcftools_stats.txt" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=true]", + [ + "NA12878", + "NA19401", + "NA20359" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T18:15:27.194211063" + }, + "Check test_sim": { + "content": [ + [ + "simulation/csv/simulate.csv", + 
"simulation/samples/NA12878.depth_1x.bam", + "simulation/samples/NA12878.depth_1x.bam.csi", + "simulation/samples/NA19401.depth_1x.bam", + "simulation/samples/NA19401.depth_1x.bam.csi", + "simulation/samples/NA20359.depth_1x.bam", + "simulation/samples/NA20359.depth_1x.bam.csi" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T21:35:03.693487008" + }, + "Check test_panelprep": { + "content": [ + [ + "prep_panel/chunks/glimpse1/1000GP_chr21_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse1/1000GP_chr22_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse2/1000GP_chr21_chunks_glimpse2.txt", + "prep_panel/chunks/glimpse2/1000GP_chr22_chunks_glimpse2.txt", + "prep_panel/csv/chunks.csv", + "prep_panel/csv/panel.csv", + "prep_panel/csv/posfile.csv", + "prep_panel/haplegend/1000GP_chr21.hap.gz", + "prep_panel/haplegend/1000GP_chr21.legend.gz", + "prep_panel/haplegend/1000GP_chr21.samples", + "prep_panel/haplegend/1000GP_chr22.hap.gz", + "prep_panel/haplegend/1000GP_chr22.legend.gz", + "prep_panel/haplegend/1000GP_chr22.samples", + "prep_panel/panel/1000GP_chr21_phased.vcf.gz", + "prep_panel/panel/1000GP_chr21_phased.vcf.gz.csi", + "prep_panel/panel/1000GP_chr22_phased.vcf.gz", + "prep_panel/panel/1000GP_chr22_phased.vcf.gz.csi", + "prep_panel/sites/1000GP_chr21_glimpse1_sites.vcf.gz", + "prep_panel/sites/1000GP_chr21_glimpse1_sites.vcf.gz.tbi", + "prep_panel/sites/1000GP_chr22_glimpse1_sites.vcf.gz", + "prep_panel/sites/1000GP_chr22_glimpse1_sites.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-14T19:19:50.07758554" + }, + "Check test_glimpse2": { + "content": [ + [ + "imputation/batch/all.batch0.id.txt", + "imputation/batch/all.batch0.noid.txt", + "imputation/csv/impute.csv", + "imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz", + "imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA12878.glimpse2.vcf.gz", + 
"imputation/glimpse2/samples/NA12878.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA19401.glimpse2.vcf.gz", + "imputation/glimpse2/samples/NA19401.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/NA20359.glimpse2.vcf.gz", + "imputation/glimpse2/samples/NA20359.glimpse2.vcf.gz.tbi", + "imputation/stats/NA12878.glimpse2.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse2.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse2.bcftools_stats.txt" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=true]", + [ + "NA12878", + "NA19401", + "NA20359" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T18:13:04.376764506" + }, + "Check test": { + "content": [ + [ + "imputation/batch/all.batch0.id.txt", + "imputation/batch/all.batch0.noid.txt", + "imputation/csv/impute.csv", + "imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz", + "imputation/glimpse1/concat/all.batch0.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/samples/NA12878.glimpse1.vcf.gz", + "imputation/glimpse1/samples/NA12878.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/samples/NA19401.glimpse1.vcf.gz", + "imputation/glimpse1/samples/NA19401.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/samples/NA20359.glimpse1.vcf.gz", + "imputation/glimpse1/samples/NA20359.glimpse1.vcf.gz.tbi", + "imputation/stats/NA12878.glimpse1.bcftools_stats.txt", + "imputation/stats/NA19401.glimpse1.bcftools_stats.txt", + "imputation/stats/NA20359.glimpse1.bcftools_stats.txt" + ], + "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=false]", + [ + "NA12878", + "NA19401", + "NA20359" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T18:11:44.567996758" + }, + "Check test_dog": { + "content": [ + [ + "prep_panel/chunks/glimpse1/658Dog_chr21_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse1/658Dog_chr22_chunks_glimpse1.txt", + 
"prep_panel/chunks/glimpse2/658Dog_chr21_chunks_glimpse2.txt", + "prep_panel/chunks/glimpse2/658Dog_chr22_chunks_glimpse2.txt", + "prep_panel/csv/chunks.csv", + "prep_panel/csv/panel.csv", + "prep_panel/csv/posfile.csv", + "prep_panel/haplegend/658Dog_chr21.hap.gz", + "prep_panel/haplegend/658Dog_chr21.legend.gz", + "prep_panel/haplegend/658Dog_chr21.samples", + "prep_panel/haplegend/658Dog_chr22.hap.gz", + "prep_panel/haplegend/658Dog_chr22.legend.gz", + "prep_panel/haplegend/658Dog_chr22.samples", + "prep_panel/panel/658Dog_chr21_phased.vcf.gz", + "prep_panel/panel/658Dog_chr21_phased.vcf.gz.csi", + "prep_panel/panel/658Dog_chr22_phased.vcf.gz", + "prep_panel/panel/658Dog_chr22_phased.vcf.gz.csi", + "prep_panel/sites/658Dog_chr21_glimpse1_sites.vcf.gz", + "prep_panel/sites/658Dog_chr21_glimpse1_sites.vcf.gz.tbi", + "prep_panel/sites/658Dog_chr22_glimpse1_sites.vcf.gz", + "prep_panel/sites/658Dog_chr22_glimpse1_sites.vcf.gz.tbi" + ], + [ + "imputation/batch/all.batch0.id.txt", + "imputation/batch/all.batch0.noid.txt", + "imputation/csv/impute.csv", + "imputation/glimpse1/concat/dog_1735.batch0.glimpse1.vcf.gz", + "imputation/glimpse1/concat/dog_1735.batch0.glimpse1.vcf.gz.tbi", + "imputation/glimpse1/samples/dog_1735.glimpse1.vcf.gz", + "imputation/glimpse1/samples/dog_1735.glimpse1.vcf.gz.tbi", + "imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz", + "imputation/glimpse2/concat/all.batch0.glimpse2.vcf.gz.tbi", + "imputation/glimpse2/samples/dog_1735.glimpse2.vcf.gz", + "imputation/glimpse2/samples/dog_1735.glimpse2.vcf.gz.tbi", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz", + "imputation/quilt/concat/all.batch0.quilt.vcf.gz.tbi", + "imputation/quilt/samples/1735.quilt.vcf.gz", + "imputation/quilt/samples/1735.quilt.vcf.gz.tbi", + "imputation/stats/1735.quilt.bcftools_stats.txt", + "imputation/stats/dog_1735.glimpse1.bcftools_stats.txt", + "imputation/stats/dog_1735.glimpse2.bcftools_stats.txt" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": 
"24.10.1" + }, + "timestamp": "2024-11-27T18:25:56.980470563" + } +} \ No newline at end of file