diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0b3a5f4..75f3f83 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,9 +3,12 @@ name: Tests on: push: branches: - - master + - main + - dev pull_request: branches-ignore: [] + schedule: + - cron: '0 0 * * SUN' jobs: build-linux: @@ -13,9 +16,10 @@ jobs: strategy: max-parallel: 5 matrix: - python: [3.7,3.8,3.9] + python: [3.7, 3.8, 3.9] fail-fast: false + steps: - name: install graphviz @@ -34,12 +38,10 @@ jobs: run: | # $CONDA is an environment variable pointing to the root of the miniconda directory echo $CONDA/bin >> $GITHUB_PATH - conda update ruamel_yaml - name: conda run: | - conda install -c conda-forge --quiet mamba python=${{ matrix.python }} - mamba install -c bioconda -c conda-forge --quiet -y bowtie samtools bamtools pigz bedtools + conda install -c conda-forge -c bioconda -y python=${{ matrix.python }} 'bowtie>=1.3.0' samtools bamtools pigz bedtools - name: Install dependencies run: | @@ -48,6 +50,7 @@ jobs: - name: install package itself run: | pip install . 
+ pip install requests --upgrade - name: testing run: | diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index a13767b..18a194d 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -8,9 +8,9 @@ on: jobs: build-n-publish: name: Build and publish to PyPI and TestPyPI - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@master + - uses: actions/checkout@main - name: Set up Python 3.7 uses: actions/setup-python@v1 with: @@ -26,14 +26,14 @@ jobs: python setup.py sdist - name: Publish distribution to Test PyPI - uses: pypa/gh-action-pypi-publish@master + uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.TEST_PYPI_API_TOKEN }} repository_url: https://test.pypi.org/legacy/ - name: Publish distribution to PyPI if: startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@master + uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/README.rst b/README.rst index 6f0d89d..0a54f22 100644 --- a/README.rst +++ b/README.rst @@ -62,8 +62,10 @@ Requirements This pipelines requires the following executable(s): -- bowtie1 +- bowtie1 >= 1.3.0 +- bedtools - samtools +- bamtools - pigz .. 
image:: https://raw.githubusercontent.com/sequana/ribofinder/master/sequana_pipelines/ribofinder/dag.png @@ -102,6 +104,7 @@ Changelog ========= ==================================================================== Version Description ========= ==================================================================== +0.12.0 * set singularity containers 0.11.1 * Fix config file (removing hard-coded path) 0.11.0 * Fix multiqc plot using same fix as in sequna_rnaseq pipelines * add utility plot to check rate of ribosomal per sequence and also diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..bbb7fbc --- /dev/null +++ b/environment.yml @@ -0,0 +1,18 @@ +name: sequana_ribofinder + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bowtie>=1.3.0 + - samtools + - bamtools + - pigz + - bedtools + - pip + - pip: + - sequana + + diff --git a/requirements.txt b/requirements.txt index bf9e35a..7d6b785 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -sequana>=0.12.7 -sequana_pipetools>=0.7.1 +sequana>=0.14.1 +sequana_pipetools>=0.9.2 diff --git a/sequana_pipelines/ribofinder/main.py b/sequana_pipelines/ribofinder/main.py index 6cb5b6e..ad9ab2d 100755 --- a/sequana_pipelines/ribofinder/main.py +++ b/sequana_pipelines/ribofinder/main.py @@ -1,12 +1,8 @@ -# -*- coding: utf-8 -*- # # This file is part of Sequana software # # Copyright (c) 2016 - Sequana Development Team # -# File author(s): -# Thomas Cokelaer -# # Distributed under the terms of the 3-clause BSD license. # The full license is in the LICENSE file, distributed with this software. 
# @@ -17,7 +13,6 @@ import sys import os import argparse -import shutil import subprocess from sequana_pipetools.options import * diff --git a/sequana_pipelines/ribofinder/requirements.txt b/sequana_pipelines/ribofinder/requirements.txt index dddf2a1..e5c02cc 100644 --- a/sequana_pipelines/ribofinder/requirements.txt +++ b/sequana_pipelines/ribofinder/requirements.txt @@ -1,3 +1,5 @@ bowtie samtools +bedtools +bamtools pigz diff --git a/sequana_pipelines/ribofinder/ribofinder.rules b/sequana_pipelines/ribofinder/ribofinder.rules index 8171ee6..443ccad 100644 --- a/sequana_pipelines/ribofinder/ribofinder.rules +++ b/sequana_pipelines/ribofinder/ribofinder.rules @@ -12,13 +12,10 @@ # Documentation: https://github.com/sequana/fastqc/README.rst ############################################################################## """Ribofinder pipeline""" -import glob -import os import shutil -from os.path import join +import os from collections import Counter -import sequana from sequana_pipetools import PipelineManager from sequana_pipetools import snaketools as sm @@ -29,86 +26,107 @@ from sequana.gff3 import GFF3 configfile: "config.yaml" manager = PipelineManager("ribofinder", config) -manager.setup(globals(), mode="warning") -__data__input = manager.getrawdata() - rule pipeline: - input: + input: ".sequana/rulegraph.svg", "outputs/proportions.png", "outputs/RPKM.png", "multiqc/multiqc_report.html" -__fasta_file__ = config['general']['reference_file'] -if config['general']['genbank_file']: - __annot_file__ = config['general']['genbank_file'] -elif config['general']['gff_file']: - __annot_file__ = config['general']['gff_file'] -elif config['general']['rRNA_file']: - __annot_file__ = config['general']['rRNA_file'] -__prefix_name__ = "indexing/features" +def get_annot_file(): + if config['general']['genbank_file']: + return config['general']['genbank_file'] + elif config['general']['gff_file']: + return config['general']['gff_file'] + elif config['general']['rRNA_file']: + 
return config['general']['rRNA_file'] + raise ValueError("You must provide a genbank or gff file, or a file with rRNA sequences.") + + +input_fasta = config['general']['reference_file'] if manager.config.general.rRNA_file: - __bowtie1_index_rna__fasta = config["general"]["rRNA_file"] - if os.path.exists(__bowtie1_index_rna__fasta) is False: - log.error(f"File {__bowtie1_index_rna__fasta} does not exists. Check your config file") + user_file = config["general"]["rRNA_file"] + if os.path.exists(user_file) is False: + log.error(f"File {user_file} does not exists. Check your config file") sys.exit(1) -elif __annot_file__.endswith(".gbk"): + os.makedirs("build_feature_fasta", exist_ok=True) + shutil.copy(user_file, "build_feature_fasta/feature.fasta") +elif get_annot_file().endswith(".gbk"): # This is for genbank input - __build_feature_fasta__output = "build_feature_fasta/feature.fasta" - for this in [__fasta_file__, __annot_file__]: + for this in [input_fasta, get_annot_file()]: if os.path.exists(this) is False: - raise IOError("File {} not found".format(__fasta_file__)) + raise IOError("File {} not found".format(this)) + rule build_feature_fasta: input: - fasta = __fasta_file__, - annot = __annot_file__ - output: __build_feature_fasta__output + fasta = input_fasta, + annot = get_annot_file() + output: "build_feature_fasta/feature.fasta" run: from sequana.genbank import GenBank gg = GenBank(input.annot) sequence = gg.extract_fasta(input.fasta, features=['rRNA']) with open(output[0], "w") as fout: fout.write(sequence) - __bowtie1_index_rna__fasta = __build_feature_fasta__output -elif __annot_file__.endswith(".gff") or __annot_file__.endswith(".gff3"): - for this in [__fasta_file__, __annot_file__]: + +elif get_annot_file().endswith(".gff") or get_annot_file().endswith(".gff3"): + for this in [input_fasta, get_annot_file()]: if os.path.exists(this) is False: - raise IOError("File {} not found".format(__fasta_file__)) - # extract rRNA feature from GFF and get 
corresponding fasta - # and gff. if no match for rRNA, save empty fasta as AAAAAAAAAAA - __build_feature_fasta__output = "build_feature_fasta/feature.fasta" + raise IOError("File {} not found".format(this)) - __extract_fasta_from_bed__input = __fasta_file__ - __extract_fasta_from_bed__gff = __annot_file__ - __extract_fasta_from_bed__feature = config["general"]["rRNA_feature"] - __extract_fasta_from_bed__output = __build_feature_fasta__output - __extract_fasta_from_bed__output_features = __prefix_name__ + "_rRNA.gff" - __extract_fasta_from_bed__log = "indexing/get_rRNA.log" - # ---------------------------------------------------------------------------- - include: sm.modules["extract_fasta_from_bed"] - __bowtie1_index_rna__fasta = __extract_fasta_from_bed__output + + rule build_feature_fasta: + """ extract rRNA feature from GFF and get corresponding fasta and gff. + + if no match for rRNA, save empty fasta as AAAAAAAAAAA + """ + input: + fasta = input_fasta, + gff = get_annot_file() + output: + fasta = "build_feature_fasta/feature.fasta", + fai = "build_feature_fasta/feature.fasta.fai", + gff = "build_feature_fasta/feature_rRNA.gff" + params: + feature = config['general']['rRNA_feature'] + container: + "https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img" + shell: + """ + # used to be gawk but awk is more generic. 
+ awk '{{ if ($3=="{params.feature}") print }}' {input.gff} > {output.gff} + if [ -s {output.gff} ] + then + bedtools getfasta -fi {input.fasta} -bed {output.gff} -fo {output.fasta} + else : + echo -e ">empty\\nAAAAAAAAAAAAAA" > {output.fasta} + fi + samtools faidx {output.fasta} + """ -__bowtie1_reference__ = __bowtie1_index_rna__fasta.rsplit(".",1)[0] + "_rRNA.1.ebwt" rule bowtie1_indexing: input: - reference= __bowtie1_index_rna__fasta + reference= "build_feature_fasta/feature.fasta" output: - __bowtie1_reference__ + "build_feature_fasta/feature_rRNA.1.ebwt" log: "indexing/bowtie_rRNA.log" params: options="" - threads: 2 + threads: + 2 + container: + "https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img" wrapper: "main/wrappers/bowtie1/build" @@ -126,6 +144,8 @@ rule unpigz: input: manager.getrawdata() output: temp("{sample}/unpigz/{sample}.fastq") threads: 4 + container: + "https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img" shell: """ unpigz -p {threads} -fk --stdout {input[0]} > {output} @@ -137,7 +157,7 @@ rule unpigz: rule bowtie1_mapping_rna: input: fastq="{sample}/unpigz/{sample}.fastq", - index=__bowtie1_reference__ + index="build_feature_fasta/feature_rRNA.1.ebwt" output: bam="{sample}/bowtie1/{sample}.bam", sorted="{sample}/bowtie1/{sample}.sorted.bam" @@ -145,7 +165,10 @@ rule bowtie1_mapping_rna: "{sample}/bowtie1/{sample}.log" params: options="" - threads: config['bowtie1_mapping_rna']['threads'] + threads: + config['bowtie1_mapping_rna']['threads'] + container: + "https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img" wrapper: "main/wrappers/bowtie1/align" @@ -178,13 +201,13 @@ rule fix_bowtie1_log: rule plotting: input: bam_files=expand("{sample}/bowtie1/{sample}.sorted.bam", sample=manager.samples), - fasta_file=__bowtie1_index_rna__fasta + fasta_file= "build_feature_fasta/feature.fasta" output: png="outputs/proportions.png", rpkm="outputs/RPKM.png" run: import pandas as pd - from sequana import BAM + from 
sequana import BAM, FastA from pylab import tight_layout, savefig, xlabel, ylabel, clf results = [] @@ -214,7 +237,7 @@ rule plotting: savefig(output['png']) # now, we shown the RPKM (read count normalised by gene length) - from sequana import FastA + f = FastA(input['fasta_file']) df = df.T L = [ f.get_lengths_as_dict()[x] for x in df.index] @@ -235,6 +258,8 @@ rule bam_indexing: "{sample}/bowtie1/{sample}.sorted.bam" output: "{sample}/bowtie1/{sample}.sorted.bam.bai" + container: + "https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img" shell: """ bamtools index -in {input} diff --git a/setup.py b/setup.py index eda7af5..cd05fb9 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- -# License: 3-clause BSD from setuptools import setup, find_namespace_packages from setuptools.command.develop import develop from setuptools.command.install import install import subprocess _MAJOR = 0 -_MINOR = 11 -_MICRO = 1 +_MINOR = 12 +_MICRO = 0 version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) release = '%d.%d' % (_MAJOR, _MINOR) @@ -39,20 +37,6 @@ NAME = "ribofinder" -class Install(install): - def run(self): - cmd = "sequana_completion --name {} --force ".format(NAME) - try: subprocess.run(cmd.split()) - except:pass - install.run(self) - -class Develop(develop): - def run(self): - cmd = "sequana_completion --name {} --force ".format(NAME) - try:subprocess.run(cmd.split()) - except:pass - develop.run(self) - setup( name = "sequana_{}".format(NAME), version = version, @@ -76,8 +60,7 @@ def run(self): # This is recursive include of data files exclude_package_data = {"": ["__pycache__"]}, package_data = { - '': ['*.yaml', "*.rules", "*.json", "requirements.txt", "*png"], - 'sequana_pipelines.ribofinder.data' : ['*.*'], + '': ['*.yaml', "*.rules", "*.json", "requirements.txt", "*png", "*yml", "*smk"] }, zip_safe=False, diff --git a/test/test_main.py b/test/test_main.py index 87f2dc2..e4d73f9 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ 
-39,7 +39,14 @@ def test_full_rRNA_extract(): cmd += f"--working-directory {wk} --force --reference-file {sharedir}/Lepto.fa --gff-file {sharedir}/Lepto.gff" subprocess.call(cmd.split()) stat = subprocess.call("sh ribofinder.sh".split(), cwd=wk) - assert os.path.exists(wk + "/summary.html") + + + if os.path.exists(wk + "/summary.html"): + pass + else: + with open(f"{wk}/indexing/bowtie_rRNA.log", "r") as fin: + print(fin.read()) + raise IOError(f"{wk}/summary.html not found (pipeline exit status: {stat})") def test_version(): cmd = "sequana_ribofinder --version"