Skip to content

Commit

Permalink
Merge pull request #4 from cokelaer/main
Browse files Browse the repository at this point in the history
Update pipeline to set singularity
  • Loading branch information
cokelaer authored Aug 29, 2022
2 parents 6b5c2f5 + d757db5 commit 3bb1305
Show file tree
Hide file tree
Showing 10 changed files with 125 additions and 89 deletions.
13 changes: 8 additions & 5 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,23 @@ name: Tests
on:
push:
branches:
- master
- main
- dev
pull_request:
branches-ignore: []
schedule:
- cron: '0 0 * * SUN'

jobs:
build-linux:
runs-on: ubuntu-latest
strategy:
max-parallel: 5
matrix:
python: [3.7,3.8,3.9]
python: [3.7, 3.8, 3.9]
fail-fast: false


steps:

- name: install graphviz
Expand All @@ -34,12 +38,10 @@ jobs:
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
conda update ruamel_yaml
- name: conda
run: |
conda install -c conda-forge --quiet mamba python=${{ matrix.python }}
mamba install -c bioconda -c conda-forge --quiet -y bowtie samtools bamtools pigz bedtools
conda install -c conda-forge -c bioconda -y python=${{ matrix.python }} 'bowtie>=1.3.0' samtools bamtools pigz bedtools
- name: Install dependencies
run: |
Expand All @@ -48,6 +50,7 @@ jobs:
- name: install package itself
run: |
pip install .
pip install requests --upgrade
- name: testing
run: |
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ on:
jobs:
build-n-publish:
name: Build and publish to PyPI and TestPyPI
runs-on: ubuntu-18.04
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@master
- uses: actions/checkout@main
- name: Set up Python 3.7
uses: actions/setup-python@v1
with:
Expand All @@ -26,14 +26,14 @@ jobs:
python setup.py sdist
- name: Publish distribution to Test PyPI
uses: pypa/gh-action-pypi-publish@master
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
- name: Publish distribution to PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
5 changes: 4 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ Requirements

This pipelines requires the following executable(s):

- bowtie1
- bowtie1 >= 1.3.0
- bedtools
- samtools
- bamtools
- pigz

.. image:: https://raw.githubusercontent.com/sequana/ribofinder/master/sequana_pipelines/ribofinder/dag.png
Expand Down Expand Up @@ -102,6 +104,7 @@ Changelog
========= ====================================================================
Version Description
========= ====================================================================
0.12.0 * set singularity containers
0.11.1 * Fix config file (removing hard-coded path)
0.11.0 * Fix multiqc plot using same fix as in sequna_rnaseq pipelines
* add utility plot to check rate of ribosomal per sequence and also
Expand Down
18 changes: 18 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: sequana_variant_calling

channels:
- conda-forge
- bioconda
- defaults

dependencies:
- bowtie>=1.3.0
- samtools
- bamtools
- pigz
- bedtools
- pip
- pip:
- sequana


4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
sequana>=0.12.7
sequana_pipetools>=0.7.1
sequana>=0.14.1
sequana_pipetools>=0.9.2
5 changes: 0 additions & 5 deletions sequana_pipelines/ribofinder/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
# -*- coding: utf-8 -*-
#
# This file is part of Sequana software
#
# Copyright (c) 2016 - Sequana Development Team
#
# File author(s):
# Thomas Cokelaer <thomas.cokelaer@pasteur.fr>
#
# Distributed under the terms of the 3-clause BSD license.
# The full license is in the LICENSE file, distributed with this software.
#
Expand All @@ -17,7 +13,6 @@
import sys
import os
import argparse
import shutil
import subprocess

from sequana_pipetools.options import *
Expand Down
2 changes: 2 additions & 0 deletions sequana_pipelines/ribofinder/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
bowtie
samtools
bedtools
bamtools
pigz
127 changes: 76 additions & 51 deletions sequana_pipelines/ribofinder/ribofinder.rules
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@
# Documentation: https://github.com/sequana/fastqc/README.rst
##############################################################################
"""Ribofinder pipeline"""
import glob
import os
import shutil
from os.path import join
import os
from collections import Counter

import sequana

from sequana_pipetools import PipelineManager
from sequana_pipetools import snaketools as sm
Expand All @@ -29,86 +26,107 @@ from sequana.gff3 import GFF3
configfile: "config.yaml"

manager = PipelineManager("ribofinder", config)
manager.setup(globals(), mode="warning")



__data__input = manager.getrawdata()

rule pipeline:
input:
input:
".sequana/rulegraph.svg",
"outputs/proportions.png",
"outputs/RPKM.png",
"multiqc/multiqc_report.html"

__fasta_file__ = config['general']['reference_file']
if config['general']['genbank_file']:
__annot_file__ = config['general']['genbank_file']
elif config['general']['gff_file']:
__annot_file__ = config['general']['gff_file']
elif config['general']['rRNA_file']:
__annot_file__ = config['general']['rRNA_file']

__prefix_name__ = "indexing/features"
def get_annot_file():
if config['general']['genbank_file']:
return config['general']['genbank_file']
elif config['general']['gff_file']:
return config['general']['gff_file']
elif config['general']['rRNA_file']:
return config['general']['rRNA_file']
raise ValueError("You must provide a genbank of gff or file with rRNA sequences.")


input_fasta = config['general']['reference_file']


if manager.config.general.rRNA_file:
__bowtie1_index_rna__fasta = config["general"]["rRNA_file"]
if os.path.exists(__bowtie1_index_rna__fasta) is False:
log.error(f"File {__bowtie1_index_rna__fasta} does not exists. Check your config file")
user_file = config["general"]["rRNA_file"]
if os.path.exists(user_file) is False:
log.error(f"File {user_file} does not exists. Check your config file")
sys.exit(1)
elif __annot_file__.endswith(".gbk"):
os.makedirs("build_feature_fasta", exist_ok=True)
shutil.copy(user_file, "build_feature_fasta/feature.fasta")
elif get_annot_file().endswith(".gbk"):
# This is for genbank input
__build_feature_fasta__output = "build_feature_fasta/feature.fasta"
for this in [__fasta_file__, __annot_file__]:
for this in [input_fasta, get_annot_file()]:
if os.path.exists(this) is False:
raise IOError("File {} not found".format(__fasta_file__))
raise IOError("File {} not found".format(input_fasta))

rule build_feature_fasta:
input:
fasta = __fasta_file__,
annot = __annot_file__
output: __build_feature_fasta__output
fasta = input_fasta,
annot = get_annot_file()
output: "build_feature_fasta/feature.fasta"
run:
from sequana.genbank import GenBank
gg = GenBank(input.annot)
sequence = gg.extract_fasta(input.fasta, features=['rRNA'])
with open(output[0], "w") as fout:
fout.write(sequence)
__bowtie1_index_rna__fasta = __build_feature_fasta__output
elif __annot_file__.endswith(".gff") or __annot_file__.endswith(".gff3"):
for this in [__fasta_file__, __annot_file__]:

elif get_annot_file().endswith(".gff") or get_annot_file().endswith(".gff3"):
for this in [input_fasta, get_annot_file()]:
if os.path.exists(this) is False:
raise IOError("File {} not found".format(__fasta_file__))
# extract rRNA feature from GFF and get corresponding fasta
# and gff. if no match for rRNA, save empty fasta as AAAAAAAAAAA
__build_feature_fasta__output = "build_feature_fasta/feature.fasta"
raise IOError("File {} not found".format(input_fasta))

__extract_fasta_from_bed__input = __fasta_file__
__extract_fasta_from_bed__gff = __annot_file__
__extract_fasta_from_bed__feature = config["general"]["rRNA_feature"]
__extract_fasta_from_bed__output = __build_feature_fasta__output
__extract_fasta_from_bed__output_features = __prefix_name__ + "_rRNA.gff"
__extract_fasta_from_bed__log = "indexing/get_rRNA.log"
# ----------------------------------------------------------------------------
include: sm.modules["extract_fasta_from_bed"]
__bowtie1_index_rna__fasta = __extract_fasta_from_bed__output

rule build_feature_fasta:
""" extract rRNA feature from GFF and get corresponding fasta and gff.

if no match for rRNA, save empty fasta as AAAAAAAAAAA
"""
input:
fasta = input_fasta,
gff = get_annot_file()
output:
fasta = "build_feature_fasta/feature.fasta",
fai = "build_feature_fasta/feature.fasta.fai",
gff = "build_feature_fasta/feature_rRNA.gff"
params:
feature = config['general']['rRNA_feature']
container:
"https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img"
shell:
"""
# used to be gawk but awk is more generic.
awk '{{ if ($3=="{params.feature}") print }}' {input.gff} > {output.gff}
if [ -s {output.gff} ]
then
bedtools getfasta -fi {input.fasta} -bed {output.gff} -fo {output.fasta}
else :
echo -e ">empty\\nAAAAAAAAAAAAAA" > {output.fasta}
fi
samtools faidx {output.fasta}
"""


__bowtie1_reference__ = __bowtie1_index_rna__fasta.rsplit(".",1)[0] + "_rRNA.1.ebwt"



rule bowtie1_indexing:
input:
reference= __bowtie1_index_rna__fasta
reference= "build_feature_fasta/feature.fasta"
output:
__bowtie1_reference__
"build_feature_fasta/feature_rRNA.1.ebwt"
log:
"indexing/bowtie_rRNA.log"
params:
options=""
threads: 2
threads:
2
container:
"https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img"
wrapper:
"main/wrappers/bowtie1/build"

Expand All @@ -126,6 +144,8 @@ rule unpigz:
input: manager.getrawdata()
output: temp("{sample}/unpigz/{sample}.fastq")
threads: 4
container:
"https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img"
shell:
"""
unpigz -p {threads} -fk --stdout {input[0]} > {output}
Expand All @@ -137,15 +157,18 @@ rule unpigz:
rule bowtie1_mapping_rna:
input:
fastq="{sample}/unpigz/{sample}.fastq",
index=__bowtie1_reference__
index="build_feature_fasta/feature_rRNA.1.ebwt"
output:
bam="{sample}/bowtie1/{sample}.bam",
sorted="{sample}/bowtie1/{sample}.sorted.bam"
log:
"{sample}/bowtie1/{sample}.log"
params:
options=""
threads: config['bowtie1_mapping_rna']['threads']
threads:
config['bowtie1_mapping_rna']['threads']
container:
"https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img"
wrapper:
"main/wrappers/bowtie1/align"

Expand Down Expand Up @@ -178,13 +201,13 @@ rule fix_bowtie1_log:
rule plotting:
input:
bam_files=expand("{sample}/bowtie1/{sample}.sorted.bam", sample=manager.samples),
fasta_file=__bowtie1_index_rna__fasta
fasta_file= "build_feature_fasta/feature.fasta"
output:
png="outputs/proportions.png",
rpkm="outputs/RPKM.png"
run:
import pandas as pd
from sequana import BAM
from sequana import BAM, FastA
from pylab import tight_layout, savefig, xlabel, ylabel, clf

results = []
Expand Down Expand Up @@ -214,7 +237,7 @@ rule plotting:
savefig(output['png'])

# now, we shown the RPKM (read count normalised by gene length)
from sequana import FastA

f = FastA(input['fasta_file'])
df = df.T
L = [ f.get_lengths_as_dict()[x] for x in df.index]
Expand All @@ -235,6 +258,8 @@ rule bam_indexing:
"{sample}/bowtie1/{sample}.sorted.bam"
output:
"{sample}/bowtie1/{sample}.sorted.bam.bai"
container:
"https://zenodo.org/record/7031863/files/sequana_tools_0.14.2.img"
shell:
"""
bamtools index -in {input}
Expand Down
Loading

0 comments on commit 3bb1305

Please sign in to comment.