Merge pull request #3 from cokelaer/main

add apptainer and update documentation
sequana · Jan 28, 2023 · e7b7396 · e7b7396
2 parents 3b830d5 + b68c738
commit e7b7396
Show file tree

Hide file tree

Showing 7 changed files with 144 additions and 48 deletions.
diff --git a/.github/workflows/apptainer.yml b/.github/workflows/apptainer.yml
@@ -0,0 +1,59 @@
+name: Apptainer Run
+
+on:
+  push:
+    branches:
+      - main
+      - dev
+  pull_request:
+    branches-ignore: []
+  schedule:
+    - cron: '0 0 * * SUN'
+
+jobs:
+  build-linux:
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 5
+      matrix:
+        python: [3.8, 3.9, '3.10']
+      fail-fast: false
+
+
+    steps:
+
+    - name: precleanup
+      run: |
+        sudo rm -rf /usr/share/dotnet
+        sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+    - name: install graphviz
+      run: |
+        sudo apt update
+        sudo apt-get install -y graphviz software-properties-common
+        sudo add-apt-repository -y ppa:apptainer/ppa
+        sudo apt update
+        sudo apt install -y apptainer
+
+    - name: checkout git repo
+      uses: actions/checkout@v3
+
+    - name: Set up Python 3.X
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python }}
+
+    - name: Install dependencies
+      run: |
+        pip install .[testing]
+
+    - name: install package itself
+      run: |
+         pip install .
+
+    - name: testing
+      run: |
+        export PATH=$PATH:/usr/local/bin
+        sequana_bioconvert --input-directory test/data/ --input-ext "fastq.gz" --output-ext "fasta.gz" --use-apptainer --command fastq2fasta --input-pattern "*" && cd bioconvert && sh bioconvert.sh
+
+
+
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -24,7 +24,7 @@ jobs:
 
     - name: install graphviz
       run: |
-        sudo apt-get install -y graphviz
+        sudo apt-get install -y graphviz libcurl4-openssl-dev build-essential
 
     - name: checkout git repo
       uses: actions/checkout@v2
@@ -36,19 +36,18 @@ jobs:
           extra-specs: |
             python=${{ matrix.python }}
 
-    - name: Install dependencies
-      run: |
-        pip install coveralls pytest-cov pytest pytest-xdist
-
     - name: install package itself
+      shell: bash -l {0}
       run: |
-         pip install .
+         pip install .[testing]
 
     - name: testing
+      shell: bash -l {0}
       run: |
         pytest -v --cov-report term-missing --cov=sequana_pipelines.bioconvert
 
     - name: coveralls
+      shell: bash -l {0}
       run: |
         echo $COVERALLS_REPO_TOKEN
         coveralls --service=github

diff --git a/README.rst b/README.rst
@@ -23,12 +23,16 @@ This is is the **bioconvert** pipeline from the `Sequana <https://sequana.readth
 Installation
 ~~~~~~~~~~~~
 
-You must install Sequana first::
+This package depends only on Python and singularity. To install **sequana_bioconvert**, just install this package as
+follows::
 
-    pip install sequana
+    pip install sequana_bioconvert
+
+For **singularity**, we recommend using a conda environment::
 
-Then, just install this package::
 
+    conda create --name bioconvert -y python=3.8 singularity
+    conda activate bioconvert
     pip install sequana_bioconvert
 
 
@@ -39,8 +43,7 @@ Usage
 
     sequana_bioconvert --help
 
-
-You need to provide the type of conversion you wish to perform with the 
+You need to provide the type of conversion you wish to perform with the
 *--command* argument. You also need to tell the type of extensions expected
 including the compression (gz, bz2 or dsrc recognised). Finally, the
 *--input-directory* and *--input-pattern* must be used to find the input
@@ -49,50 +52,41 @@ files.::
     sequana_bioconvert --input-directory . --input-ext fastq.gz --output-ext
         fasta.gz --command fastq2fasta --input-pattern "*.fastq.gz"
 
-
-This creates a directory with the pipeline and configuration file. You will then need 
-to execute the pipeline::
+This command creates a directory with the pipeline and configuration file. You will then need 
+to execute the pipeline as follows::
 
     cd bioconvert
     sh bioconvert.sh  # for a local run
 
-This launch a snakemake pipeline. Symboli links to the input data are created in
+This launches a snakemake pipeline. Symbolic links to the input data are created in
 the ./input directory and results stored in the ./output directory.
 
+Some conversions require external standalone tools. We recommend using our singularity image.
+To do so, add the **--use-apptainer** option when you initialise the pipeline. You can also
+specify where to store the singularity image once and for all using **--apptainer-prefix**::
+
+    sequana_bioconvert --input-directory . --input-ext fastq.gz --output-ext
+        fasta.gz --command fastq2fasta --input-pattern "*.fastq.gz"
+        --use-apptainer --apptainer-prefix ~/images
+
+
 See bioconvert.readthedocs.io for more details about **bioconvert** itself.
 
-If you are familiar with snakemake, you can retrieve the pipeline itself and its 
+If you are familiar with snakemake, you can retrieve the pipeline itself and its
 configuration files and then execute the pipeline yourself with specific parameters::
 
     snakemake -s bioconvert.rules -c config.yaml --cores 4 --stats stats.txt
 
 Or use `sequanix <https://sequana.readthedocs.io/en/main/sequanix.html>`_ interface.
 
-Singularity
-~~~~~~~~~~~
-
-::
-
-    sequana_bioconvert --input-pattern '*gz' --input-directory data --input-ext fastq.gz --output-ext fasta.gz --command fastq2fasta --force --use-singularity --singularity-prefix ~/images/
-
-
-
-
 Requirements
 ~~~~~~~~~~~~
 
-This pipelines requires the following executable(s):
-
-- bioconvert
-
-.. image:: https://raw.githubusercontent.com/sequana/sequana_bioconvert/main/sequana_pipelines/bioconvert/dag.png
+This pipeline requires the following executable(s), installed with sequana_bioconvert: bioconvert
 
+All dependencies of bioconvert, including external tools, are available through the apptainer image used by this
+**sequana_bioconvert** pipeline.
 
-Details
-~~~~~~~~~
-
-This pipeline runs **bioconvert** in parallel on the input fastq files (paired or not). 
-A brief sequana summary report is also produced.
 
 
 Rules and configuration details
@@ -107,6 +101,7 @@ Changelog
 ========= ====================================================================
 Version   Description
 ========= ====================================================================
+1.0.0     Uses bioconvert 1.0.0
 0.10.0    Add container
 0.9.0     Version using new sequana/sequana_pipetools framework
 0.8.1     **Working version**

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 sequana_pipetools>=0.7.2
 tqdm
-
+bioconvert
+snakemake
diff --git a/sequana_pipelines/bioconvert/bioconvert.rules b/sequana_pipelines/bioconvert/bioconvert.rules
@@ -31,7 +31,6 @@ filenames = glob.glob(config["input_directory"] + os.sep + input_pattern)
 
 samples = [Path(x).name.split(".")[0] for x in filenames]
 
-print(samples)
 
 # create symbolic links
 try:

diff --git a/sequana_pipelines/bioconvert/main.py b/sequana_pipelines/bioconvert/main.py
@@ -21,16 +21,45 @@
 from sequana_pipetools.info import sequana_epilog, sequana_prolog
 from sequana_pipetools import SequanaManager
 
+
 col = Colors()
 
 NAME = "bioconvert"
 
 
+# retrieve possible commands from the bioconvert registry.
+from bioconvert.core.registry import Registry
+from bioconvert import logger as blog
+blog.level = "ERROR"
+r = Registry()
+blog.level = "WARNING"
+commands = list(r.get_converters_names())
+
+methods = {}
+smethods = set()
+for command in r._fmt_registry.values():
+    methods[command.__name__.lower()] = command.available_methods
+    for x in command.available_methods:
+        smethods.add(x)
+
+
 
 
 class Options(argparse.ArgumentParser):
     def __init__(self, prog=NAME, epilog=None):
         usage = col.purple(sequana_prolog.format(**{"name": NAME}))
+
+        usage += """\nTo convert a bunch of fastq files into fasta, initiate the pipeline using:
+
+    sequana_bioconvert --input-directory data/ --input-ext "fastq.gz" --output-ext "fasta.gz" 
+ --use-apptainer --apptainer-prefix ~/images/ --command fastq2fasta --input-pattern "*"
+
+    cd bioconvert
+    sh bioconvert.sh
+
+
+"""
+
         super(Options, self).__init__(usage=usage, prog=prog, description="",
             epilog=epilog,
             formatter_class=argparse.ArgumentDefaultsHelpFormatter
@@ -50,19 +79,22 @@ def __init__(self, prog=NAME, epilog=None):
         pipeline_group = self.add_argument_group("pipeline")
 
         pipeline_group.add_argument("--input-pattern", dest="input_pattern",
-            required=True, type=str)
+            required=True, type=str, help="""The input pattern that allows you to restrict the search more specifically
+(default is to take all files in the input directory""")
         pipeline_group.add_argument("--input-directory", dest="input_directory",
-            required=True, type=str)
+            required=True, type=str, help="""The input directory where to look for input files""")
         pipeline_group.add_argument("--input-ext", dest="input_extension",
-            required=True, type=str)
-        pipeline_group.add_argument("--output-ext", dest="output_extension",
-            required=True, type=str)
+            required=True, type=str, help="""The extension of the files to convert. See bioconvert --help for details""")
+        pipeline_group.add_argument("--output-ext", dest="output_extension", 
+            required=True, type=str, help="""The extension of the output files. See bioconvert --help for details""")
         pipeline_group.add_argument("--command", dest="command",
-            required=True, type=str)
+            required=True, type=str, help="""One of the possible conversion available in bioconvert.""",
+choices=commands)
         pipeline_group.add_argument("--method", dest="method",
             type=str,
             default=None,
-            help="If you know bioconvert and method's name, you can set it here")
+            choices=smethods,
+            help="If you know bioconvert and method's name, you can set it here. This depends on the command used. Type 'bioconvert fastq-fasta --show--methods' to get the valid method for the command 'fastq2fasta' ")
 
 
 def main(args=None):
@@ -82,7 +114,6 @@ def main(args=None):
     # create the beginning of the command and the working directory
     manager.setup()
     from sequana_pipetools import logger
-
     logger.setLevel(options.level)
     logger.name = "sequana_bioconvert"
     logger.info(f"#Welcome to sequana_bioconvert pipeline.")

diff --git a/setup.py b/setup.py
@@ -4,8 +4,8 @@
 from setuptools.command.install import install
 import subprocess
 
-_MAJOR               = 0
-_MINOR               = 10
+_MAJOR               = 1
+_MINOR               = 0
 _MICRO               = 0
 version              = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO)
 release              = '%d.%d' % (_MAJOR, _MINOR)
@@ -59,6 +59,18 @@
 
     install_requires = open("requirements.txt").read(),
 
+    extras_require={
+        "testing": [
+            "pytest",
+            "pytest-cov",
+            "pytest-xdist",
+            "pytest-mock",
+            "pytest-timeout",
+            "pytest-runner",
+            "coveralls",
+        ],
+    },
+
     # This is recursive include of data files
     exclude_package_data = {"": ["__pycache__"]},
     package_data = {
-Original file line number
+Diff line change
@@ Expand Up @@
     samples = [Path(x).name.split(".")[0] for x in filenames]
-    print(samples)
     # create symbolic links
     try:
@@ Expand Down @@