Merge pull request #115 from pepkit/dev
Release  0.12.2
khoroshevskyi authored Apr 25, 2023
2 parents cb1c4e4 + 3ad1dda commit f37fbe8
Showing 31 changed files with 282 additions and 6,160 deletions.
25 changes: 21 additions & 4 deletions .github/workflows/run-codecov.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
name: Run codecov

on:
push:
branches: [dev]
pull_request:
branches: [master, dev]
branches: [master]

jobs:
pytest:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [3.9]
python-version: [3.11]
os: [ubuntu-latest]

steps:
- uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install test dependencies
run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi

- name: Install package
run: python -m pip install .

- name: Run pytest tests
run: pytest tests --cov=./ --cov-report=xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
name: py-${{ matrix.python-version }}-${{ matrix.os }}
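Because the rendered diff above lost its +/- markers, here is a sketch of how the trigger and matrix sections of `run-codecov.yml` should read after this change (reconstructed from the hunks; indentation assumed):

```yaml
name: Run codecov

on:
  push:
    branches: [dev]
  pull_request:
    branches: [master]

jobs:
  pytest:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: [3.11]
        os: [ubuntu-latest]
```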
5 changes: 1 addition & 4 deletions .github/workflows/run-pytest.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.11"]
os: [ubuntu-latest]

steps:
@@ -22,9 +22,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Install dev dependencies
run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi

- name: Install test dependencies
run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi

3 changes: 2 additions & 1 deletion .gitignore
@@ -3,6 +3,7 @@

# Python
*.pyc
build/

# ignore test results
tests/test/*
@@ -94,4 +95,4 @@ docs_jupyter/*
.env/
env/
.venv/
venv/
4 changes: 4 additions & 0 deletions docs/changelog.md
@@ -1,5 +1,9 @@
# Changelog

## [0.12.2] -- 2023-04-25
- Added `max-prefetch-size` argument. #113
- Improved code and logger structure.

## [0.12.0] -- 2023-03-27
- Added functionality that saves gse metadata to config file
- Fixed description in initialization of pepy object
2 changes: 1 addition & 1 deletion docs/gse_finder.md
@@ -17,7 +17,7 @@ from geofetch import Finder
gse_obj = Finder()

# Optionally: provide filter string and max number of retrieve elements
gse_obj = Finder(filter="((bed) OR narrow peak) AND Homo sapiens[Organism]", retmax=10)
gse_obj = Finder(filters="((bed) OR narrow peak) AND Homo sapiens[Organism]", retmax=10)
```

1) Get list of all GSE in GEO
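The fix above renames the keyword argument from `filter` to `filters`. The string it takes is an ordinary GEO/Entrez search query; for illustration, the example query can be assembled programmatically (variable names here are hypothetical, only the final string comes from the docs):

```python
# Hypothetical assembly of the search string passed to Finder(filters=...)
organism = "Homo sapiens"
filters = f"((bed) OR narrow peak) AND {organism}[Organism]"
print(filters)  # -> ((bed) OR narrow peak) AND Homo sapiens[Organism]
```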
153 changes: 86 additions & 67 deletions docs/usage.md
@@ -1,108 +1,127 @@
# Usage reference
# <img src="./img/geofetch_logo.svg" class="img-header"> usage reference

`geofetch` command-line usage instructions:

geofetch command-line usage instructions:

`geofetch -V`
```console
geofetch 0.11.0
```

`geofetch --help`
```console
usage: geofetch [-h] [-V] -i INPUT [-n NAME] [-m METADATA_ROOT] [-u METADATA_FOLDER]
[--just-metadata] [-r] [--config-template CONFIG_TEMPLATE]
[--pipeline-samples PIPELINE_SAMPLES] [--pipeline-project PIPELINE_PROJECT]
[--disable-progressbar] [-k SKIP] [--acc-anno] [--discard-soft]
[--const-limit-project CONST_LIMIT_PROJECT]
[--const-limit-discard CONST_LIMIT_DISCARD]
[--attr-limit-truncate ATTR_LIMIT_TRUNCATE] [--add-dotfile] [-p]
[--data-source {all,samples,series}] [--filter FILTER]
[--filter-size FILTER_SIZE] [-g GEO_FOLDER] [-x] [-b BAM_FOLDER]
[-f FQ_FOLDER] [--use-key-subset] [--silent] [--verbosity V] [--logdev]
```console
usage: geofetch [<args>]
An example of how to use geofetch (to download just the metadata of GSE67303):
geofetch -i GSE67303 -m <folder> --just-metadata
To download all processed data of GSE67303:
geofetch -i GSE67303 --processed --geo-folder <folder> -m <folder>
Automatic GEO and SRA data downloader
optional arguments:
options:
-h, --help show this help message and exit
-V, --version show program's version number and exit
-i INPUT, --input INPUT
required: a GEO (GSE) accession, or a file with a list of GSE
numbers
required: a GEO (GSE) accession, or a file with a list
of GSE numbers
-n NAME, --name NAME Specify a project name. Defaults to GSE number
-m METADATA_ROOT, --metadata-root METADATA_ROOT
Specify a parent folder location to store metadata. The project name
will be added as a subfolder [Default: $SRAMETA:]
Specify a parent folder location to store metadata.
The project name will be added as a subfolder
[Default: $SRAMETA:]
-u METADATA_FOLDER, --metadata-folder METADATA_FOLDER
Specify an absolute folder location to store metadata. No subfolder
will be added. Overrides value of --metadata-root [Default: Not used
(--metadata-root is used by default)]
--just-metadata If set, don't actually run downloads, just create metadata
Specify an absolute folder location to store metadata.
No subfolder will be added. Overrides value of
--metadata-root.
--just-metadata If set, don't actually run downloads, just create
metadata
-r, --refresh-metadata
If set, re-download metadata even if it exists.
--config-template CONFIG_TEMPLATE
Project config yaml file template.
--pipeline-samples PIPELINE_SAMPLES
Optional: Specify one or more filepaths to SAMPLES pipeline
interface yaml files. These will be added to the project config file
to make it immediately compatible with looper. [Default: null]
Optional: Specify one or more filepaths to SAMPLES
pipeline interface yaml files. These will be added to
the project config file to make it immediately
compatible with looper. [Default: null]
--pipeline-project PIPELINE_PROJECT
Optional: Specify one or more filepaths to PROJECT pipeline
interface yaml files. These will be added to the project config file
to make it immediately compatible with looper. [Default: null]
Optional: Specify one or more filepaths to PROJECT
pipeline interface yaml files. These will be added to
the project config file to make it immediately
compatible with looper. [Default: null]
--disable-progressbar
Optional: Disable progressbar
-k SKIP, --skip SKIP Skip some accessions. [Default: no skip].
--acc-anno Optional: Produce annotation sheets for each accession. Project
combined PEP for the whole project won't be produced.
--discard-soft Optional: After creation of PEP files, all soft and additional files
--acc-anno Optional: Produce annotation sheets for each
accession. Project combined PEP for the whole project
won't be produced.
--discard-soft Optional: After creation of PEP files, all .soft files
will be deleted
--const-limit-project CONST_LIMIT_PROJECT
Optional: Limit of the number of the constant sample characters that
should not be in project yaml. [Default: 50]
Optional: Limit of the number of the constant sample
characters that should not be in project yaml.
[Default: 50]
--const-limit-discard CONST_LIMIT_DISCARD
Optional: Limit of the number of the constant sample characters that
should not be discarded [Default: 250]
Optional: Limit of the number of the constant sample
characters that should not be discarded [Default: 250]
--attr-limit-truncate ATTR_LIMIT_TRUNCATE
Optional: Limit of the number of sample characters.Any attribute
with more than X characters will truncate to the first X, where X is
a number of characters [Default: 500]
--add-dotfile Optional: Add .pep.yaml file that points .yaml PEP file
Optional: Limit of the number of sample characters.
Any attribute with more than X characters will be
truncated to the first X, where X is a number of
characters [Default: 500]
--add-dotfile Optional: Add .pep.yaml file that points .yaml PEP
file
--max-soft-size MAX_SOFT_SIZE
Optional: Max size of soft file. [Default: 1GB].
Supported input formats : 12B, 12KB, 12MB, 12GB.
--max-prefetch-size MAX_PREFETCH_SIZE
Argument to pass to prefetch program's --max-size
option, if prefetch will be used in this run of
geofetch; for reference: https://github.com/ncbi/sra-
tools/wiki/08.-prefetch-and-fasterq-dump#check-the-
maximum-size-limit-of-the-prefetch-tool
--silent Silence logging. Overrides verbosity.
--verbosity V Set logging level (1-5 or logging module level name)
--logdev Expand content of logging message format.
processed:
-p, --processed Download processed data [Default: download raw data].
--data-source {all,samples,series}
Optional: Specifies the source of data on the GEO record to retrieve
processed data, which may be attached to the collective series
entity, or to individual samples. Allowable values are: samples,
series or both (all). Ignored unless 'processed' flag is set.
[Default: samples]
--filter FILTER Optional: Filter regex for processed filenames [Default:
None].Ignored unless 'processed' flag is set.
Optional: Specifies the source of data on the GEO
record to retrieve processed data, which may be
attached to the collective series entity, or to
individual samples. Allowable values are: samples,
series or both (all). Ignored unless 'processed' flag
is set. [Default: samples]
--filter FILTER Optional: Filter regex for processed filenames
[Default: None].Ignored unless 'processed' flag is
set.
--filter-size FILTER_SIZE
Optional: Filter size for processed files that are stored as sample
repository [Default: None]. Works only for sample data. Supported
input formats : 12B, 12KB, 12MB, 12GB. Ignored unless 'processed'
flag is set.
Optional: Filter size for processed files that are
stored as sample repository [Default: None]. Works
only for sample data. Supported input formats : 12B,
12KB, 12MB, 12GB. Ignored unless 'processed' flag is
set.
-g GEO_FOLDER, --geo-folder GEO_FOLDER
Optional: Specify a location to store processed GEO files. Ignored
unless 'processed' flag is set.[Default: $GEODATA:]
Optional: Specify a location to store processed GEO
files. Ignored unless 'processed' flag is set.
[Default: $GEODATA:]
raw:
-x, --split-experiments
Split SRR runs into individual samples. By default, SRX experiments
with multiple SRR Runs will have a single entry in the annotation
table, with each run as a separate row in the subannotation table.
This setting instead treats each run as a separate sample
Split SRR runs into individual samples. By default,
SRX experiments with multiple SRR Runs will have a
single entry in the annotation table, with each run as
a separate row in the subannotation table. This
setting instead treats each run as a separate sample
-b BAM_FOLDER, --bam-folder BAM_FOLDER
Optional: Specify folder of bam files. Geofetch will not download
sra files when corresponding bam files already exist. [Default:
$SRABAM:]
Optional: Specify folder of bam files. Geofetch will
not download sra files when corresponding bam files
already exist. [Default: $SRABAM:]
-f FQ_FOLDER, --fq-folder FQ_FOLDER
Optional: Specify folder of fastq files. Geofetch will not download
sra files when corresponding fastq files already exist. [Default:
$SRAFQ:]
--use-key-subset Use just the keys defined in this module when writing out metadata.
Optional: Specify folder of fastq files. Geofetch will
not download sra files when corresponding fastq files
already exist. [Default: $SRAFQ:]
--use-key-subset Use just the keys defined in this module when writing
out metadata.
--add-convert-modifier
Add looper SRA convert modifier to config file.
```
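Several options above (`--max-soft-size`, `--filter-size`) accept sizes in the formats `12B, 12KB, 12MB, 12GB`. A minimal sketch of a parser for that convention (a hypothetical helper, not geofetch's actual implementation, assuming binary multiples):

```python
import re

# Hypothetical size parser for the "12B, 12KB, 12MB, 12GB" convention
# used by --max-soft-size and --filter-size; geofetch's real parser may differ.
UNITS = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3}

def parse_size(text: str) -> int:
    """Convert a size string such as '12KB' into a number of bytes."""
    match = re.fullmatch(r"(\d+)\s*(B|KB|MB|GB)", text.strip().upper())
    if match is None:
        raise ValueError(f"Unsupported size format: {text!r}")
    number, unit = match.groups()
    return int(number) * UNITS[unit]

print(parse_size("12KB"))  # -> 12288
```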
5 changes: 0 additions & 5 deletions docs_jupyter/build/processed-data-downloading.md
@@ -24,11 +24,6 @@ Calling geofetch will do 4 tasks:

Complete details about geofetch outputs is cataloged in the [metadata outputs reference](metadata_output.md).

from IPython.core.display import SVG
SVG(filename='logo.svg')

![arguments_outputs.svg](attachment:arguments_outputs.svg)

## Download the data

First, create the metadata for processed data (by adding --processed and --just-metadata):
11 changes: 8 additions & 3 deletions geofetch/__init__.py
@@ -1,7 +1,12 @@
""" Package-level data """
from .geofetch import *
from .finder import *
from ._version import __version__
import logmuse

from geofetch.geofetch import *
from geofetch.finder import *
from geofetch._version import __version__


__author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"]
__all__ = ["Finder", "Geofetcher"]

logmuse.init_logger("geofetch")
10 changes: 10 additions & 0 deletions geofetch/__main__.py
@@ -0,0 +1,10 @@
import sys
from geofetch.geofetch import main

if __name__ == "__main__":
try:
sys.exit(main())

except KeyboardInterrupt:
print("Pipeline aborted.")
sys.exit(1)
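The new `geofetch/__main__.py` enables `python -m geofetch` and turns Ctrl-C into a clean exit code. The pattern can be sketched in isolation like this (with a stand-in for `geofetch.geofetch.main`):

```python
import sys

def main() -> int:
    """Stand-in for geofetch.geofetch.main(); returns a process exit code."""
    return 0

def run() -> int:
    # Mirror __main__.py: propagate main()'s exit code, but convert a
    # KeyboardInterrupt (Ctrl-C) into a message plus exit code 1.
    try:
        return main()
    except KeyboardInterrupt:
        print("Pipeline aborted.")
        return 1

if __name__ == "__main__":
    sys.exit(run())
```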
2 changes: 1 addition & 1 deletion geofetch/_version.py
@@ -1 +1 @@
__version__ = "0.12.1"
__version__ = "0.12.2"
19 changes: 11 additions & 8 deletions geofetch/cli.py
@@ -1,7 +1,7 @@
import argparse
import os
import logmuse
from ._version import __version__
from geofetch._version import __version__


def _safe_echo(var):
@@ -18,12 +18,10 @@ def _parse_cmdl(cmdl):
usage="""geofetch [<args>]
The example how to use geofetch (to download GSE573030 just metadata):
geofetch -i GSE67303 -m `pwd` --just-metadata
geofetch -i GSE67303 -m <folder> --just-metadata
To download all processed data of GSE57303:
geofetch -i GSE67303 --processed --geo-folder `pwd` -m `pwd`
* where `pwd` is a current directory
geofetch -i GSE67303 --processed --geo-folder <folder> -m <folder>
""",
)
@@ -63,8 +61,7 @@ def _parse_cmdl(cmdl):
"-u",
"--metadata-folder",
help="Specify an absolute folder location to store metadata. "
"No subfolder will be added. Overrides value of --metadata-root "
"[Default: Not used (--metadata-root is used by default)]",
"No subfolder will be added. Overrides value of --metadata-root.",
)

parser.add_argument(
@@ -127,7 +124,7 @@ def _parse_cmdl(cmdl):
parser.add_argument(
"--discard-soft",
action="store_true",
help="Optional: After creation of PEP files, all soft and additional files will be deleted",
help="Optional: After creation of PEP files, all .soft files will be deleted",
)

parser.add_argument(
@@ -170,6 +167,12 @@ def _parse_cmdl(cmdl):
Supported input formats : 12B, 12KB, 12MB, 12GB. """,
)

parser.add_argument(
"--max-prefetch-size",
help="Argument to pass to prefetch program's --max-size option, if prefetch will be used in this run of geofetch; "
"for reference: https://github.com/ncbi/sra-tools/wiki/08.-prefetch-and-fasterq-dump#check-the-maximum-size-limit-of-the-prefetch-tool",
)

processed_group.add_argument(
"-p",
"--processed",
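The new `--max-prefetch-size` option is a plain string passed through to prefetch's `--max-size`. A self-contained sketch of the argparse wiring (the parser here is a minimal assumption, not geofetch's full CLI):

```python
import argparse

# Minimal sketch of the new option; geofetch's real parser defines many more arguments.
parser = argparse.ArgumentParser(prog="geofetch")
parser.add_argument(
    "--max-prefetch-size",
    help="Argument to pass to prefetch program's --max-size option",
)

args = parser.parse_args(["--max-prefetch-size", "5g"])
# Dashes in option names become underscores in the Namespace attribute.
print(args.max_prefetch_size)  # -> 5g
```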