From 8b214a442080eebb83a0983008f66721ff2812b1 Mon Sep 17 00:00:00 2001 From: "Ziyang \"Claude\" Hu" <33562602+ClaudeHu@users.noreply.github.com> Date: Tue, 2 Apr 2024 16:24:03 -0400 Subject: [PATCH 1/7] Update text2bednn-search-interface.md --- .../tutorials/text2bednn-search-interface.md | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/docs/geniml/tutorials/text2bednn-search-interface.md b/docs/geniml/tutorials/text2bednn-search-interface.md index 5c284ee..25913d6 100644 --- a/docs/geniml/tutorials/text2bednn-search-interface.md +++ b/docs/geniml/tutorials/text2bednn-search-interface.md @@ -10,7 +10,7 @@ file embedding vectors, and the BED files whose embedding vectors are closest to ## Store embedding vectors It is recommended to use `geniml.search.backend.HNSWBackend` to store embedding vectors. In the `HNSWBackend` that stores each BED file embedding vector, the `payload` should contain the name of BED file. In the `HNSWBackend` that stores the embedding vectors of each -metadata string, the `payload` should contain the name of BED files that have that string in metadata. +metadata string, the `payload` should contain the original string text and the names of BED files that have that string in metadata. ## Train the model Training a `Vec2VecFNN` needs x-y pairs of vectors (x: metadata embedding vector; y: BED embedding vector). A pair of a metadata embedding @@ -39,24 +39,45 @@ v2v_torch_contrast.train( ``` -## text2bednn search interface -The `TextToBedNNSearchInterface` includes model that encode natural language to vectors (default: `FlagEmbedding`), a -model that encode natural language embedding vectors to BED file embedding vectors (`Embed2EmbedNN`), and a `search` backend. +## Search interface +A search interface consists of a storage backend where vectors are stored, and a module (`geniml.search.query2vec`) that embed the query. +`geniml.search` supports two types of queries: region set query and text query. 
+ +### Region set query + +`BED2Vec` embeds the query region set with a `Region2VecExModel`, and the embedding vector is used to perform KNN search within the backend. ```python -from geniml.text2bednn.text2bednn import Text2BEDSearchInterface +from geniml.search import BED2BEDSearchInterface, BED2Vec + +# init BED2Vec with a hugging face repo of a Region2VecExModel +bed2vec = BED2Vec("databio/r2v-ChIP-atlas-hg38-v2") + +# the search_backend can be QdrantBackend or HNSWBackend +search_interface = BED2BEDSearchInterface(search_backend, bed2vec) + +# the query can be a RegionSet object (see geniml.io) or path to a BED file on disk +file_search_result = search_interface.query_search("path/to/a/bed/file.bed", 5) +``` + +### Text query -# initiate the search interface -file_interface = Text2BEDSearchInterface(nl_model, e2enn, hnsw_backend) +`Text2Vec` embeds the query string with a natural language embedding model first (default: `FlagEmbedding`), and then maps the text embedding vector into the embedding space of region sets through a trained `Vec2VecFNN`. 
-# natural language query string -query_term = "human, kidney, blood" -# perform KNN search with K = 5, the id of stored vectors and the distance / similarity score will be returned -ids, scores = file_interface.nl_vec_search(query_term, 5) +``` +from geniml.search import Text2BEDSearchInterface, Text2Vec + +text2vec = Text2Vec( + "sentence-transformers/all-MiniLM-L6-v2", # either a hugging face repo or an object from geniml.text2bednn.embedder + "databio/v2v-geo-hg38" # either a hugging face repo or a Vec2VecFNN +) + +search_interface = Text2BEDSearchInterface(search_backend, text2vec) +text_search_result = search_interface.query_search("cancer cells", 5) ``` -### Evaluate search performance With a dictionary that contains query strings and id of relevant query results in search backend in this format: + ``` { : [ @@ -66,7 +87,9 @@ With a dictionary that contains query strings and id of relevant query results i ... } ``` -`TextToBedNNSearchInterface` can return [mean average precision](https://www.youtube.com/watch?v=pM6DJ0ZZee0&t=157s), [average AUC-ROC](https://nlp.stanford.edu/IR-book/pdf/08eval.pdf), and [average R-Precision](https://link.springer.com/referenceworkentry/10.1007/978-0-387-39940-9_491), here is example code: + +`Text2BEDSearchInterface` can return [mean average precision](https://www.youtube.com/watch?v=pM6DJ0ZZee0&t=157s), [average AUC-ROC](https://nlp.stanford.edu/IR-book/pdf/08eval.pdf), and [average R-Precision](https://link.springer.com/referenceworkentry/10.1007/978-0-387-39940-9_491), here is example code: + ```python query_dict = { "metadata string 1": [2, 3], @@ -75,5 +98,5 @@ query_dict = { "metadata string 1": [0] } -MAP, AUC, RP = file_interface.eval(query_dict) +MAP, AUC, RP = search_interface.eval(query_dict) ``` From caf20ab2f5ce3aa12de611d12a1fb5a174cc1af0 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Apr 2024 09:54:41 -0400 Subject: [PATCH 2/7] fixedd test 2 bed nn --- autodoc.py | 2 +- ci/scripts/count_records.py | 
23 ++++++++++++++----- .../tutorials/text2bednn-search-interface.md | 6 ++--- mkdocs.yml | 1 + 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/autodoc.py b/autodoc.py index 94d36f9..f493343 100644 --- a/autodoc.py +++ b/autodoc.py @@ -99,4 +99,4 @@ with open(out, "w") as stream: stream.write(md_result) else: - print("Skipping jupyter notebooks") \ No newline at end of file + print("Skipping jupyter notebooks") diff --git a/ci/scripts/count_records.py b/ci/scripts/count_records.py index 936ec36..dbf5c6a 100755 --- a/ci/scripts/count_records.py +++ b/ci/scripts/count_records.py @@ -7,15 +7,26 @@ parser = ArgumentParser(description="Count records in a PostgreSQL table and verify") -parser.add_argument("-t", "--table", help="Table to count records in", - required=True, type=str) -parser.add_argument("-e", "--expected-count", help="Expected number of records", - type=int, required=False, default=None) +parser.add_argument( + "-t", "--table", help="Table to count records in", required=True, type=str +) +parser.add_argument( + "-e", + "--expected-count", + help="Expected number of records", + type=int, + required=False, + default=None, +) args = parser.parse_args() -bbc = BedBaseConf(get_bedbase_cfg('$GITHUB_WORKSPACE/ci/cfg/config_min.yaml')) +bbc = BedBaseConf(get_bedbase_cfg("$GITHUB_WORKSPACE/ci/cfg/config_min.yaml")) row_count = bbc._count_rows(table_name=args.table) if args.expected_count: - assert row_count == args.expected_count, "Number of records in the '{}' table ({}) not equal {}".format(args.table, row_count, args.expected_count) + assert ( + row_count == args.expected_count + ), "Number of records in the '{}' table ({}) not equal {}".format( + args.table, row_count, args.expected_count + ) sys.exit(0) diff --git a/docs/geniml/tutorials/text2bednn-search-interface.md b/docs/geniml/tutorials/text2bednn-search-interface.md index 25913d6..08e8c68 100644 --- a/docs/geniml/tutorials/text2bednn-search-interface.md +++ 
b/docs/geniml/tutorials/text2bednn-search-interface.md @@ -1,15 +1,15 @@ # How to create a natural language search backend for BED files The metadata of each BED file is needed to build a natural language search backend. BED files embedding vectors are created by -`Region2Vec`, and metadata embedding vectors are created by [`FastEmbed`](https://github.com/qdrant/fastembed), [`SentenceTransformers`](https://www.sbert.net/), or other text embedding models. +a `Region2Vec` model, and metadata embedding vectors are created by [`FastEmbed`](https://github.com/qdrant/fastembed), [`SentenceTransformers`](https://www.sbert.net/), or other text embedding models. `Vec2VecFNN`, a feedforward neural network (FNN), is trained to maps vectors from the embedding space of natural language to the embedding -space of BED files. When a natural language query string is given, it will first be encoded to a vector by the text embedding model, and that +space of BED files. When a natural language query string is given, it will first be encoded to a vector by the text embedding model, and then the created vector will be encoded to a query vector by the FNN. `search` backend can perform k-nearest neighbors (KNN) search among the stored BED file embedding vectors, and the BED files whose embedding vectors are closest to that query vector are the search results. ## Store embedding vectors It is recommended to use `geniml.search.backend.HNSWBackend` to store embedding vectors. In the `HNSWBackend` that stores each BED file embedding -vector, the `payload` should contain the name of BED file. In the `HNSWBackend` that stores the embedding vectors of each +vector, the `payload` should contain the name or identifier of the BED file. In the `HNSWBackend` that stores the embedding vectors of each metadata string, the `payload` should contain the original string text and the names of BED files that have that string in metadata. 
## Train the model diff --git a/mkdocs.yml b/mkdocs.yml index 3b33496..4ae8610 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -102,6 +102,7 @@ nav: - Fine-tune embeddings: geniml/tutorials/fine-tune-region2vec-model.md - Randomize bed files: geniml/tutorials/bedshift.md - Create evaluation dataset with bedshift: geniml/tutorials/bedshift-evaluation-guide.md + - Create search backend: geniml/tutorials/text2bednn-search-interface.md - Reference: - How to cite: citations.md - API documentation: geniml/autodoc_build/geniml.md From 3ebb2909dd10e7f91962d040305935efe4e0fa6d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Apr 2024 11:20:21 -0400 Subject: [PATCH 3/7] updated bedboss --- docs/bedboss/bedboss-insert.md | 8 +- docs/bedboss/usage.md | 490 ++++++++++++++++----------------- 2 files changed, 239 insertions(+), 259 deletions(-) diff --git a/docs/bedboss/bedboss-insert.md b/docs/bedboss/bedboss-insert.md index 7a345ac..92c924d 100644 --- a/docs/bedboss/bedboss-insert.md +++ b/docs/bedboss/bedboss-insert.md @@ -1,10 +1,10 @@ -# BEDboss insert +# BEDboss run-pep -Bedboss supports inserting and running all pipelines using a single command: `bedboss insert`. +Bedboss supports inserting and running all pipelines using a single command: `bedboss run-pep`. To run `bedboss insert` you need to have few things set up:
-0) Installed bedboss dependencies. See [bedboss dependencies](./how_to_install_r_dep.md) for more information
-1) Created **config file** with all the necessary information. See [bedboss config](./how_to_config_bedboss.md) for more information
+0) Installed bedboss dependencies. See [bedboss dependencies](./how_to_install_r_dependencies.md) for more information
+1) Created **config file** with all the necessary information. See [bedboss config](./how-to_configure.md) for more information
2) **PEP** project with all the necessary information. Project can be stored locally or on [PEPhub](https://pephub.databio.org/)
Before running pipeline PEP should be validated using eido or pephub schema. Bedboss insert schema: [bedboss insert schema](https://schema.databio.org/pipelines/bedboss.yaml)
3) Provide path to **output directory**
diff --git a/docs/bedboss/usage.md b/docs/bedboss/usage.md index da1003f..fd0983c 100644 --- a/docs/bedboss/usage.md +++ b/docs/bedboss/usage.md @@ -1,301 +1,281 @@ # Usage reference -BEDboss is command-line tool-warehouse of 3 pipelines for genomic interval files +BEDboss is command-line tool-manager and a set of tools for working with BED files and BEDbase. Main components of BEDboss are: +1) Pipelines for processing BED files: bedmaker, bedqc, and bedstats. +2) Indexing of the Bed files in bedbase +3) Managing bed and bedsets in the database -BEDboss include: bedmaker, bedqc, bedstat. This pipelines can be run using next positional arguments: +Here you can see the command-line usage instructions for the main bedboss command and for each subcommand: -- `bedbase all`: Runs all pipelines one in order: bedmaker -> bedqc -> bedstat +## `bedboss --help` +```console + + Usage: bedboss [OPTIONS] COMMAND [ARGS]... + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --version -v App version │ +│ --install-completion [bash|zsh|fish|powershell|pwsh] Install completion for the specified shell. [default: None] │ +│ --show-completion [bash|zsh|fish|powershell|pwsh] Show completion for the specified shell, to copy it or customize the installation. [default: None] │ +│ --help Show this message and exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ check-requirements check installed R packages │ +│ delete-bed Delete bed from the bedbase database │ +│ delete-bedset Delete BedSet from the bedbase database │ +│ init-config Initialize the new, sample configuration file │ +│ make-bed Create a bed files form a [bigwig, bedgraph, bed, bigbed, wig] file │ +│ make-bedset Create a bedset from a pep file, and insert it to the bedbase database. │ +│ make-bigbed Create a bigbed files form a bed file │ +│ reindex Reindex the bedbase database and insert all files to the qdrant database. │ +│ run-all Run all the bedboss pipeline for a single bed file │ +│ run-pep Run the all bedboss pipeline for a bed files in a PEP │ +│ run-qc Run the quality control for a bed file │ +│ run-stats Create the statistics for a single bed file. 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -- `bedbase insert`: Runs all pipelines one in order by using PEP file and creates bedset: bedmaker -> bedqc -> bedstat -> bedbuncher +``` -- `bedbase make`: Creates Bed and BigBed files from other type of genomic interval files [bigwig|bedgraph|bed|bigbed|wig] +## `bedboss check-requirements --help` +```console + + Usage: bedboss check-requirements [OPTIONS] + + check installed R packages + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -- `bedbase qc`: Runs Quality control for bed file (Works only with bed files) +``` -- `bedbase stat`: Runs statistics for bed and bigbed files. +## `bedboss delete-bed --help` +```console + + Usage: bedboss delete-bed [OPTIONS] + + Delete bed from the bedbase database + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --sample-id TEXT Sample ID [default: None] [required] │ +│ * --config TEXT Path to the bedbase config file [default: None] [required] │ +│ --help Show this message and exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -- `bedbase bunch`: Creates bedset from PEP file +``` -- `bedbase index`: Creates bed file vectors and inserts to qdrant database +## `bedboss delete-bedset --help` +```console + + Usage: bedboss delete-bedset [OPTIONS] + + Delete BedSet from the bedbase database + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --identifier TEXT BedSet ID [default: None] [required] │ +│ * --config TEXT Path to the bedbase config file [default: None] [required] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -Here you can see the command-line usage instructions for the main bedboss command and for each subcommand: +``` -## `bedboss --help` +## `bedboss init-config --help` ```console -version: 0.1.0a5 -usage: bedboss [-h] [--version] [--silent] [--verbosity V] [--logdev] - {all,insert,make,qc,stat,bunch,index} ... + + Usage: bedboss init-config [OPTIONS] + + Initialize the new, sample configuration file + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ --help Show this message and exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -Warehouse of pipelines for BED-like files: bedmaker, bedstat, and bedqc. +``` -positional arguments: - {all,insert,make,qc,stat,bunch,index} - all Run all bedboss pipelines and insert data into bedbase - insert Run all bedboss pipelines using one PEP and insert - data into bedbase - make A pipeline to convert bed, bigbed, bigwig or bedgraph - files into bed and bigbed formats - qc Run quality control on bed file (bedqc) - stat A pipeline to read a file in BED format and produce - metadata in JSON format. - bunch A pipeline to create bedsets (sets of BED files) that - will be retrieved from bedbase. - index Index not indexed bed files and add them to the qdrant - database +## `bedboss make-bed --help` +```console + + Usage: bedboss make-bed [OPTIONS] + + Create a bed files form a [bigwig, bedgraph, bed, bigbed, wig] file + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --input-file TEXT Path to the input file [default: None] [required] │ +│ * --input-type TEXT Type of the input file. Options are: bigwig, bedgraph, bed, bigbed, wig [default: None] [required] │ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ * --genome TEXT Genome name. Example: 'hg38' [default: None] [required] │ +│ --rfg-config TEXT Path to the rfg config file [default: None] │ +│ --narrowpeak --no-narrowpeak Is the input file a narrowpeak file? 
[default: no-narrowpeak] │ +│ --chrom-sizes TEXT Path to the chrom sizes file [default: None] │ +│ --multi --no-multi Run multiple samples [default: no-multi] │ +│ --recover --no-recover Recover from previous run [default: recover] │ +│ --dirty --no-dirty Run without removing existing files [default: no-dirty] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - --version show program's version number and exit - --silent Silence logging. Overrides verbosity. - --verbosity V Set logging level (1-5 or logging module level name) - --logdev Expand content of logging message format. ``` -## `bedboss all --help` +## `bedboss make-bedset --help` ```console -usage: bedboss all [-h] --outfolder OUTFOLDER -s SAMPLE_NAME -f INPUT_FILE -t - INPUT_TYPE -g GENOME [-r RFG_CONFIG] - [--chrom-sizes CHROM_SIZES] [-n] [--standard-chrom] - [--check-qc] [--open-signal-matrix OPEN_SIGNAL_MATRIX] - [--ensdb ENSDB] --bedbase-config BEDBASE_CONFIG - [--treatment TREATMENT] [--cell-type CELL_TYPE] - [--description DESCRIPTION] [--no-db-commit] - [--just-db-commit] [--skip-qdrant] [-R] [-N] [-D] [-F] [-T] - [--silent] [--verbosity V] [--logdev] + + Usage: bedboss make-bedset [OPTIONS] + + Create a bedset from a pep file, and insert it to the bedbase database. + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --pep TEXT PEP file. 
Local or remote path [default: None] [required] │ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ * --bedbase-config TEXT Path to the bedbase config file [default: None] [required] │ +│ * --bedset-name TEXT Name of the bedset [default: None] [required] │ +│ --heavy --no-heavy Run the heavy version of the pipeline [default: no-heavy] │ +│ --force-overwrite --no-force-overwrite Force overwrite the output files [default: no-force-overwrite] │ +│ --upload-s3 --no-upload-s3 Upload to S3 [default: no-upload-s3] │ +│ --upload-pephub --no-upload-pephub Upload to PEPHub [default: no-upload-pephub] │ +│ --no-fail --no-no-fail Do not fail on error [default: no-no-fail] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - --outfolder OUTFOLDER - Pipeline output folder [Required] - -s SAMPLE_NAME, --sample-name SAMPLE_NAME - name of the sample used to systematically build the - output name [Required] - -f INPUT_FILE, --input-file INPUT_FILE - Input file [Required] - -t INPUT_TYPE, --input-type INPUT_TYPE - Input type [Required] options: - (bigwig|bedgraph|bed|bigbed|wig) - -g GENOME, --genome GENOME - reference genome (assembly) [Required] - -r RFG_CONFIG, --rfg-config RFG_CONFIG - file path to the genome config file(refgenie) - --chrom-sizes CHROM_SIZES - a full path to the chrom.sizes required for the - bedtobigbed conversion - -n, --narrowpeak whether it's a narrowpeak file - --standard-chrom Standardize chromosome names. Default: False - --check-qc Check quality control before processing data. 
Default: - True - --open-signal-matrix OPEN_SIGNAL_MATRIX - a full path to the openSignalMatrix required for the - tissue specificity plots - --ensdb ENSDB A full path to the ensdb gtf file required for genomes - not in GDdata - --bedbase-config BEDBASE_CONFIG - a path to the bedbase configuration file [Required] - --treatment TREATMENT - A treatment of the bed file - --cell-type CELL_TYPE - A cell type of the bed file - --description DESCRIPTION - A description of the bed file - --no-db-commit skip the JSON commit to the database - --just-db-commit just commit the JSON to the database - --skip-qdrant whether to skip qdrant indexing - -R, --recover Overwrite locks to recover from previous failed run - -N, --new-start Overwrite all results to start a fresh run - -D, --dirty Don't auto-delete intermediate files - -F, --force-follow Always run 'follow' commands - -T, --testmode Only print commands, don't run - --silent Silence logging. Overrides verbosity. - --verbosity V Set logging level (1-5 or logging module level name) - --logdev Expand content of logging message format. 
``` -## `bedboss insert --help` +## `bedboss make-bigbed --help` ```console -usage: bedboss insert [-h] --bedbase-config BEDBASE_CONFIG --pep PEP - --output-folder OUTPUT_FOLDER [-r RFG_CONFIG] - [--check-qc] [--standard-chrom] [--create-bedset] - [--skip-qdrant] [--ensdb ENSDB] [--no-db-commit] - [--just-db-commit] [--force_overwrite] [--upload-s3] - [-R] [-N] [-D] [-F] [-T] [--silent] [--verbosity V] - [--logdev] + + Usage: bedboss make-bigbed [OPTIONS] + + Create a bigbed files form a bed file + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --bed-file TEXT Path to the input file [default: None] [required] │ +│ * --bed-type TEXT bed type to be used for bigBed file generation 'bed{bedtype}+{n}' [Default: None] (e.g bed3+1) [default: None] [required] │ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ * --genome TEXT Genome name. Example: 'hg38' [default: None] [required] │ +│ --rfg-config TEXT Path to the rfg config file [default: None] │ +│ --chrom-sizes TEXT Path to the chrom sizes file [default: None] │ +│ --multi --no-multi Run multiple samples [default: no-multi] │ +│ --recover --no-recover Recover from previous run [default: recover] │ +│ --dirty --no-dirty Run without removing existing files [default: no-dirty] │ +│ --help Show this message and exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - --bedbase-config BEDBASE_CONFIG - a path to the bedbase configuration file [Required] - --pep PEP path to the pep file or pephub registry path - containing pep [Required] - --output-folder OUTPUT_FOLDER - Pipeline output folder [Required] - -r RFG_CONFIG, --rfg-config RFG_CONFIG - file path to the genome config file(refgenie) - --check-qc Check quality control before processing data. Default: - True - --standard-chrom Standardize chromosome names. Default: False - --create-bedset Create bedset using pep samples. Name of the bedset - will be based on pep name.Default: False - --skip-qdrant whether to skip qdrant indexing - --ensdb ENSDB A full path to the ensdb gtf file required for genomes - not in GDdata - --no-db-commit skip the JSON commit to the database - --just-db-commit just commit the JSON to the database - --force_overwrite Weather to overwrite existing records. [Default: - False] - --upload-s3 Weather to upload bed, bigbed, and statistics to s3. - Before uploading you have to set up all necessury env - vars: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and - AWS_ENDPOINT_URL. [Default: False] - -R, --recover Overwrite locks to recover from previous failed run - -N, --new-start Overwrite all results to start a fresh run - -D, --dirty Don't auto-delete intermediate files - -F, --force-follow Always run 'follow' commands - -T, --testmode Only print commands, don't run - --silent Silence logging. Overrides verbosity. - --verbosity V Set logging level (1-5 or logging module level name) - --logdev Expand content of logging message format. 
``` -## `bedboss make --help` +## `bedboss reindex --help` ```console -usage: bedboss make [-h] -f INPUT_FILE --outfolder OUTFOLDER [-n] -t - INPUT_TYPE -g GENOME [-r RFG_CONFIG] -o OUTPUT_BED - --output-bigbed OUTPUT_BIGBED -s SAMPLE_NAME - [--chrom-sizes CHROM_SIZES] [--standard-chrom] [-R] [-N] - [-D] [-F] [-T] [--silent] [--verbosity V] [--logdev] + + Usage: bedboss reindex [OPTIONS] + + Reindex the bedbase database and insert all files to the qdrant database. + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --bedbase-config TEXT Path to the bedbase config file [default: None] [required] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - -f INPUT_FILE, --input-file INPUT_FILE - path to the input file [Required] - --outfolder OUTFOLDER - Pipeline output folder [Required] - -n, --narrowpeak whether it's a narrowpeak file - -t INPUT_TYPE, --input-type INPUT_TYPE - input file format (supported formats: bedGraph, - bigBed, bigWig, wig) [Required] - -g GENOME, --genome GENOME - reference genome [Required] - -r RFG_CONFIG, --rfg-config RFG_CONFIG - file path to the genome config file - -o OUTPUT_BED, --output-bed OUTPUT_BED - path to the output BED files [Required] - --output-bigbed OUTPUT_BIGBED - path to the folder of output bigBed files [Required] - -s SAMPLE_NAME, --sample-name SAMPLE_NAME - name of the sample used to systematically build the - output name [Required] - --chrom-sizes CHROM_SIZES - whether standardize chromosome names. 
If ture, - bedmaker will remove the regions on ChrUn chromosomes, - such as chrN_random and chrUn_random. [Default: False] - --standard-chrom Standardize chromosome names. Default: False - -R, --recover Overwrite locks to recover from previous failed run - -N, --new-start Overwrite all results to start a fresh run - -D, --dirty Don't auto-delete intermediate files - -F, --force-follow Always run 'follow' commands - -T, --testmode Only print commands, don't run - --silent Silence logging. Overrides verbosity. - --verbosity V Set logging level (1-5 or logging module level name) - --logdev Expand content of logging message format. ``` -## `bedboss qc --help` +## `bedboss run-all --help` ```console -usage: bedboss qc [-h] --bedfile BEDFILE --outfolder OUTFOLDER [-R] [-N] [-D] - [-F] [-T] [--silent] [--verbosity V] [--logdev] + + Usage: bedboss run-all [OPTIONS] + + Run all the bedboss pipeline for a single bed file + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --input-file TEXT Path to the input file [default: None] [required] │ +│ * --input-type TEXT Type of the input file. Options are: bigwig, bedgraph, bed, bigbed, wig [default: None] [required] │ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ * --genome TEXT Genome name. Example: 'hg38' [default: None] [required] │ +│ * --bedbase-config TEXT Path to the bedbase config file [default: None] [required] │ +│ --rfg-config TEXT Path to the rfg config file [default: None] │ +│ --narrowpeak --no-narrowpeak Is the input file a narrowpeak file? [default: no-narrowpeak] │ +│ --check-qc --no-check-qc Check the quality of the input file? 
[default: check-qc] │ +│ --chrom-sizes TEXT Path to the chrom sizes file [default: None] │ +│ --open-signal-matrix TEXT Path to the open signal matrix file [default: None] │ +│ --ensdb TEXT Path to the EnsDb database file [default: None] │ +│ --just-db-commit --no-just-db-commit Just commit to the database? [default: no-just-db-commit] │ +│ --force-overwrite --no-force-overwrite Force overwrite the output files [default: no-force-overwrite] │ +│ --upload-qdrant --no-upload-qdrant Upload to Qdrant [default: no-upload-qdrant] │ +│ --upload-s3 --no-upload-s3 Upload to S3 [default: no-upload-s3] │ +│ --upload-pephub --no-upload-pephub Upload to PEPHub [default: no-upload-pephub] │ +│ --multi --no-multi Run multiple samples [default: no-multi] │ +│ --recover --no-recover Recover from previous run [default: recover] │ +│ --dirty --no-dirty Run without removing existing files [default: no-dirty] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - --bedfile BEDFILE a full path to bed file to process [Required] - --outfolder OUTFOLDER - a full path to output log folder. [Required] - -R, --recover Overwrite locks to recover from previous failed run - -N, --new-start Overwrite all results to start a fresh run - -D, --dirty Don't auto-delete intermediate files - -F, --force-follow Always run 'follow' commands - -T, --testmode Only print commands, don't run - --silent Silence logging. Overrides verbosity. - --verbosity V Set logging level (1-5 or logging module level name) - --logdev Expand content of logging message format. 
``` -## `bedboss stat --help` +## `bedboss run-pep --help` ```console -usage: bedboss stat [-h] --bedfile BEDFILE --outfolder OUTFOLDER - [--open-signal-matrix OPEN_SIGNAL_MATRIX] [--ensdb ENSDB] - [--bigbed BIGBED] --bedbase-config BEDBASE_CONFIG - [-y SAMPLE_YAML] --genome GENOME [--no-db-commit] - [--just-db-commit] [-R] [-N] [-D] [-F] [-T] [--silent] - [--verbosity V] [--logdev] + + Usage: bedboss run-pep [OPTIONS] + + Run the all bedboss pipeline for a bed files in a PEP + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --pep TEXT PEP file. Local or remote path [default: None] [required] │ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ * --bedbase-config TEXT Path to the bedbase config file [default: None] [required] │ +│ --create-bedset --no-create-bedset Create a new bedset [default: no-create-bedset] │ +│ --bedset-heavy --no-bedset-heavy Run the heavy version of the bedbuncher pipeline [default: no-bedset-heavy] │ +│ --bedset-id TEXT Bedset ID [default: None] │ +│ --rfg-config TEXT Path to the rfg config file [default: None] │ +│ --check-qc --no-check-qc Check the quality of the input file? [default: check-qc] │ +│ --ensdb TEXT Path to the EnsDb database file [default: None] │ +│ --just-db-commit --no-just-db-commit Just commit to the database? 
[default: no-just-db-commit] │ +│ --force-overwrite --no-force-overwrite Force overwrite the output files [default: no-force-overwrite] │ +│ --upload-qdrant --no-upload-qdrant Upload to Qdrant [default: no-upload-qdrant] │ +│ --upload-s3 --no-upload-s3 Upload to S3 [default: no-upload-s3] │ +│ --upload-pephub --no-upload-pephub Upload to PEPHub [default: no-upload-pephub] │ +│ --no-fail --no-no-fail Do not fail on error [default: no-no-fail] │ +│ --multi --no-multi Run multiple samples [default: no-multi] │ +│ --recover --no-recover Recover from previous run [default: recover] │ +│ --dirty --no-dirty Run without removing existing files [default: no-dirty] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - --bedfile BEDFILE a full path to bed file to process [Required] - --outfolder OUTFOLDER - Pipeline output folder [Required] - --open-signal-matrix OPEN_SIGNAL_MATRIX - a full path to the openSignalMatrix required for the - tissue specificity plots - --ensdb ENSDB a full path to the ensdb gtf file required for genomes - not in GDdata - --bigbed BIGBED a full path to the bigbed files - --bedbase-config BEDBASE_CONFIG - a path to the bedbase configuration file [Required] - -y SAMPLE_YAML, --sample-yaml SAMPLE_YAML - a yaml config file with sample attributes to pass on - more metadata into the database - --genome GENOME genome assembly of the sample [Required] - --no-db-commit whether the JSON commit to the database should be - skipped - --just-db-commit whether just to commit the JSON to the database - -R, --recover Overwrite locks to recover from previous failed run - -N, --new-start Overwrite all results to start a fresh run - -D, --dirty Don't auto-delete intermediate files - -F, 
--force-follow Always run 'follow' commands - -T, --testmode Only print commands, don't run - --silent Silence logging. Overrides verbosity. - --verbosity V Set logging level (1-5 or logging module level name) - --logdev Expand content of logging message format. ``` -## `bedboss bunch --help` +## `bedboss run-qc --help` ```console -usage: bedboss bunch [-h] --bedbase-config BEDBASE_CONFIG --bedset-name - BEDSET_NAME --bedset-pep BEDSET_PEP - [--base-api BEDBASE_API] [--cache-path CACHE_PATH] - [--heavy] + + Usage: bedboss run-qc [OPTIONS] + + Run the quality control for a bed file + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --bed-file TEXT Path to the bed file to check the quality control on. [default: None] [required] │ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ --max-file-size INTEGER Maximum file size threshold to pass the quality [default: 2147483648] │ +│ --max-region-number INTEGER Maximum number of regions threshold to pass the quality [default: 5000000] │ +│ --min-region-width INTEGER Minimum region width threshold to pass the quality [default: 10] │ +│ --multi --no-multi Run multiple samples [default: no-multi] │ +│ --recover --no-recover Recover from previous run [default: recover] │ +│ --dirty --no-dirty Run without removing existing files [default: no-dirty] │ +│ --help Show this message and exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - --bedbase-config BEDBASE_CONFIG - a path to the bedbase configuration file [Required] - --bedset-name BEDSET_NAME - a name of the bedset [Required] - --bedset-pep BEDSET_PEP - bedset pep path or pephub registry path containing - bedset pep [Required] - --base-api BEDBASE_API - Bedbase API to use. Default is https://api.bedbase.org - --cache-path CACHE_PATH - Path to the cache folder. Default is ./bedabse_cache - --heavy whether to use heavy processing (Calculate and crate - plots using R script). ``` -## `bedboss index --help` +## `bedboss run-stats --help` ```console -usage: bedboss index [-h] --bedbase-config BEDBASE_CONFIG - [--bedbase-api BEDBASE_API] + + Usage: bedboss run-stats [OPTIONS] + + Create the statistics for a single bed file. + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --bed-file TEXT Path to the bed file [default: None] [required] │ +│ * --genome TEXT Genome name. Example: 'hg38' [default: None] [required] │ +│ * --outfolder TEXT Path to the output folder [default: None] [required] │ +│ --ensdb TEXT Path to the EnsDb database file [default: None] │ +│ --open-signal-matrix TEXT Path to the open signal matrix file [default: None] │ +│ --just-db-commit --no-just-db-commit Just commit to the database? 
[default: no-just-db-commit] │ +│ --multi --no-multi Run multiple samples [default: no-multi] │ +│ --recover --no-recover Recover from previous run [default: recover] │ +│ --dirty --no-dirty Run without removing existing files [default: no-dirty] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -options: - -h, --help show this help message and exit - --bedbase-config BEDBASE_CONFIG - a path to the bedbase configuration file [Required] - --bedbase-api BEDBASE_API - URL of the Bedbase API [Default: - https://api.bedbase.org] ``` From ffa67eabc5cc47c71c3991ee925e762db35ff62a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Apr 2024 12:09:58 -0400 Subject: [PATCH 4/7] updated bedboss tutorials --- docs/bedboss/README.md | 74 ++++++++++--------- docs/bedboss/bedboss-insert.md | 22 ------ docs/bedboss/changelog.md | 21 +++++- docs/bedboss/how-to-configure.md | 27 ++++--- docs/bedboss/how-to-create-database.md | 2 +- docs/bedboss/how-to-develop.md | 1 - ...cies.md => how-to-install-requirements.md} | 10 +++ docs/bedboss/tutorials/bedbuncher_tutorial.md | 18 ++++- docs/bedboss/tutorials/bedindex_tutorial.md | 8 +- docs/bedboss/tutorials/bedmaker_tutorial.md | 14 ++-- docs/bedboss/tutorials/bedqc_tutorial.md | 4 +- docs/bedboss/tutorials/bedstat_tutorial.md | 4 +- docs/bedboss/tutorials/tutorial_all.md | 24 +++--- ...tutorial_insert.md => tutorial_run_pep.md} | 16 ++-- mkdocs.yml | 5 +- 15 files changed, 133 insertions(+), 117 deletions(-) delete mode 100644 docs/bedboss/bedboss-insert.md delete mode 100644 docs/bedboss/how-to-develop.md rename docs/bedboss/{how-to-install-r-dependencies.md => how-to-install-requirements.md} (51%) rename docs/bedboss/tutorials/{tutorial_insert.md => tutorial_run_pep.md} (86%) diff --git 
a/docs/bedboss/README.md b/docs/bedboss/README.md index 0507d39..07b12a2 100644 --- a/docs/bedboss/README.md +++ b/docs/bedboss/README.md @@ -9,22 +9,49 @@

+bedboss is a command-line BEDbase management tool and a set of pipelines that manage region set files, calculate statistics for them, +and enter the results into a BEDbase database. +Main features: +1) bedmaker - pipeline to convert supported file types into BED format and bigBed format. +2) bedqc - pipeline to flag bed files for further evaluation to determine whether they should be included in the downstream analysis. +3) bedstat - pipeline for obtaining statistics about bed files. +4) bedbuncher - pipeline designed to create bedsets (sets of BED files) that will be retrieved from bedbase. +5) index - pipeline to create vectors of bedfiles and insert them into a vector database for further search. +6) Other delete and update tools that manage bed and bedset files in the BEDbase database. -bedboss is a command-line pipeline that standardizes and calculates statistics for genomic interval data, and enters the results into a BEDbase database. -It has 3 components: +The pipelines are mainly intended to be run from the command line; nevertheless, +they are also available as Python functions, so that users can integrate them into their own code (e.g. automatic uploading tools). -1) bedmaker (`bedboss make`);
-2) bedqc (`bedboss qc`);
-3) bedstat (`bedboss stat`);
-4) bedbuncher (`bedboss bunch`). +--- -You may run all 3 pipelines together, or separately. +## Installation +To install `bedboss` use this command: +``` +pip install bedboss +``` +or install the latest version from the GitHub repository: +``` +pip install git+https://github.com/databio/bedboss.git +``` -Mainly pipelines are intended to be run from command line but nevertheless, -they are also available as a python function, so that user can implement them to his own code. ----- -## BEDboss consist of 3 main pipelines: +--- + +## BEDboss dependencies +Before running any of the pipelines, you need to install the required dependencies. + +To check if all dependencies are installed, you can run the following command: + +```bash +bedboss check-requirements +``` + +All dependencies can be installed by following this how-to guide: [How to install dependencies](./how-to-install-requirements.md) + + +--- + +## Pipelines information ### bedmaker bedmaker - pipeline to convert supported file types* into BED format and bigBed format. Currently supported formats: @@ -64,31 +91,8 @@ Pipeline designed to create **bedsets** (sets of BED files) that will be retriev Example bedsets: - Bed files from the AML database. - Bed files from the [Excluderanges](https://github.com/dozmorovlab/excluderanges#bedbase-data-download) database. -- Bed files from the LOLA database. +- Bed files from the LOLA database [http://lolaweb.databio.org/](http://lolaweb.databio.org/) Bedbuncher calculates statistics: - Bedset statistics (currenty means and standard deviations).
- ----- -# Additional information - -## bedmaker - -### Additional dependencies - -- bedToBigBed: http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed -- bigBedToBed: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigBedToBed -- bigWigToBedGraph: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigWigToBedGraph -- wigToBigWig: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/wigToBigWig - -## bedstat - -### Additional dependencies -regionstat.R script is used to calculate the bed file statistics, so the pipeline also depends on several R packages: - -All dependencies you can find in R helper script, and use it to easily install the required packages: - -- Rscript scripts/installRdeps.R [How to install R dependencies](./how_to_install_r_dep.md) - - diff --git a/docs/bedboss/bedboss-insert.md b/docs/bedboss/bedboss-insert.md deleted file mode 100644 index 92c924d..0000000 --- a/docs/bedboss/bedboss-insert.md +++ /dev/null @@ -1,22 +0,0 @@ -# BEDboss run-pep - -Bedboss supports inserting and running all pipelines using a single command: `bedboss run-pep`. - -To run `bedboss insert` you need to have few things set up:
-0) Installed bedboss dependencies. See [bedboss dependencies](./how_to_install_r_dependencies.md) for more information
-1) Created **config file** with all the necessary information. See [bedboss config](./how-to_configure.md) for more information
-2) **PEP** project with all the necessary information. Project can be stored locally or on [PEPhub](https://pephub.databio.org/)
-Before running pipeline PEP should be validated using eido or pephub schema. Bedboss insert schema: [bedboss insert schema](https://schema.databio.org/pipelines/bedboss.yaml)
-3) Provide path to **output directory**
- -Additional information can be found in the [bedboss insert](../bedboss_usage.md) documentation. - -### Example PEP: -[https://pephub.databio.org/khoroshevskyi/example](https://pephub.databio.org/khoroshevskyi/example) - - -### Example run: -When we have all the necessary information we can run the pipeline: -```bash -bedboss insert --bedbase-config bedboss_config.yaml --pep khoroshevskyi/example --output-folder ./bedboss_output -``` \ No newline at end of file diff --git a/docs/bedboss/changelog.md b/docs/bedboss/changelog.md index 9a4ce67..cdd875e 100644 --- a/docs/bedboss/changelog.md +++ b/docs/bedboss/changelog.md @@ -1,7 +1,24 @@ -# Changelog for bedboss +# Changelog This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.2.1] - 2024-04-09 +### Changed +- small naming tweaks + +### Added +- added requirement check to cli + + +## [0.2.0] - 2024-04-08 +### Changed +- moved all uploading functionality to the `bbconf` package + +### Added +- added commands for indexing bedfiles +- added commands for deleting bedfiles and bedsets + + ## [0.1.0] - 2024-01-26 ### Added -- Initial release +- Initial alpha release diff --git a/docs/bedboss/how-to-configure.md b/docs/bedboss/how-to-configure.md index 3e855f5..81af38d 100644 --- a/docs/bedboss/how-to-configure.md +++ b/docs/bedboss/how-to-configure.md @@ -6,36 +6,39 @@ - qdrant credentials - server information - remote info +- pephub info +- s3 credentials ### Example: ```yaml path: remote_url_base: http://data.bedbase.org/ - pipeline_output_path: /data/outputs - bedstat_dir: outputs/bedstat_output - bedbuncher_dir: outputs/bedbuncher_output - region2vec: databio/r2v-ChIP-atlas-hg38-v2 - vec2vec: databio/v2v-MiniLM-v2-ATAC-hg38 + region2vec: databio/r2v-encode-hg38 + vec2vec: databio/v2v-geo-hg38 text2vec: sentence-transformers/all-MiniLM-L6-v2 database: host: $POSTGRES_HOST port: 5432 password: $POSTGRES_PASSWORD user:
$POSTGRES_USER - name: bedbase - bed_table: bedfiles - bedset_table: bedsets - relationship_table: bedset_bedfiles - dialect: postgresql - driver: psycopg + database: bedbase2 qdrant: host: $QDRANT_HOST port: 6333 api_key: $QDRANT_API_KEY - collection: bedbase + collection: bedbase2 server: host: 0.0.0.0 port: 8000 +s3: + endpoint_url: $AWS_ENDPOINT_URL + aws_access_key_id: $AWS_ACCESS_KEY_ID + aws_secret_access_key: $AWS_SECRET_ACCESS_KEY + bucket: bedbase +phc: + namespace: databio + name: bedbase_all + tag: default access_methods: http: type: "https" diff --git a/docs/bedboss/how-to-create-database.md b/docs/bedboss/how-to-create-database.md index 7dbac03..f9658c3 100644 --- a/docs/bedboss/how-to-create-database.md +++ b/docs/bedboss/how-to-create-database.md @@ -26,4 +26,4 @@ docker run -d --name bedbase-postgres -p 5432:5432 \ ``` Now we have created docker and can run pipelines. -To connect to the database, change your credentials in the `bedbase_config.yaml` file. +To connect to the database, change your credentials in your configuration file. diff --git a/docs/bedboss/how-to-develop.md b/docs/bedboss/how-to-develop.md deleted file mode 100644 index 37d900b..0000000 --- a/docs/bedboss/how-to-develop.md +++ /dev/null @@ -1 +0,0 @@ -### 🚧 docs in progress! Stay tuned for updates. We're working hard to bring you valuable content soon! \ No newline at end of file diff --git a/docs/bedboss/how-to-install-r-dependencies.md b/docs/bedboss/how-to-install-requirements.md similarity index 51% rename from docs/bedboss/how-to-install-r-dependencies.md rename to docs/bedboss/how-to-install-requirements.md index ec54fc6..24ac979 100644 --- a/docs/bedboss/how-to-install-r-dependencies.md +++ b/docs/bedboss/how-to-install-requirements.md @@ -5,3 +5,13 @@ 2. Download this script: [installRdeps.R](https://github.com/databio/bedboss/blob/dev/scripts/installRdeps.R) 3. Install dependencies by running this command in your terminal: ```Rscript installRdeps.R``` 4. 
Run `bedboss requirements-check` to check if everything was installed correctly. + + +# How to install regionset conversion tools: + +- bedToBigBed: http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed +- bigBedToBed: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigBedToBed +- bigWigToBedGraph: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigWigToBedGraph +- wigToBigWig: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/wigToBigWig + + diff --git a/docs/bedboss/tutorials/bedbuncher_tutorial.md b/docs/bedboss/tutorials/bedbuncher_tutorial.md index 3b18380..761cf98 100644 --- a/docs/bedboss/tutorials/bedbuncher_tutorial.md +++ b/docs/bedboss/tutorials/bedbuncher_tutorial.md @@ -19,11 +19,25 @@ bedboss bunch \ --bedbase-config path/to/bedbase_config.yaml \ --bedset-name bedset1 \ --pep path/to/pep.yaml \ - --bedset-pep bedset_pep.yaml \ - --cache-path CACHE_PATH + --outfolder path/to/output/dir \ + --heavy \ + --upload-pephub \ + --upload-s3 ``` ### Run bedboss bunch from within Python ```python +from bedboss.bedbuncher.bedbuncher import run_bedbuncher_form_pep +run_bedbuncher_form_pep( + bedbase_config=bedbase_config, + bedset_pep=pep, + output_folder=outfolder, + bedset_name=bedset_name, + heavy=heavy, + upload_pephub=upload_pephub, + upload_s3=upload_s3, + no_fail=no_fail, + force_overwrite=force_overwrite, + ) ``` \ No newline at end of file diff --git a/docs/bedboss/tutorials/bedindex_tutorial.md b/docs/bedboss/tutorials/bedindex_tutorial.md index 1e58111..1902796 100644 --- a/docs/bedboss/tutorials/bedindex_tutorial.md +++ b/docs/bedboss/tutorials/bedindex_tutorial.md @@ -5,7 +5,7 @@ #### From command line ```bash -bedboss index --bedbase-config path/to/bedbase_config.yaml +bedboss reindex --bedbase-config path/to/bedbase_config.yaml ``` After running this comman all files that are in the database and weren't indexed will be indexed to qdrant database. 
@@ -13,9 +13,7 @@ After running this comman all files that are in the database and weren't indexed #### From within Python ```python -from bedboss.qdrant_index import add_to_qdrant +from bedboss.qdrant_index.qdrant_index import add_to_qdrant -add_to_qdrant( - bedbase_config="path/to/bedbase_config.yaml" -) +add_to_qdrant(config=bedbase_config) ``` \ No newline at end of file diff --git a/docs/bedboss/tutorials/bedmaker_tutorial.md b/docs/bedboss/tutorials/bedmaker_tutorial.md index 76a748f..419e3c3 100644 --- a/docs/bedboss/tutorials/bedmaker_tutorial.md +++ b/docs/bedboss/tutorials/bedmaker_tutorial.md @@ -10,18 +10,16 @@ Currently supported formats are: Before running pipeline first, you have to install bedboss and check if bedmaker requirements are satisfied. To do so, you can run the next command: ```bash -bedboss requirements-check +bedboss check-requirements ``` ### Run BEDmaker from command line ```bash -bedboss make \ +bedboss make-bed \ --input-file path/to/input/file \ --input-type bed\ - --output-folder path/to/output/dir \ - --genome hg38 \ - --sample-name sample1 - --bigbed "path/to/bigbedfile.bigbed" # optional + --outfolder path/to/output/dir \ + --genome hg38 ``` ### Run BEDmaker from within Python @@ -31,9 +29,7 @@ from bedboss.bedmaker.bedmaker import make_all make_all( input_file="path/to/input/file", input_type="bed", - output_folder="path/to/output/dir", + output_path="path/to/output/dir", genome="hg38", - sample_name="sample1", - bigbed="path/to/bigbedfile.bigbed" # optional ) ``` \ No newline at end of file diff --git a/docs/bedboss/tutorials/bedqc_tutorial.md b/docs/bedboss/tutorials/bedqc_tutorial.md index 3ff9a46..fe291cc 100644 --- a/docs/bedboss/tutorials/bedqc_tutorial.md +++ b/docs/bedboss/tutorials/bedqc_tutorial.md @@ -10,8 +10,8 @@ As for now, it checks: ### Run BEDqc from command line ```bash bedboss qc \ - --bedfile path/to/bedfile.bed \ - --outfolder path/to/output/dir \ + --bed-file path/to/bedfile.bed \ + --outfolder 
path/to/output/dir ``` --- diff --git a/docs/bedboss/tutorials/bedstat_tutorial.md b/docs/bedboss/tutorials/bedstat_tutorial.md index cc34146..f8c6c99 100644 --- a/docs/bedboss/tutorials/bedstat_tutorial.md +++ b/docs/bedboss/tutorials/bedstat_tutorial.md @@ -23,7 +23,7 @@ It produces BED file Statistics: First you have to install bedboss and check if all requirements are satisfied. To do so, you can run next command: ```bash -bedboss requirements-check +bedboss check-requirements ``` If requirements are not satisfied, you will see the list of missing packages. @@ -36,7 +36,6 @@ bedboss stats \ --bedfile path/to/bedfile.bed \ --outfolder path/to/output/dir \ --genome hg38 \ - --bigbed "path/to/bigbedfile.bigbed" # optional ``` ---- @@ -48,7 +47,6 @@ bedstat( bedfile="path/to/bedfile.bed", outfolder="path/to/output/dir", genome="hg19", - bigbed="path/to/bigbedfile.bigbed", # optional ) ``` diff --git a/docs/bedboss/tutorials/tutorial_all.md b/docs/bedboss/tutorials/tutorial_all.md index fb7d23c..e642a75 100644 --- a/docs/bedboss/tutorials/tutorial_all.md +++ b/docs/bedboss/tutorials/tutorial_all.md @@ -1,6 +1,6 @@ -## Bedboss all +## Bedboss run-all -Bedboss run-all is intended to run on sample (bed file) and run all bedboss pipelines: +Bedboss run-all is intended to run on ONE sample (bed file) and run all bedboss pipelines: bedmaker (+ bedclassifier + bedqc) -> bedstat. After that optionally it can run bedbuncher, qdrant indexing and upload metadata to PEPhub. ### Step 1: Install all dependencies @@ -8,7 +8,7 @@ Bedboss run-all is intended to run on sample (bed file) and run all bedboss pipe First you have to install bedboss and check if all requirements are satisfied. To do so, you can run next command: ```bash -bedboss requirements-check +bedboss check-requirements ``` If requirements are not satisfied, you will see the list of missing packages. 
@@ -22,7 +22,7 @@ To run bedboss, you need to run the next command: bedboss all \ --bedbase-config bedconf.yaml \ --input-file path/to/bedfile.bed \ - --output-dir path/to/output/dir \ + --outfolder path/to/output/dir \ --input-type bed \ --genome hg38 \ @@ -31,11 +31,11 @@ bedboss all \ Above command will run bedboss on the bed file and create a bedstat file in the output directory. It contains only required parameters. For more details, please check the usage section. -By default, results will be uploaded only to postgres database. -- To upload results to PEPhub, you need to make `databio` org available on GitHub, then login to PEPhub, and add `--upload-pephub` flag to the command. -- To upload results to Qdrant, you need to add `--upload-qdrant` flag to the command. -- To upload actual files to s3, you need to add `--upload-s3` flag to the command, and Before uploading you have to set up all necessary env vars: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_ENDPOINT_URL. - +By default, results will be uploaded only to the PostgreSQL database. +- To upload results to PEPhub, you need to make the `databio` org available on GitHub, then log in to PEPhub, and add the `--upload-pephub` flag to the command. +- To upload results to Qdrant, you need to add the `--upload-qdrant` flag to the command. +- To upload actual files to S3, you need to add the `--upload-s3` flag to the command, and before uploading, you have to set up all necessary environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_ENDPOINT_URL. +- To ignore errors and continue processing, you need to add the `--no-fail` flag to the command. 
--- @@ -47,15 +47,15 @@ To run bedboss all from within Python, instead of using the command line in the from bedboss import bedboss bedboss.run_all( - sample_name="sample1", + name="sample1", input_file="path/to/bedfile.bed", input_type="bed", outfolder="path/to/output/dir", genome="hg38", bedbase_config="bedconf.yaml", - narrowpeak=False, # optional - standardardize=True, # optional other_metadata=None, # optional upload_pephub=True, # optional + upload_qdrant=True, # optional + upload_s3=True, # optional ) ``` \ No newline at end of file diff --git a/docs/bedboss/tutorials/tutorial_insert.md b/docs/bedboss/tutorials/tutorial_run_pep.md similarity index 86% rename from docs/bedboss/tutorials/tutorial_insert.md rename to docs/bedboss/tutorials/tutorial_run_pep.md index 6105a38..71df030 100644 --- a/docs/bedboss/tutorials/tutorial_insert.md +++ b/docs/bedboss/tutorials/tutorial_run_pep.md @@ -1,4 +1,4 @@ -## Bedboss insert +## Bedboss run-pep Bedboss insert is designed to process each sample in the provided PEP. The PEP can be provided either as a path to config file or as a registry path of the PEPhub. @@ -9,19 +9,19 @@ The PEP can be provided either as a path to config file or as a registry path of First, you have to install bedboss and check if all requirements are satisfied. To do so, you can run the following command: ```bash -bedboss requirements-check +bedboss check-requirements ``` If requirements are not satisfied, you will see the list of missing packages. ### Step 2: Create bedconf.yaml file -To run bedboss insert, you need to create a bedconf.yaml file with configuration. +To run bedboss run-pep, you need to create a bedconf.yaml file with configuration. Detailed instructions are in the configuration section. ### Step 3: Create PEP with bed files. BEDboss PEP should contain next fields: sample_name, input_file, input_type, genome. 
Before running bedboss, you need to validate provided PEP with [bedboss_insert schema](https://schema.databio.org/?namespace=pipelines&schema=bedboss). The easiest way to do so is to use [PEPhub](https://pephub.databio.org/), where you create a new PEP and validate it with the schema. -Example PEP: [https://pephub.databio.org/databio/excluderanges?tag=bedbase](https://pephub.databio.org/databio/excluderanges?tag=bedbase) +Example PEP: [https://pephub.databio.org/databio/excluderanges?tag=default](https://pephub.databio.org/databio/excluderanges?tag=default) ### Step 4: Run bedboss insert To run bedboss insert , you need to run the next command: @@ -29,8 +29,7 @@ To run bedboss insert , you need to run the next command: bedboss insert \ --bedbase-config bedconf.yaml \ --pep path/to/pep.yaml \ - --output-folder path/to/output/dir - + --outfolder path/to/output/dir ``` Above command will run bedboss on the bed file and create a file with statistics in the output directory. @@ -41,7 +40,7 @@ By default, results will be uploaded only to the PostgreSQL database. - To upload results to Qdrant, you need to add the `--upload-qdrant` flag to the command. - To upload actual files to S3, you need to add the `--upload-s3` flag to the command, and before uploading, you have to set up all necessary environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_ENDPOINT_URL. - To create a bedset of provided pep files, you need to add the `--create-bedset` flag to the command. - +- To ignore errors and continue processing, you need to add the `--no-fail` flag to the command. 
--- @@ -59,6 +58,7 @@ bedboss.insert_pep( upload_pephub=True, # optional upload_qdrant=True, # optional upload_s3=True, # optional - create_bedset=True # optional + create_bedset=True, # optional + no_fail=True, # optional ) ``` \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index a578a66..d33825a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -64,7 +64,7 @@ nav: - BEDboss: - BEDboss: bedboss/README.md - Tutorial: - - BEDboss insert: bedboss/tutorials/tutorial_insert.md + - BEDboss run-pep: bedboss/tutorials/tutorial_run_pep.md - BEDboss-all pipeline: bedboss/tutorials/tutorial_all.md - BEDmaker tutorial: bedboss/tutorials/bedmaker_tutorial.md - BEDqc tutorial: bedboss/tutorials/bedqc_tutorial.md @@ -75,8 +75,7 @@ nav: - How to guides: - Create BEDbase database: bedboss/how-to-create-database.md - Create config file: bedboss/how-to-configure.md - - Install R dependencies: bedboss/how-to-install-r-dependencies.md - - Develop process: bedboss/how-to-develop.md + - Install dependencies: bedboss/how-to-install-requirements.md - Reference: - How to cite: citations.md - Usage: bedboss/usage.md From 880aeaf0fe6b1ab1625247fa8a75d88f60ef268c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Apr 2024 12:36:34 -0400 Subject: [PATCH 5/7] fix links and cleaning --- docs/bedboss/README.md | 30 +- docs/bedboss/code/bedbase-tutorial.md | 2534 ------------- docs/bedboss/code/bedmaker-tutorial.md | 212 -- docs/bedboss/code/bedqc-tutorial.md | 71 - docs/bedboss/code/bedstat-tutorial.md | 371 -- docs/bedboss/code/tutorial-all.md | 499 --- docs/bedboss/how-to-install-requirements.md | 12 +- docs/bedboss/notebooks/bedbase-tutorial.ipynb | 3224 ----------------- .../bedboss/notebooks/bedmaker-tutorial.ipynb | 300 -- docs/bedboss/notebooks/bedqc-tutorial.ipynb | 124 - docs/bedboss/notebooks/bedstat-tutorial.ipynb | 528 --- docs/bedboss/notebooks/tutorial-all.ipynb | 691 ---- docs/bedboss/tutorials/bedbuncher_tutorial.md | 4 + 
docs/bedboss/tutorials/bedindex_tutorial.md | 4 + docs/bedboss/tutorials/tutorial_all.md | 5 +- docs/bedboss/tutorials/tutorial_run_pep.md | 3 +- mkdocs.yml | 4 +- 17 files changed, 40 insertions(+), 8576 deletions(-) delete mode 100644 docs/bedboss/code/bedbase-tutorial.md delete mode 100644 docs/bedboss/code/bedmaker-tutorial.md delete mode 100644 docs/bedboss/code/bedqc-tutorial.md delete mode 100644 docs/bedboss/code/bedstat-tutorial.md delete mode 100644 docs/bedboss/code/tutorial-all.md delete mode 100644 docs/bedboss/notebooks/bedbase-tutorial.ipynb delete mode 100644 docs/bedboss/notebooks/bedmaker-tutorial.ipynb delete mode 100644 docs/bedboss/notebooks/bedqc-tutorial.ipynb delete mode 100644 docs/bedboss/notebooks/bedstat-tutorial.ipynb delete mode 100644 docs/bedboss/notebooks/tutorial-all.ipynb diff --git a/docs/bedboss/README.md b/docs/bedboss/README.md index 07b12a2..c03c413 100644 --- a/docs/bedboss/README.md +++ b/docs/bedboss/README.md @@ -9,16 +9,16 @@

-bedboss is a command-line BED base manager tool and pipelines that manages region set files, calculates statistics for them -and enters the results into a BEDbase database. +A command-line management tool for calculating statistics for region set files (BED files) and managing them in the BEDbase database. -Main features: -1) bedmaker - pipeline to convert supported file types into BED format and bigBed format. -2) bedqc - pipeline to flag bed files for further evaluation to determine whether they should be included in the downstream analysis. -3) bedstat - pipeline for obtaining statistics about bed files. -4) bedbuncher - pipeline designed to create bedsets (sets of BED files) that will be retrieved from bedbase. -5) index - pipeline to create vectors of bedfiles and insert them into vector database for further search. -6) Other delete and update tools that manage bed and bedset files in the BEDbase database. +### Main features: + +1) **bedmaker** - pipeline to convert supported file types into BED format and bigBed format.
+2) **bedqc** - pipeline to flag bed files for further evaluation to determine whether they should be included in the downstream analysis.
+3) **bedstat** - pipeline for obtaining statistics about bed files.
+4) **bedbuncher** - pipeline designed to create bedsets (sets of BED files) that will be retrieved from bedbase.
+5) **index** - pipeline to create vectors of bedfiles and insert them into vector database for further search.
+6) Other delete and update tools that manage bed and bedset files in the BEDbase database.
Mainly pipelines are intended to be run from command line but nevertheless, they are also available as a python function, so that user can implement them to his own code (e.g. automatic uploading tools). @@ -49,6 +49,15 @@ bedboss check-requirements All dependencies can be using this how to documentation: [How to install dependencies](./how-to-install-requirements.md) +--- + +## BEDbase configuration file + +To run most of the pipelines, you need to create a BEDbase configuration file. + +How to create a BEDbase configuration file is described in the [configuration section](./how-to-configure.md). + + --- ## Pipelines information @@ -84,11 +93,12 @@ It produces BED file Statistics: - **5' UTR percentage**. The percentage of the regions in the BED file that are annotated as 5'-UTR. - **3' UTR percentage**. The percentage of the regions in the BED file that are annotated as 3'-UTR. -## bedbuncher +### bedbuncher Pipeline designed to create **bedsets** (sets of BED files) that will be retrieved from bedbase. Example bedsets: + - Bed files from the AML database. - Bed files from the [Excluderanges](https://github.com/dozmorovlab/excluderanges#bedbase-data-download) database. - Bed files from the LOLA database [http://lolaweb.databio.org/](http://lolaweb.databio.org/) diff --git a/docs/bedboss/code/bedbase-tutorial.md b/docs/bedboss/code/bedbase-tutorial.md deleted file mode 100644 index b30fced..0000000 --- a/docs/bedboss/code/bedbase-tutorial.md +++ /dev/null @@ -1,2534 +0,0 @@ -jupyter:True -# BEDBASE workflow tutorial - -This demo demonstrates how to process, analyze, visualize, and serve BED files. The process has 5 steps: First, the [bedmaker](https://github.com/databio/bedmaker) pipeline converts different region data files (bed, bedGraph, bigBed, bigWig, and wig) into BED format and generates bigBed format for each file for visualization in Genome Browser. 
An optional step, the [bedqc](https://github.com/databio/bedqc) pipline, flags the BED files that you might not want to include in the downstream analysis. Second, individual BED files are analyzed using the [bedstat](https://github.com/databio/bedstat) pipeline. Third, BED files are grouped and then analyzed as groups using the [bedbuncher](https://github.com/databio/bedbuncher) pipeline. Fourth, [bedembed](https://github.com/databio/bedembed) uses the StarSpace method to embed the bed files and the meta data, and the distances between the file labels and trained search terms will be calculated with cosine distance. Finally, the BED files, along with statistics, plots, and grouping information, is served via a web interface and RESTful API using the [bedhost](https://github.com/databio/bedhost) package. - -**Glossary of terms:** - -- *bedfile*: a tab-delimited file with one genomic region per line. Each genomic region is decribed by 3 required columns: chrom, start and end. -- *bedset*: a collection of BED files grouped by with a shared biological, experimental, or logical criterion. - - -

Table of Contents

- - -## 1. Preparation - -First, we will create a tutorial directory where we'll store the bedbase pipelines and files to be processed. We'll also need to create an environment variable that points to the tutorial directory (we'll need this variable later). - - -```bash -# mkdir bedbase_tutorial -cd bedbase_tutorial -export BEDBASE_DATA_PATH_HOST=`pwd` -export CODE=`pwd` -``` - -```.output -bash: cd: bedbase_tutorial: No such file or directory - -``` - -Download some example BED files: - - -```bash -wget http://big.databio.org/example_data/bedbase_tutorial/bed_files.tar.gz -``` - -```.output ---2023-08-11 08:10:02-- http://big.databio.org/example_data/bedbase_tutorial/bed_files.tar.gz -Resolving big.databio.org (big.databio.org)... 128.143.223.179 -Connecting to big.databio.org (big.databio.org)|128.143.223.179|:80... connected. -HTTP request sent, awaiting response... 200 OK -Length: 44549692 (42M) [application/octet-stream] -Saving to: ‘bed_files.tar.gz’ - -bed_files.tar.gz 100%[===================>] 42.49M 303KB/s in 95s - -2023-08-11 08:11:37 (458 KB/s) - ‘bed_files.tar.gz’ saved [44549692/44549692] - - -``` - -The downloaded files are compressed so we'll need to untar them: - - -```bash -tar -zxvf bed_files.tar.gz && mv bed_files files -``` - -```.output -bed_files/ -bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz -bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz -bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz -bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz -bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz -bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz -bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz -bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz -bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz -bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz 
-bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz - -``` - - -```bash -rm bed_files.tar.gz -``` - -Additionally, we'll download a matrix we need to provide if we wish to plot the tissue specificity of our set of genomic ranges: - -Lastly, we'll download the core pipelines and tools needed to complete this tutorial: `bedmaker`, `bedqc`, `bedstat`, `bedbuncher` , `bedhost`, and `bedhost-ui` - - -```bash -pip install looper==1.5.1 -pip install refgenie -``` - -```.output -Collecting looper==1.5.1 - Downloading looper-1.5.1-py3-none-any.whl (121 kB) - ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.8/121.8 KB 734.8 kB/s eta 0:00:00 kB/s eta 0:00:01 -[?25hCollecting pipestat>=0.5.1 - Downloading pipestat-0.5.1-py3-none-any.whl (61 kB) - ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.2/61.2 KB 724.9 kB/s eta 0:00:00 kB/s eta 0:00:01 -[?25hRequirement already satisfied: pandas>=2.0.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (2.0.3) -Requirement already satisfied: logmuse>=0.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.2.7) -Requirement already satisfied: ubiquerg>=0.5.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.6.2) -Requirement already satisfied: pyyaml>=3.12 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (6.0) -Requirement already satisfied: eido>=0.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.2.1) -Requirement already satisfied: peppy>=0.35.4 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.35.7) -Requirement already satisfied: yacman>=0.9 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.9.1) -Requirement already satisfied: colorama>=0.3.9 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) 
(0.4.6) -Requirement already satisfied: divvy>=0.5.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.6.0) -Requirement already satisfied: rich>=9.10.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (13.3.1) -Requirement already satisfied: jinja2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (3.1.2) -Requirement already satisfied: pephubclient in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.1.0) -Requirement already satisfied: attmap>=0.12.9 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from divvy>=0.5.0->looper==1.5.1) (0.13.2) -Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from eido>=0.2.0->looper==1.5.1) (4.17.3) -Requirement already satisfied: pytz>=2020.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (2022.7.1) -Requirement already satisfied: numpy>=1.21.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (1.22.0) -Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (2023.3) -Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (2.8.2) -Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.5.1->looper==1.5.1) (1.0) -Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.5.1->looper==1.5.1) (2.9.5) -Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.5.1->looper==1.5.1) 
(1.10.7) -Requirement already satisfied: sqlmodel>=0.0.8 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.5.1->looper==1.5.1) (0.0.8) -Requirement already satisfied: pygments<3.0.0,>=2.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=9.10.0->looper==1.5.1) (2.14.0) -Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=9.10.0->looper==1.5.1) (2.1.0) -Requirement already satisfied: MarkupSafe>=2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jinja2->looper==1.5.1) (2.1.2) -Requirement already satisfied: requests>=2.28.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pephubclient->looper==1.5.1) (2.28.2) -Requirement already satisfied: typer>=0.7.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pephubclient->looper==1.5.1) (0.8.0) -Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->eido>=0.2.0->looper==1.5.1) (22.2.0) -Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->eido>=0.2.0->looper==1.5.1) (0.19.3) -Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=9.10.0->looper==1.5.1) (0.1.2) -Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.5.1->looper==1.5.1) (4.4.0) -Requirement already satisfied: six>=1.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=2.0.2->looper==1.5.1) (1.16.0) -Requirement already satisfied: charset-normalizer<4,>=2 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (3.0.1) -Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (1.26.14) -Requirement already satisfied: idna<4,>=2.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (3.4) -Requirement already satisfied: certifi>=2017.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (2022.12.7) -Requirement already satisfied: SQLAlchemy<=1.4.41,>=1.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.5.1->looper==1.5.1) (1.4.41) -Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.5.1->looper==1.5.1) (0.0.2a35) -Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from typer>=0.7.0->pephubclient->looper==1.5.1) (8.1.3) -Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from SQLAlchemy<=1.4.41,>=1.4.17->sqlmodel>=0.0.8->pipestat>=0.5.1->looper==1.5.1) (2.0.2) -Installing collected packages: pipestat, looper - Attempting uninstall: pipestat - Found existing installation: pipestat 0.5.0 - Uninstalling pipestat-0.5.0: - Successfully uninstalled pipestat-0.5.0 - Attempting uninstall: looper - Found existing installation: looper 1.5.0 - Uninstalling looper-1.5.0: - Successfully uninstalled looper-1.5.0 -Successfully installed looper-1.5.1 pipestat-0.5.1 -Requirement already satisfied: refgenie in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (0.12.1) -Requirement already satisfied: pyfaidx>=0.5.5.2 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.7.1) -Requirement already satisfied: refgenconf>=0.12.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.12.2) -Requirement already satisfied: piper>=0.12.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.13.2) -Requirement already satisfied: logmuse>=0.2.6 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.2.7) -Requirement already satisfied: yacman>=0.8.3 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.9.1) -Requirement already satisfied: ubiquerg>=0.4.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (0.6.2) -Requirement already satisfied: pipestat>=0.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (0.5.1) -Requirement already satisfied: pandas in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (2.0.3) -Requirement already satisfied: attmap>=0.12.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (0.13.2) -Requirement already satisfied: psutil in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (5.9.4) -Requirement already satisfied: setuptools>=0.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pyfaidx>=0.5.5.2->refgenie) (59.6.0) -Requirement already satisfied: six in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pyfaidx>=0.5.5.2->refgenie) (1.16.0) -Requirement already satisfied: future in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (0.18.3) -Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from 
refgenconf>=0.12.2->refgenie) (4.17.3) -Requirement already satisfied: pyyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (6.0) -Requirement already satisfied: tqdm in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (4.64.1) -Requirement already satisfied: rich>=9.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (13.3.1) -Requirement already satisfied: requests in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (2.28.2) -Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from yacman>=0.8.3->refgenie) (1.0) -Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->refgenie) (22.2.0) -Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->refgenie) (0.19.3) -Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (1.10.7) -Requirement already satisfied: sqlmodel>=0.0.8 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.0.8) -Requirement already satisfied: eido in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.2.1) -Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (2.9.5) -Requirement already satisfied: pygments<3.0.0,>=2.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from 
rich>=9.0.1->refgenconf>=0.12.2->refgenie) (2.14.0) -Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=9.0.1->refgenconf>=0.12.2->refgenie) (2.1.0) -Requirement already satisfied: pytz>=2020.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (2022.7.1) -Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (2.8.2) -Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (2023.3) -Requirement already satisfied: numpy>=1.21.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (1.22.0) -Requirement already satisfied: certifi>=2017.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (2022.12.7) -Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (1.26.14) -Requirement already satisfied: idna<4,>=2.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (3.4) -Requirement already satisfied: charset-normalizer<4,>=2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (3.0.1) -Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=9.0.1->refgenconf>=0.12.2->refgenie) (0.1.2) -Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.4.0->piper>=0.12.1->refgenie) (4.4.0) 
-Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.0.2a35) -Requirement already satisfied: SQLAlchemy<=1.4.41,>=1.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->piper>=0.12.1->refgenie) (1.4.41) -Requirement already satisfied: peppy>=0.35.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from eido->pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.35.7) -Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from SQLAlchemy<=1.4.41,>=1.4.17->sqlmodel>=0.0.8->pipestat>=0.4.0->piper>=0.12.1->refgenie) (2.0.2) - -``` - - -```bash -git clone -b dev-bedboss git@github.com:databio/bedbase.git -pip install bedboss==0.1.0a2 -# git clone -b validate_genome_assembly git@github.com:databio/bedbuncher -# git clone git@github.com:databio/bedembed -# git clone -b dev git@github.com:databio/bedhost -# git clone git@github.com:databio/bedhost-ui -``` - -```.output -Cloning into 'bedbase'... -remote: Enumerating objects: 664, done. -remote: Counting objects: 100% (317/317), done. -remote: Compressing objects: 100% (159/159), done. -remote: Total 664 (delta 188), reused 250 (delta 148), pack-reused 347 -Receiving objects: 100% (664/664), 695.03 KiB | 386.00 KiB/s, done. -Resolving deltas: 100% (337/337), done. 
-Collecting bedboss==0.1.0a2 - Downloading bedboss-0.1.0a2-py3-none-any.whl (24 kB) -Requirement already satisfied: requests>=2.28.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.28.2) -Collecting bbconf==0.4.0a1 - Using cached bbconf-0.4.0a1-py3-none-any.whl (11 kB) -Requirement already satisfied: refgenconf>=0.12.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.12.2) -Requirement already satisfied: yacman>=0.8.4 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.9.1) -Collecting piper>=0.13.2 - Using cached piper-0.13.2-py3-none-any.whl (72 kB) -Requirement already satisfied: peppy>=0.35.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.35.7) -Requirement already satisfied: pandas>=1.5.3 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.0.0) -Requirement already satisfied: ubiquerg>=0.6.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.6.2) -Requirement already satisfied: logmuse>=0.2.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.2.7) -Requirement already satisfied: sqlalchemy<2.0.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bbconf==0.4.0a1->bedboss==0.1.0a2) (1.4.41) -Requirement already satisfied: pipestat>=0.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bbconf==0.4.0a1->bedboss==0.1.0a2) (0.4.1) -Requirement already satisfied: pytz>=2020.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2022.7.1) -Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2023.3) -Requirement already satisfied: numpy>=1.21.0 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (1.22.0) -Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2.8.2) -Requirement already satisfied: pyyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (6.0) -Requirement already satisfied: attmap>=0.13.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (0.13.2) -Requirement already satisfied: rich>=10.3.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (13.3.1) -Requirement already satisfied: psutil in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.13.2->bedboss==0.1.0a2) (5.9.4) -Requirement already satisfied: future in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (0.18.3) -Requirement already satisfied: pyfaidx in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (0.7.1) -Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.17.3) -Requirement already satisfied: tqdm in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.64.1) -Requirement already satisfied: charset-normalizer<4,>=2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.0.1) -Requirement already satisfied: idna<4,>=2.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.4) -Requirement already satisfied: certifi>=2017.4.17 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (2022.12.7) -Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (1.26.14) -Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from yacman>=0.8.4->bedboss==0.1.0a2) (1.0) -Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (0.19.3) -Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (22.2.0) -Requirement already satisfied: eido in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.2.1) -Requirement already satisfied: sqlmodel>=0.0.8 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.8) -Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.9.5) -Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (1.10.7) -Requirement already satisfied: six>=1.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=1.5.3->bedboss==0.1.0a2) (1.16.0) -Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.1.0) -Requirement already satisfied: pygments<3.0.0,>=2.14.0 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.14.0) -Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlalchemy<2.0.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.0.2) -Requirement already satisfied: setuptools>=0.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pyfaidx->refgenconf>=0.12.2->bedboss==0.1.0a2) (59.6.0) -Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (0.1.2) -Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (4.4.0) -Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.2a35) -Installing collected packages: piper, bbconf, bedboss - Attempting uninstall: piper - Found existing installation: piper 0.12.3 - Uninstalling piper-0.12.3: - Successfully uninstalled piper-0.12.3 - Attempting uninstall: bbconf - Found existing installation: bbconf 0.4.0 - Uninstalling bbconf-0.4.0: - Successfully uninstalled bbconf-0.4.0 - Attempting uninstall: bedboss - Found existing installation: bedboss 0.1.0.dev2 - Uninstalling bedboss-0.1.0.dev2: - Successfully uninstalled bedboss-0.1.0.dev2 -Successfully installed bbconf-0.4.0a1 bedboss-0.1.0a2 piper-0.13.2 - -``` - -### Let's install this packages! 
- - I have problems with the bedToBigBed script, so I am downloading it too and setting the bedmaker path to this script :/ - - -```bash -wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed -chmod a+x bedToBigBed -``` - -```.output ---2023-08-11 07:51:37-- http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed -Resolving hgdownload.soe.ucsc.edu (hgdownload.soe.ucsc.edu)... 128.114.119.163 -Connecting to hgdownload.soe.ucsc.edu (hgdownload.soe.ucsc.edu)|128.114.119.163|:80... connected. -HTTP request sent, awaiting response... 200 OK -Length: 9632264 (9.2M) -Saving to: ‘bedToBigBed’ - -bedToBigBed 100%[===================>] 9.19M 740KB/s in 18s - -2023-08-11 07:51:56 (524 KB/s) - ‘bedToBigBed’ saved [9632264/9632264] - - -``` - - -```bash -pwd -``` - -```.output -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial - -``` - -Check if we have all tutorial files: - - -```bash -ls bedbase/tutorial_files -``` - -```.output -bedbase_configuration_compose_local.yaml bedboss scripts -bedbase_configuration_compose.yaml PEPs - -``` - -# 2. 
BEDBOSS: ALL TOGETHER - - -```bash -pip list | grep bbconf -``` - -```.output -bbconf 0.4.0a1 - -``` - - -```bash -pip list | grep bedboss -``` - -```.output -bedboss 0.1.0a2 - -``` - -### Check and update config files - - -```bash -ls bedbase/tutorial_files/bedboss -``` - -```.output -bedboss_pep_config.yaml looper_config_bedboss.yaml sample_table.csv -config_db_local.yaml pipeline_interface.yaml - -``` - -Let's create additional metadata for our database: - - -```bash -cat bedbase/tutorial_files/bedboss/bedboss_pep_config.yaml -``` - -```.output -pep_version: 2.1.0 -sample_table: sample_table.csv - -sample_modifiers: - append: - input_file_path: INPUT - output_folder: "$BEDBASE_DATA_PATH_HOST/outputs" - narrowpeak: TRUE - rfg_config_path: RFG - bedbase_config: "$BEDBASE_DATA_PATH_HOST/bedbase/tutorial_files/bedboss/config_db_local.yaml" - yaml_file: YAMLFILE - derive: - attributes: [input_file_path, rfg_config_path, yaml_file] - sources: - INPUT: "$BEDBASE_DATA_PATH_HOST/files/{file_name}" - RFG: "$REFGENIE" - YAMLFILE: "$BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/{sample_name}_sample.yaml" - imply: - - if: - antibody: [ H3K4me3, H3K27me3, H3K27ac, H3K9ac, H4K5ac, H3K4me, H3K36me3, H4K5ac, H3K9ac ] - then: - narrowpeak: FALSE - -``` - -Config for local db and bedstat - - -```bash -cat bedbase/tutorial_files/bedboss/config_db_local.yaml -``` - -```.output -path: - pipeline_output_path: $BEDBASE_DATA_PATH_HOST/outputs - bedstat_dir: outputs/bedstat_output - bedbuncher_dir: outputs/bedbuncher_output - remote_url_base: null -database: - host: $DB_HOST_URL - port: $POSTGRES_PORT - password: $POSTGRES_PASSWORD - user: $POSTGRES_USER - name: $POSTGRES_DB - dialect: postgresql - driver: psycopg2 -server: - host: 0.0.0.0 - port: 8080 - -``` - -looper for bedboss - - -```bash -cat bedbase/tutorial_files/bedboss//pipeline_interface.yaml -``` - -```.output -pipeline_name: BEDBOSS -pipeline_type: sample -pre_submit: - 
python_functions: - - looper.write_sample_yaml -command_template: > - bedboss all - --sample-name {sample.sample_name} - --input-file {sample.input_file_path} - --input-type {sample.format} - --genome {sample.genome} - --sample-yaml {sample.yaml_file} - --output_folder {sample.output_folder} - --narrowpeak {sample.narrowpeak} - --rfg-config {sample.rfg_config_path} - {% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %} - {% if sample.chrom_sizes is defined %} --chrom-sizes {sample.chrom_sizes} {% endif %} - {% if sample.open_signal_matrix is defined %} --open-signal-matrix {sample.open_signal_matrix} {% endif %} - {% if sample.ensdb is defined %} --ensdb {sample.ensdb} {% endif %} - {% if sample.fasta is defined %} --fasta {sample.fasta} {% endif %} - --outfolder $BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs - -``` - -Looper config file: - - -```bash -ls bedbase/tutorial_files/bedboss -``` - -```.output -bedboss_pep_config.yaml looper_config_bedboss.yaml sample_table.csv -config_db_local.yaml pipeline_interface.yaml - -``` - - -```bash -cat bedbase/tutorial_files/bedboss/looper_config_bedboss.yaml -``` - -```.output -pep_config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/bedboss_pep_config.yaml -output_dir: $BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs - -pipeline_interfaces: - sample: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss//pipeline_interface.yaml - -``` - -### Start DB (bedbase-postgres) - -### Inititiate a local PostgreSQL instance - -In addition to generate statistics and plots, `bedstat` inserts JSON formatted metadata into relational [PostgreSQL] database. - -If you don't have docker installed, you can install it with `sudo apt-get update && apt-get install docker-engine -y`. 
- -Now, create a persistent volume to house PostgreSQL data: - - -```bash -docker volume create postgres-data -``` - -```.output -postgres-data - -``` - -Spin up a `postgres` container. Provide the required environment variables (they need to match the settings in the bedbase configuration file) and bind the created docker volume to the `/var/lib/postgresql/data` path in the container: - - -```bash -docker run -d --name bedbase-postgres -p 5432:5432 -e POSTGRES_PASSWORD=bedbasepassword -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres -v postgres-data:/var/lib/postgresql/data postgres -``` - -```.output -42ed2028444042c3ceef801c0828ce016dde87f1c0ac0d9494ffb6274374f262 -docker: Error response from daemon: driver failed programming external connectivity on endpoint bedbase-postgres (fe853ffbf2fa584785686c319c5a657021a860dce6c9e81f67f5e805ef2133a0): Bind for 0.0.0.0:5432 failed: port is already allocated. - -``` - - - -If the environment variables are not initialized with the function above, we have to initialize them manually - - -```bash -export DB_HOST_URL=localhost -export POSTGRES_PORT=5432 -export POSTGRES_PASSWORD=docker -export POSTGRES_USER=postgres -export POSTGRES_DB=pep-db -``` - -### RUN BEDBoss - -Additionally, we have to initialize the environment variable $REFGENIE - the path to the refgenie configuration file. If Refgenie is not initialized, we will have to initialize it locally. 
use `pip install --user refgenie` to install and add to the PATH with `export PATH=~/.local/bin:$PATH` - - -```bash -export REFGENIE='genome_config.yaml' -refgenie init -c $REFGENIE -``` - -```.output -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/refgenie", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/refgenie/cli.py", line 133, in main - rgc.initialize_config_file(os.path.abspath(gencfg)) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/refgenconf/refgenconf.py", line 290, in initialize_config_file - _write_fail_err("file exists") - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/refgenconf/refgenconf.py", line 281, in _write_fail_err - raise OSError("Can't initialize, {}: {} ".format(reason, filepath)) -OSError: Can't initialize, file exists: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml - -``` - - - - -```bash -pwd -``` - -```.output -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial - -``` - - -```bash -ls bedbase/tutorial_files/bedboss -``` - -```.output -bedboss_pep_config.yaml looper_config_bedboss.yaml sample_table.csv -config_db_local.yaml pipeline_interface.yaml - -``` - -##### Run bedboss - - -```bash -looper --version -``` - -```.output -looper 1.5.1 - - -``` - - -```bash -looper run --help -``` - -```.output -usage: looper run [-h] [-i] [-d] [-t S] [-x S] [-y S] [-f] [--divvy DIVCFG] [-p P] [-s S] - [-c K [K ...]] [-u X] [-n N] [--looper-config LOOPER_CONFIG] - [-S YAML [YAML ...]] [-P YAML [YAML ...]] [-l N] [-k N] - [--sel-attr ATTR] [--sel-excl [E ...] | --sel-incl [I ...]] - [-a A [A ...]] - [config_file] - -Run or submit sample jobs. - -positional arguments: - config_file Project configuration file (YAML) or pephub registry - path. - -options: - -h, --help show this help message and exit - -i, --ignore-flags Ignore run status flags? 
Default=False - -d, --dry-run Don't actually submit the jobs. Default=False - -t S, --time-delay S Time delay in seconds between job submissions - -x S, --command-extra S String to append to every command - -y S, --command-extra-override S Same as command-extra, but overrides values in PEP - -f, --skip-file-checks Do not perform input file checks - -u X, --lump X Total input file size (GB) to batch into one job - -n N, --lumpn N Number of commands to batch into one job - --looper-config LOOPER_CONFIG Looper configuration file (YAML) - -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...] - Path to looper sample config file - -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...] - Path to looper project config file - -a A [A ...], --amend A [A ...] List of amendments to activate - -divvy arguments: - Configure divvy to change computing settings - - --divvy DIVCFG Path to divvy configuration file. Default=$DIVCFG env - variable. Currently: not set - -p P, --package P Name of computing resource package to use - -s S, --settings S Path to a YAML settings file with compute settings - -c K [K ...], --compute K [K ...] List of key-value pairs (k1=v1) - -sample selection arguments: - Specify samples to include or exclude based on sample attribute values - - -l N, --limit N Limit to n samples - -k N, --skip N Skip samples by numerical index - --sel-attr ATTR Attribute for sample exclusion OR inclusion - --sel-excl [E ...] Exclude samples with these values - --sel-incl [I ...] 
Include only samples with these values - - -``` - - -```bash -pwd -``` - -```.output -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial - -``` - - -```bash -$BEDBASE_DATA_PATH_HOST -``` - -```.output -bash: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial: Is a directory - -``` - - - - -```bash -looper run --looper-config ./bedbase/tutorial_files/bedboss/looper_config_bedboss.yaml --output-dir $BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs -``` - -```.output -Looper version: 1.5.1 -Command: run -Using default config. No config found in env var: ['DIVCFG'] -Pipestat compatible: False -## [1 of 11] sample: bedbase_demo_db1; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db1.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db1.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:35 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. 
-### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db1 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db1_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:35) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* 
Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz to BED format. -Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5` (24312) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24312; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5 ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:35) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [2 of 11] sample: bedbase_demo_db2; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db2.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db2.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:35 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db2 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db2_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:36) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe` (24344) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 24344; Command: zcat; Return code: 0; Memory used: 0.0GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:36) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [3 of 11] sample: bedbase_demo_db3; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db3.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db3.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:36 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db3 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db3_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:37) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p` (24374) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 24374; Command: zcat; Return code: 0; Memory used: 0.0GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:37) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [4 of 11] sample: bedbase_demo_db4; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db4.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db4.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:37 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db4 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db4_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:37) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd` (24404) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24404; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:38) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [5 of 11] sample: bedbase_demo_db5; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db5.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db5.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:38 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db5 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db5_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:38) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k` (24435) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24435; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:39) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [6 of 11] sample: bedbase_demo_db6; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db6.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db6.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:39 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db6 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db6_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:40) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n` (24466) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24466; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:40) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [7 of 11] sample: bedbase_demo_db7; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db7.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db7.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:40 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db7 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db7_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:40) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410` (24496) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24496; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410 ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:40) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [8 of 11] sample: bedbase_demo_db8; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db8.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db8.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:40 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db8 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db8_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:41) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01` (24527) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24527; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01 ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:41) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [9 of 11] sample: bedhost_demo_db9; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedhost_demo_db9.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedhost_demo_db9.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:41 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedhost_demo_db9 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedhost_demo_db9_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:42) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0` (24559) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24559; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0 ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:42) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [10 of 11] sample: bedbase_demo_db10; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db10.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db10.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:42 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db10 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db10_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:43) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx` (24590) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24590; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:43) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
-## [11 of 11] sample: bedbase_demo_db11; pipeline: BEDBOSS -Calling pre-submit function: looper.write_sample_yaml -Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db11.sub -Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db11.sub -Compute node: bnt4me-Precision-5560 -Start time: 2023-08-14 09:27:43 -Using default config. No config found in env var: PIPESTAT_CONFIG -Config: None. -No schema supplied. -Initialize FileBackend -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db11 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db11_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial -* Outfolder: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/ -* Pipeline started at: (08-14 09:27:44) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.12 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.13.2 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0a2 - -### Arguments passed to pipeline: - - -### Initialized Pipestat Object: - -* PipestatManager (bedboss-pipeline) -* Backend: File -* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml -* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs -* Multiple Pipelines Allowed: True -* Pipeline name: bedboss-pipeline -* Pipeline type: sample -* Status Schema key: None -* Results formatter: default_formatter -* Results schema source: None -* Status schema source: None -* Records count: 2 -* Sample name: DEFAULT_SAMPLE_NAME - - ----------------------------------------- - -Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False} -Getting Open Signal Matrix file path... -output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz -output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files -Got input type: bed -Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz to BED format. 
-Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz` -Running bedqc... -Unused arguments: {} -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0` - -> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0` (24621) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 24621; Command: zcat; Return code: 0; Memory used: 0.003GB - - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0 ` -File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0) has passed Quality Control! -Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml. 
-Initialize DBBackend -/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring "_pipeline_name" - return create_model( -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 689, in _engine -Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml - return self.db_engine_key -AttributeError: 'DBBackend' object has no attribute 'db_engine_key' - -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/bin/bedboss", line 8, in - sys.exit(main()) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 180, in main - run_all(pm=pm, **args_dict) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py", line 138, in run_all - bedstat( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py", line 103, in bedstat - bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py", line 72, in __init__ - BED_TABLE: pipestat.PipestatManager( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py", line 161, in __init__ - self.backend = DBBackend( - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 63, in __init__ - SQLModel.metadata.create_all(self._engine) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py", line 694, in _engine - self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py", 
line 139, in create_engine - return _create_engine(url, **current_kwargs) # type: ignore - File "", line 2, in create_engine - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py", line 309, in warned - return fn(*args, **kwargs) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py", line 518, in create_engine - u = _url.make_url(url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 725, in make_url - return _parse_rfc1738_args(name_or_url) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py", line 781, in _parse_rfc1738_args - components["port"] = int(components["port"]) -ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT' -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline failed at: (08-14 09:27:44) elapsed: 0.0 _TIME_ - -Total time: 0:00:00 -Failure reason: Pipeline failure. See details above. -Exception ignored in atexit callback: > -Traceback (most recent call last): - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2191, in _exit_handler - self.fail_pipeline(Exception("Pipeline failure. See details above.")) - File "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py", line 2036, in fail_pipeline - raise exc -Exception: Pipeline failure. See details above. 
- -Looper finished -Samples valid for job generation: 11 of 11 -Commands submitted: 11 of 11 -Jobs submitted: 11 - - -``` - -### ❗❗ If You have errors in bedstat requirements: - - -```bash -pip install -r bedstat/requirements.txt --user > requirements_log.txt -``` - - - -Install R dependencies - - -```bash -Rscript bedstat/scripts/installRdeps.R > R_deps.txt -``` - -In case there is an issue installing `GenomicDistributionsData`, try: -``` -wget http://big.databio.org/GenomicDistributionsData/GenomicDistributionsData_0.0.2.tar.gz -Rscript -e 'install.packages("GenomicDistributionsData_0.0.2.tar.gz", type="source", repos=NULL)' -``` - -There's an additional dependency needed by `bedstat` if we wish to calculate and plot the GC content of our bedfiles. Depending on the genome assemblies of the files listed on a PEP, the appropriate BSgenome packages should be installed. The following is an example of how we can do so: - - -```bash -cat bedbase/tutorial_files/scripts/BSgenome_install.R -``` - -```.output -if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager") - -BiocManager::install("BSgenome.Hsapiens.UCSC.hg38.masked") -``` - - -```bash -Rscript bedbase/tutorial_files/scripts/BSgenome_install.R > BSgenome.txt -``` - -We'll need to create a directory where we can store the stats and plots generated by `bedstat`. Additionally, we'll create a directory where we can store log and metadata files that we'll need later on. - -## 4. BEDBUNCHER: Create bedsets and their respective statistics - -### Create a new PEP describing the bedset name and specific JSON query - -Now that we've processed several individual BED files, we'll turn to the next task: grouping them together into collections of BED files, which we call *bedsets*. For this, we use the `bedbuncher` pipeline, which produces outputs for each bedset, such as a bedset PEP, bedset-level statistics and plots, and an `IGD` database. 
To run `bedbuncher`, we will need another PEP describing each bedset. Though the annotation sheet below specifies attributes for one bedset, you can create as many as you wish using additional rows. For each bedset, you need to provide the query to retrieve certain collection BED files. - -The following example PEP shows the attributes we need to provide for each bedset and the config.yaml file that will grab the files needed to run `bedbuncher`: - - -```bash -cat bedbase/tutorial_files/PEPs/bedbuncher_query.csv -``` - -```.output -sample_name,bedset_name,genome,query,operator,query_val,bbconfig_name,bedbase_config -sample1,bedsetOver1kRegions,hg38,'regions_no',gt,"""1000""",bedbase_configuration_compose,source1 -sample2,bedsetOver50GCContent,hg38,'gc_content',gt,"""0.5""",bedbase_configuration_compose,source1 -sample3,bedsetUnder500MeanWidth,hg38,'mean_region_width',lt,"""500""",bedbase_configuration_compose,source1 -sample4,bedsetTestSelectCellType,hg38,"""other::text~~:str_1 or other::text~~:str_2""","""str_1,str_2""","""%GM12878%,%HEK293%""",bedbase_configuration_compose,source1 -sample5,bedsetTestSelectGenome,hg38,"""name=:name_1 or name=:name_2""","""name_1,name_2""","""GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38,GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38""",bedbase_configuration_compose,source1 -sample6,bedsetTestCellType,hg38,"""other""",contains,"""""{\""cell_type\"":\ \""K562\""}""""",bedbase_configuration_compose,source1 -sample7,bedsetTestSpace,hg38,"""other""",contains,"""""{\""description\"":\ \""IKZF1\ ChIP-seq\ on\ human\ GM12878\""}""""",bedbase_configuration_compose,source1 -sample8,bedsetTestsSpaceMult,hg38,"""other::text~~:str_1 or other::text~~:str_2""","""str_1,str_2""","""%IKZF1 ChIP-seq on human GM12878%,%ZEB2 ChIP-seq on human K562 (ENCODE)%""",bedbase_configuration_compose,source1 -sample9,bedsetTestSpace2,hg38,"""other""",contains,"""""{\""description\"":\ \""HEK293\ cell\ line\ stably\ expressing\ 
N-terminal\ tagged\ eGFP-GLI2\ under\ the\ control\ of\ a\ CMV\ promoter\""}""""",bedbase_configuration_compose,source1 -sample10,bedsetTestsSpaceMult2,hg38,"""other::text~~:str_1 or other::text~~:str_2""","""str_1,str_2""","""%ZEB2 ChIP-seq on human K562 (ENCODE)%,%HEK293 cell line stably expressing N-terminal tagged eGFP-GLI2 under the control of a CMV promoter %""",bedbase_configuration_compose,source1 - -``` - - -```bash -cat bedbase/tutorial_files/PEPs/bedbuncher_config.yaml -``` - -```.output -pep_version: 2.0.0 -sample_table: bedbuncher_query.csv - -looper: - output_dir: $BEDBASE_DATA_PATH_HOST/outputs/bedbuncher_output/bedbuncher_pipeline_logs - -sample_modifiers: - append: - pipeline_interfaces: $CODE/bedbuncher/pipeline_interface.yaml - derive: - attributes: [bedbase_config] - sources: - source1: $CODE/bedbase/tutorial_files/{bbconfig_name}.yaml - -``` - -Running `bedbuncher` with arguments defined in the example PEP above will result in a bedset with bedfiles that consist of at least 1000 regions. - -### Create outputs directory and install bedbuncher command line dependencies - -We need a folder where we can store bedset related outputs. Though not required, we'll also create a directory where we can store the `bedbuncher` pipeline logs. - - -```bash -mkdir -p outputs/bedbuncher_output/bedbuncher_pipeline_logs -``` - -One of the feats of `bedbuncher` includes [IGD](https://github.com/databio/IGD) database creation from the files in the bedset. `IGD` can be installed by cloning the repository from github, executing the make file to create the binary, and pointing the binary location with the `$PATH` environment variable. - - -```bash -git clone git@github.com:databio/IGD -cd IGD -make > igd_make_log.txt 2>&1 -cd .. - -export PATH=$BEDBASE_DATA_PATH_HOST/IGD/bin/:$PATH -``` - -```.output -Cloning into 'IGD'... -remote: Enumerating objects: 1297, done. -remote: Counting objects: 100% (67/67), done. -remote: Compressing objects: 100% (50/50), done. 
-remote: Total 1297 (delta 35), reused 40 (delta 17), pack-reused 1230 -Receiving objects: 100% (1297/1297), 949.45 KiB | 10.79 MiB/s, done. -Resolving deltas: 100% (804/804), done. - -``` - -### Run bedbuncher using Looper - -Once we have cloned the `bedbuncher` repository, set our local Postgres cluster and created the `iGD` binary, we can run the pipeline by pointing `looper run` to the appropriate `PEP` config file. As mentioned earlier, if the path to the bedbase configuration file has been stored in the `$BEDBASE` environment variable, it's not neccesary to pass the `--bedbase-config` argument. - - -```bash -looper run bedbase/tutorial_files/PEPs/bedbuncher_config.yaml --package local \ ---command-extra="-R" > outputs/bedbuncher_output/bedbuncher_pipeline_logs/looper_logs.txt -``` - -```.output -Looper version: 1.3.1 -Command: run -/home/bnt4me/.local/lib/python3.8/site-packages/divvy/compute.py:150: UserWarning: The '_file_path' property is deprecated and will be removed in a future release. Use ComputingConfiguration["__internal"]["_file_path"] instead. - os.path.dirname(self._file_path), -/home/bnt4me/.local/lib/python3.8/site-packages/divvy/compute.py:58: UserWarning: The '_file_path' property is deprecated and will be removed in a future release. Use ComputingConfiguration["__internal"]["_file_path"] instead. 
- self.config_file = self._file_path -Activating compute package 'local' -## [1 of 10] sample: sample1; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample1.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample1.sub -## [2 of 10] sample: sample2; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample2.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample2.sub -## [3 of 10] sample: sample3; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample3.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample3.sub -## [4 of 10] sample: sample4; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample4.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample4.sub -## [5 of 10] sample: sample5; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample5.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample5.sub -## [6 of 10] sample: sample6; pipeline: BEDBUNCHER -Writing script to 
/home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample6.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample6.sub -## [7 of 10] sample: sample7; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample7.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample7.sub -## [8 of 10] sample: sample8; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample8.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample8.sub -## [9 of 10] sample: sample9; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample9.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample9.sub -## [10 of 10] sample: sample10; pipeline: BEDBUNCHER -Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample10.sub -Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample10.sub - -Looper finished -Samples valid for job generation: 10 of 10 -Commands submitted: 10 of 10 -Jobs submitted: 10 - -``` - -## 5. BEDEMBED: - -### bedembed_train: Uses the StarSpace method to embed the bed files and the meta data. 
- -We need to install [StarSpace](https://github.com/facebookresearch/StarSpace) first. - - -```bash -mkdir -p bedembed/tools -``` - -We need to install [Boost](http://www.boost.org/) library and specify the path of boost library in makefile in order to run StarSpace. - - -```bash -wget https://boostorg.jfrog.io/artifactory/main/release/1.78.0/source/boost_1_78_0.zip -unzip boost_1_78_0.zip -sudo mv boost_1_78_0 /usr/local/bin -cd /usr/local/bin/boost_1_78_0 -./bootstrap.sh -./b2 -``` - -In order to build StarSpace on Mac OS or Linux, use the following: - - -```bash -cd $BEDBASE_DATA_PATH_HOST/bedembed/tools -git clone https://github.com/facebookresearch/Starspace.git -cd Starspace -make -make embed_doc -``` - -We need a folder where we can store bedembed related outputs. Though not required, we'll also create a directory where we can store the bedembed pipeline logs. - - -```bash -mkdir -p outputs/bedembed_output/bedembed_pipeline_logs -``` - - -```bash -path_starspace=$BEDBASE_DATA_PATH_HOST'/bedembed/tools/Starspace/starspace' -path_meta=$BEDBASE_DATA_PATH_HOST'/bedbase/tutorial_files/PEPs/bedstat_annotation_sheet.csv' -# download Universe file from rivanna -path_universe=$BEDBASE_DATA_PATH_HOST'/tiles1000.hg19.bed' -path_output=$BEDBASE_DATA_PATH_HOST'/outputs/bedembed_output/' -assembly='hg38' -path_data=$BEDBASE_DATA_PATH_HOST'/bed_files/' -labels="exp_protocol,cell_type,tissue,antibody,treatment" -no_files=10 -start_line=0 -dim=50 -epochs=20 -learning_rate=0.001 - -python ./bedembed/pipeline/bedembed_train.py -star $path_starspace -i $path_data -g $assembly -meta $path_meta -univ $path_universe \ --l $labels -nof $no_files -o $path_output -startline $start_line -dim $dim -epochs $epochs -lr $learning_rate -``` - -### bedembed_test: calculate the distances between file labels and trained search terms - -### Get a PEP describing the bedfiles to process - -We'll use the standard [PEP](http://pep.databio.org) format for the annotation, which consists of 1) a 
sample table (.csv) that annotates the files, and 2) a project config.yaml file that points to the sample annotation sheet. The config file also has other components, such as derived attributes, that in this case point to the bedfiles to be processed. Here is the PEP config file for this example project: - - -```bash -cat bedbase/tutorial_files/PEPs/bedembed_test_config.yaml -``` - -```.output -bedembed_version: 0.0.0 -sample_table: bedstat_annotation_sheet.csv - -looper: - output-dir: $BEDBASE_DATA_PATH_HOST/outputs/bedembed_output/bedembed_pipeline_logs -sample_modifiers: - append: - bedbase_config: $BEDBASE_DATA_PATH_HOST/bedbase/tutorial_files/bedbase_configuration_compose.yaml - pipeline_interfaces: $BEDBASE_DATA_PATH_HOST/bedembed/pipeline_interface_test.yaml - universe: /project/shefflab/data/StarSpace/universe/universe_tilelen1000.bed - input_file_path: INPUT - output_file_path: $BEDBASE_DATA_PATH_HOST/outputs/bedembed_output - yaml_file: SAMPLE_YAML - derive: - attributes: [yaml_file, input_file_path] - sources: - INPUT: "/project/shefflab/data/encode/{file_name}" - SAMPLE_YAML: "$BEDBASE_DATA_PATH_HOST/outputs/bedembed_output/bedembed_pipeline_logs/submission/{sample_name}_sample.yaml" - -``` - -### Run bedembed using Looper - -Once we have cloned the `bedembed` repository, set our local postgres cluster, we can run the pipeline by pointing `looper run` to the appropriate `PEP` config file. As mentioned earlier, if the path to the bedbase configuration file is provided, the calculated distances will report to the postgres database, if not it will save as a csv file in the `output_file_path` - - -```bash -looper run bedbase/tutorial_files/PEPs/bedembed_test_config.yaml --package local -``` - -## 5. 
BEDHOST: Serve BED files and API to explore pipeline outputs - -The last part of the tutorial consists on running a local instance of `bedhost` (a REST API for `bedstat` and `bedbuncher` produced outputs) in order to explore plots, statistics and download pipeline outputs. -To run `bedhost`, frist use `bedhost-ui` to built the bedhost user interface with React. - - -```bash -cd bedhost-ui -# Install node modules defined in package.json -npm install -# Build the app for production to the ./build folder -npm run build -# copy the contents of the ./build directory to bedhost/bedhost/static/bedhost-ui -cp -avr ./build ../bedhost/bedhost/static/bedhost-ui - -cd .. -``` - -To run `bedhost`, we'll pip install the package from the previously cloned repository: - - -```bash -pip install bedhost/. --user > bedhost_log.txt -``` - -To start `bedhost`, we simply need to run the following command passing the location of the bedbase configuration file to the `-c` flag. - - -```bash -bedhost serve -c $BEDBASE_DATA_PATH_HOST/bedbase/tutorial_files/bedbase_configuration_compose.yaml -``` - -```.output -Serving data for columns: ['md5sum'] -Serving data for columns: ['md5sum'] -Generating GraphQL schema -running bedhost app -INFO: Started server process [648505] -INFO: Waiting for application startup. -INFO: Application startup complete. 
-INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) -INFO: 127.0.0.1:47532 - "GET / HTTP/1.1" 200 OK -INFO: 127.0.0.1:47532 - "GET /ui/static/css/2.fa6c921b.chunk.css HTTP/1.1" 200 OK -INFO: 127.0.0.1:47534 - "GET /ui/static/css/main.4620a2c9.chunk.css HTTP/1.1" 200 OK -INFO: 127.0.0.1:47536 - "GET /ui/static/js/2.b0639060.chunk.js HTTP/1.1" 200 OK -INFO: 127.0.0.1:47534 - "GET /ui/static/js/main.56118e82.chunk.js HTTP/1.1" 200 OK -INFO: 127.0.0.1:47536 - "GET /api/bed/all/data/count HTTP/1.1" 200 OK -[(None,), ({'alias': 'hg38', 'digest': '2230c535660fb4774114bfa966a62f823fdb6d21acf138d4'},)] -INFO: 127.0.0.1:47532 - "GET /api/bed/genomes HTTP/1.1" 200 OK -INFO: 127.0.0.1:47534 - "GET /api/versions HTTP/1.1" 200 OK -INFO: 127.0.0.1:47538 - "GET /ui/bedbase_logo.svg HTTP/1.1" 200 OK -INFO: 127.0.0.1:47538 - "GET /api/bedset/all/data/count HTTP/1.1" 200 OK -Serving data for columns: ['md5sum'] -INFO: 127.0.0.1:47538 - "GET /api/bed/all/data?ids=md5sum&limit=1 HTTP/1.1" 200 OK -Serving data for columns: ['md5sum'] -INFO: 127.0.0.1:47538 - "GET /api/bedset/all/data?ids=md5sum&limt=1 HTTP/1.1" 200 OK -INFO: 127.0.0.1:47538 - "GET /openapi.json HTTP/1.1" 200 OK -INFO: 127.0.0.1:47538 - "GET /ui/favicon.ico HTTP/1.1" 200 OK - -``` - -If we have stored the path to the bedbase config in the environment variable `$BEDBASE` (suggested), it's not neccesary to use said flag. - - -```bash -bedhost serve -``` - -The `bedhost` API can be opened in the url [http://0.0.0.0:8000](http://0.0.0.0:8000). We can now explore the plots and statistics generated by the `bedstat` and `bedbuncher` pipelines. - -## or optionally run BEDHOST using containers - -Alternatively, you can run the application inside a container. - -For that we'll use [docker compose](https://docs.docker.com/compose/), a tool that makes running multi-contaier Docker applications possible. 
The `docker-compose.yaml` file defines two services: -- `fastapi-api`: runs the fastAPI server -- `postgres-db`: runs the PostgeSQL database used by the server - - - -```bash -cd $BEDBASE_DATA_PATH_HOST -``` - -Use the `BEDBASE_DATA_PATH_HOST` environment variable to point to the host directory with the pipeline results that will be mounted in the container as a volume. - -The environment variables are passed to the container via `.env` file, which the `docker-compose.yaml` points to for each service. Additionally, you can just export the environment variables before issuing the `docker-compose` command. -When you set the same environment variable in multiple files, here’s the priority used by Compose to choose which value to use: - -1. Compose file -2. Shell environment variables -3. Environment file -4. Dockerfile -4. Variable is not defined - - -```bash -cd bedhost; docker-compose up -``` - -```.output -Pulling postgres-db (postgres:)... -latest: Pulling from library/postgres -Digest: sha256:8f7c3c9b61d82a4a021da5d9618faf056633e089302a726d619fa467c73609e4 -Status: Downloaded newer image for postgres:latest -Recreating postgreSQL-bedbase ... -Recreating fastAPI-bedbase ... 
mdone -Attaching to postgreSQL-bedbase, fastAPI-bedbase -postgreSQL-bedbase | -postgreSQL-bedbase | PostgreSQL Database directory appears to contain a database; Skipping initialization -postgreSQL-bedbase | -postgreSQL-bedbase | 2020-11-02 23:10:28.883 UTC [1] LOG: starting PostgreSQL 13.0 (Debian 13.0-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit -postgreSQL-bedbase | 2020-11-02 23:10:28.885 UTC [1] LOG: listening on IPv4 address "0.0.0.0", port 5432 -postgreSQL-bedbase | 2020-11-02 23:10:28.885 UTC [1] LOG: listening on IPv6 address "::", port 5432 -postgreSQL-bedbase | 2020-11-02 23:10:28.891 UTC [1] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" -postgreSQL-bedbase | 2020-11-02 23:10:28.901 UTC [25] LOG: database system was shut down at 2020-11-02 23:03:14 UTC -postgreSQL-bedbase | 2020-11-02 23:10:28.909 UTC [1] LOG: database system is ready to accept connections -fastAPI-bedbase | wait-for-it.sh: waiting 60 seconds for postgres-db:5432 -fastAPI-bedbase | wait-for-it.sh: postgres-db:5432 is available after 0 seconds -fastAPI-bedbase | DEBU 2020-11-02 23:10:30,246 | bedhost:est:265 > Configured logger 'bedhost' using logmuse v0.2.6 -fastAPI-bedbase | DEBU 23:10:30 | bbconf:est:265 > Configured logger 'bbconf' using logmuse v0.2.6 -fastAPI-bedbase | DEBU 23:10:30 | bbconf:bbconf:105 > Established connection with PostgreSQL: postgres-db -fastAPI-bedbase | DEBU 2020-11-02 23:10:30,299 | bedhost:main:503 > Determined React UI path: /app/bedhost/static/bedhost-ui -fastAPI-bedbase | INFO 2020-11-02 23:10:30,299 | bedhost:main:510 > running bedhost app -fastAPI-bedbase | INFO: Started server process [1] -fastAPI-bedbase | INFO: Waiting for application startup. -fastAPI-bedbase | INFO: Application startup complete. -fastAPI-bedbase | INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) -Gracefully stopping... (press Ctrl+C again to force) -Stopping fastAPI-bedbase ... 
-Stopping postgreSQL-bedbase ... -ping postgreSQL-bedbase ... done -``` - - -```bash - -``` diff --git a/docs/bedboss/code/bedmaker-tutorial.md b/docs/bedboss/code/bedmaker-tutorial.md deleted file mode 100644 index b66ea38..0000000 --- a/docs/bedboss/code/bedmaker-tutorial.md +++ /dev/null @@ -1,212 +0,0 @@ -jupyter:True -# bedmaker tutorial - -To create bed and bigbed files we will need to use bedmaker: `bedboss make` - - -```bash -bedboss --help -``` - -```.output -version: 0.1.0-dev1 -usage: bedboss [-h] [--version] {all,make,qc,stat} ... - -Warehouse of pipelines for BED-like files: bedmaker, bedstat, and bedqc. - -positional arguments: - {all,make,qc,stat} - all Run all bedboss pipelines and insert data into bedbase - make A pipeline to convert bed, bigbed, bigwig or bedgraph - files into bed and bigbed formats - qc Run quality control on bed file (bedqc) - stat A pipeline to read a file in BED format and produce - metadata in JSON format. - -options: - -h, --help show this help message and exit - --version show program's version number and exit - -``` - - -```bash -bedboss make --help -``` - -```.output -usage: bedboss make [-h] -f INPUT_FILE [-n NARROWPEAK] -t INPUT_TYPE -g GENOME - -r RFG_CONFIG -o OUTPUT_BED --output-bigbed OUTPUT_BIGBED - -s SAMPLE_NAME [--chrom-sizes CHROM_SIZES] - [--standard-chrom] - -options: - -h, --help show this help message and exit - -f INPUT_FILE, --input-file INPUT_FILE - path to the input file - -n NARROWPEAK, --narrowpeak NARROWPEAK - whether the regions are narrow (transcription factor - implies narrow, histone mark implies broad peaks) - -t INPUT_TYPE, --input-type INPUT_TYPE - a bigwig or a bedgraph file that will be converted - into BED format - -g GENOME, --genome GENOME - reference genome - -r RFG_CONFIG, --rfg-config RFG_CONFIG - file path to the genome config file - -o OUTPUT_BED, --output-bed OUTPUT_BED - path to the output BED files - --output-bigbed OUTPUT_BIGBED - path to the folder of output bigBed files - -s 
SAMPLE_NAME, --sample-name SAMPLE_NAME - name of the sample used to systematically build the - output name - --chrom-sizes CHROM_SIZES - a full path to the chrom.sizes required for the - bedtobigbed conversion - --standard-chrom Standardize chromosome names. Default: False - -``` - - -```bash - bedboss make --sample-name test_bed \ - --input-file ../test/data/bed/hg19/correct/hg19_example1.bed \ - --input-type bed \ - --genome hg19 \ - --output-bed ./bed \ - --output-bigbed ./bigbed - -``` - -```.output -Output directory does not exist. Creating: ./bed -BigBed directory does not exist. Creating: ./bigbed -bedmaker logs directory doesn't exist. Creating one... -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss make --sample-name test_bed --input-file ../test/data/bed/hg19/correct/hg19_example1.bed --input-type bed --genome hg19 --output-bed ./bed --output-bigbed ./bigbed` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter -* Outfolder: ./bed/bedmaker_logs/test_bed/ -* Pipeline started at: (02-08 15:39:09) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.6 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.12.3 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: None - -### Arguments passed to pipeline: - - ----------------------------------------- - -Got input type: bed -Converting ../test/data/bed/hg19/correct/hg19_example1.bed to BED format. -Target to produce: `./bed/hg19_example1.bed.gz` - -> `cp ../test/data/bed/hg19/correct/hg19_example1.bed ./bed/hg19_example1.bed` (2477650) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 2477650; Command: cp; Return code: 0; Memory used: 0.0GB - - -> `gzip ./bed/hg19_example1.bed ` (2477652) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 2477652; Command: gzip; Return code: 0; Memory used: 0.0GB - -Running bedqc... -Target to produce: `./bed/bedmaker_logs/test_bed/xl67fcgi` - -> `zcat ./bed/hg19_example1.bed.gz > ./bed/bedmaker_logs/test_bed/xl67fcgi` (2477654) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 2477654; Command: zcat; Return code: 0; Memory used: 0.0GB - -Targetless command, running... - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh ./bed/bedmaker_logs/test_bed/xl67fcgi ` (2477656) -
-1000
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 2477656; Command: bash; Return code: 0; Memory used: 0.0GB - -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline completed. Epilogue -* Elapsed time (this run): 0:00:00 -* Total elapsed time (all runs): 0:00:00 -* Peak memory (this run): 0 GB -* Pipeline completed time: 2023-02-08 15:39:09 -Generating bigBed files for: ../test/data/bed/hg19/correct/hg19_example1.bed -Determining path to chrom.sizes asset via Refgenie. -Creating refgenie genome config file... -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes -Target to produce: `./bigbed/jckj3p1d` - -> `zcat ./bed/hg19_example1.bed.gz | sort -k1,1 -k2,2n > ./bigbed/jckj3p1d` (2477666,2477667) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 2477666; Command: zcat; Return code: 0; Memory used: 0.0GB - PID: 2477667; Command: sort; Return code: 0; Memory used: 0.0GB - -Running: bedToBigBed -type=bed6+3 ./bigbed/jckj3p1d /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes ./bigbed/hg19_example1.bigBed -Target to produce: `./bigbed/hg19_example1.bigBed` - -> `bedToBigBed -type=bed6+3 ./bigbed/jckj3p1d /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes ./bigbed/hg19_example1.bigBed` (2477669) -
-pass1 - making usageList (1 chroms): 1 millis
-pass2 - checking and writing primary data (175 records, 9 fields): 0 millis
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 2477669; Command: bedToBigBed; Return code: 0; Memory used: 0.0GB - -Starting cleanup: 2 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline completed. Epilogue -* Elapsed time (this run): 0:00:00 -* Total elapsed time (all runs): 0:00:00 -* Peak memory (this run): 0 GB -* Pipeline completed time: 2023-02-08 15:39:09 - -``` - -### Let's check if bed file was created (or copied) - - -```bash -ls bed -``` - -```.output -bedmaker_logs hg19_example1.bed.gz - -``` - -### Let's check if bigbed file was created - - -```bash -ls bigbed -``` - -```.output -hg19_example1.bigBed - -``` - -### everything was finished successfuly and files are ready for further analysis! diff --git a/docs/bedboss/code/bedqc-tutorial.md b/docs/bedboss/code/bedqc-tutorial.md deleted file mode 100644 index e21dd71..0000000 --- a/docs/bedboss/code/bedqc-tutorial.md +++ /dev/null @@ -1,71 +0,0 @@ -jupyter:True -# bedqc tutorial - -To check Quality of bed file use this command: `badboss qc` - - -```bash -bedboss qc --help -``` - -```.output -usage: bedboss qc [-h] --bedfile BEDFILE --outfolder OUTFOLDER - -options: - -h, --help show this help message and exit - --bedfile BEDFILE a full path to bed file to process - --outfolder OUTFOLDER - a full path to output log folder. - -``` - -bedqc example: - - -```bash -bedboss qc --bedfile ../test/data/bed/hg19/correct/hg19_example1.bed --outfolder . -``` - -```.output -Running bedqc... 
-### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss qc --bedfile ../test/data/bed/hg19/correct/hg19_example1.bed --outfolder .` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter -* Outfolder: ./ -* Pipeline started at: (02-08 15:44:57) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.6 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.12.3 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: None - -### Arguments passed to pipeline: - - ----------------------------------------- - -Target exists: `../test/data/bed/hg19/correct/hg19_example1.bed` -Targetless command, running... - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh ../test/data/bed/hg19/correct/hg19_example1.bed ` (2478311) -
-1000
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 2478311; Command: bash; Return code: 0; Memory used: 0.0GB - -Starting cleanup: 1 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline completed. Epilogue -* Elapsed time (this run): 0:00:00 -* Total elapsed time (all runs): 0:00:00 -* Peak memory (this run): 0 GB -* Pipeline completed time: 2023-02-08 15:44:57 - -``` diff --git a/docs/bedboss/code/bedstat-tutorial.md b/docs/bedboss/code/bedstat-tutorial.md deleted file mode 100644 index d506a7e..0000000 --- a/docs/bedboss/code/bedstat-tutorial.md +++ /dev/null @@ -1,371 +0,0 @@ -jupyter:True -# bedboss stat - -This tutorial is intended to introduce you to bedstat, pipeline that produces statistics and plots based on bed and bigbed files - -### 1. Install all dependencies and initialize database for it - -- Install dependecies: [How to install R dependencies](./how_to_install_r_dep/) -- Initialize database: [How to initialize database](./how_to_create_database/) -- Create config file: [How to create config file](./how_to_bedbase_config/) - -### 2. Create working repository - - -```bash -mkdir stat_tutorial ; cd stat_tutorial -``` - -Create config file by downloading it and configuring it - - -```bash -cat bedbase_config_test.yaml -``` - -```.output -path: - pipeline_output_path: $BEDBOSS_OUTPUT_PATH # do not change it - bedstat_dir: bedstat_output - remote_url_base: null - bedbuncher_dir: bedbucher_output -database: - host: localhost - port: 5432 - password: docker - user: postgres - name: pep-db - dialect: postgresql - driver: psycopg2 -server: - host: 0.0.0.0 - port: 8000 -remotes: - http: - prefix: https://data.bedbase.org/ - description: HTTP compatible path - s3: - prefix: s3://data.bedbase.org/ - description: S3 compatible path - -``` - -### 3. 
Download bed and bigbed files - -Bed file - - -```bash -wget -O sample1.bed.gz https://github.com/bedbase/bedboss/raw/dev/test/data/bed/hg19/correct/sample1.bed.gz - -``` - -```.output ---2023-02-28 15:32:57-- https://github.com/bedbase/bedboss/raw/dev/test/data/bed/hg19/correct/sample1.bed.gz -Resolving github.com (github.com)... 140.82.113.3 -Connecting to github.com (github.com)|140.82.113.3|:443... connected. -HTTP request sent, awaiting response... 302 Found -Location: https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bed/hg19/correct/sample1.bed.gz [following] ---2023-02-28 15:32:57-- https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bed/hg19/correct/sample1.bed.gz -Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ... -Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: 7087126 (6.8M) [application/octet-stream] -Saving to: ‘sample1.bed.gz’ - -sample1.bed.gz 100%[===================>] 6.76M --.-KB/s in 0.07s - -2023-02-28 15:32:58 (95.8 MB/s) - ‘sample1.bed.gz’ saved [7087126/7087126] - - -``` - -BigBed file - - -```bash -wget -O sample1.bigBed https://github.com/bedbase/bedboss/raw/dev/test/data/bigbed/hg19/correct/sample1.bigBed - -``` - -```.output ---2023-02-28 15:33:00-- https://github.com/bedbase/bedboss/raw/dev/test/data/bigbed/hg19/correct/sample1.bigBed -Resolving github.com (github.com)... 140.82.113.3 -Connecting to github.com (github.com)|140.82.113.3|:443... connected. -HTTP request sent, awaiting response... 302 Found -Location: https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bigbed/hg19/correct/sample1.bigBed [following] ---2023-02-28 15:33:00-- https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bigbed/hg19/correct/sample1.bigBed -Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.110.133, 185.199.111.133, 185.199.109.133, ... -Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: 13092350 (12M) [application/octet-stream] -Saving to: ‘sample1.bigBed’ - -sample1.bigBed 100%[===================>] 12.49M --.-KB/s in 0.1s - -2023-02-28 15:33:00 (101 MB/s) - ‘sample1.bigBed’ saved [13092350/13092350] - - -``` - - -```bash -ls -``` - -```.output -bedbase_config_test.yaml sample1.bed.gz sample1.bigBed - -``` - -### 4. Run statistics: - -Additionally we need some metadata about files. 1) genome assembly, config file and know output folder. - - -```bash -bedboss stat --help -``` - -```.output -usage: bedboss stat [-h] --bedfile BEDFILE --outfolder OUTFOLDER - [--open-signal-matrix OPEN_SIGNAL_MATRIX] [--ensdb ENSDB] - [--bigbed BIGBED] --bedbase-config BEDBASE_CONFIG - [-y SAMPLE_YAML] --genome GENOME_ASSEMBLY [--no-db-commit] - [--just-db-commit] - -options: - -h, --help show this help message and exit - --bedfile BEDFILE a full path to bed file to process [Required] - --outfolder OUTFOLDER - Pipeline output folder [Required] - --open-signal-matrix OPEN_SIGNAL_MATRIX - a full path to the openSignalMatrix required for the - tissue specificity plots - --ensdb ENSDB a full path to the ensdb gtf file required for genomes - not in GDdata - --bigbed BIGBED a full path to the bigbed files - --bedbase-config BEDBASE_CONFIG - a path to the bedbase configuration file [Required] - -y SAMPLE_YAML, --sample-yaml SAMPLE_YAML - a yaml config file with sample attributes to pass on - more metadata into the database - --genome GENOME_ASSEMBLY - genome assembly of the sample [Required] - --no-db-commit whether the JSON commit to the database should be - skipped - --just-db-commit whether just to commit the JSON to the database - -``` - - -```bash -bedboss stat \ ---bedfile ./sample1.bed.gz \ ---bigbed ./sample1.bigBed \ ---outfolder ./test_output \ 
---genome hg19 \ ---bedbase-config ./bedbase_config_test.yaml - -``` - -```.output -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. -### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss stat --bedfile ./sample1.bed.gz --bigbed ./sample1.bigBed --outfolder ./test_output --genome hg19 --bedbase-config ./bedbase_config_test.yaml` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial -* Outfolder: ./test_output/ -* Pipeline started at: (02-28 15:46:52) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.6 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.12.3 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: 0.1.0-dev1 - -### Arguments passed to pipeline: - - ----------------------------------------- - -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1.json` - -> `Rscript /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/tools/regionstat.R --bedfilePath=./sample1.bed.gz --fileId=sample1 --openSignalMatrix=None --outputFolder=/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c --genome=hg19 --ensdb=None --digest=c557c915a9901ce377ef724806ff7a2c` (530529) -
-Loading required package: IRanges
-Loading required package: BiocGenerics
-
-Attaching package: ‘BiocGenerics’
-
-The following objects are masked from ‘package:stats’:
-
-    IQR, mad, sd, var, xtabs
-
-The following objects are masked from ‘package:base’:
-
-    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
-    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
-    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
-    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
-    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
-    union, unique, unsplit, which.max, which.min
-
-Loading required package: S4Vectors
-Loading required package: stats4
-
-Attaching package: ‘S4Vectors’
-
-The following objects are masked from ‘package:base’:
-
-    expand.grid, I, unname
-
-Loading required package: GenomicRanges
-Loading required package: GenomeInfoDb
-[?25hsnapshotDate(): 2021-10-19
-[?25h[?25hLoading required package: GenomicFeatures
-Loading required package: AnnotationDbi
-Loading required package: Biobase
-Welcome to Bioconductor
-
-    Vignettes contain introductory material; view with
-    'browseVignettes()'. To cite Bioconductor, see
-    'citation("Biobase")', and for packages 'citation("pkgname")'.
-
-Loading required package: AnnotationFilter
-
-Attaching package: 'ensembldb'
-
-The following object is masked from 'package:stats':
-
-    filter
-
-[?25h[?25h[?25hLoading required package: R.oo
-Loading required package: R.methodsS3
-R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
-R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.
-
-Attaching package: 'R.oo'
-
-The following object is masked from 'package:R.methodsS3':
-
-    throw
-
-The following object is masked from 'package:GenomicRanges':
-
-    trim
-
-The following object is masked from 'package:IRanges':
-
-    trim
-
-The following objects are masked from 'package:methods':
-
-    getClasses, getMethods
-
-The following objects are masked from 'package:base':
-
-    attach, detach, load, save
-
-R.utils v2.12.2 (2022-11-11 22:00:03 UTC) successfully loaded. See ?R.utils for help.
-
-Attaching package: 'R.utils'
-
-The following object is masked from 'package:utils':
-
-    timestamp
-
-The following objects are masked from 'package:base':
-
-    cat, commandArgs, getOption, isOpen, nullfile, parse, warnings
-
-[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_tssdist"
-Scale for x is already present.
-Adding another scale for x, which will replace the existing scale.
-[1] "Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_tssdist"
-Successfully calculated and plot TSS distance.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_chrombins"
-[1] "Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_chrombins"
-Successfully calculated and plot chromosomes region distribution.
-Calculating overlaps...
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_paritions"
-[1] "Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_paritions"
-Successfully calculated and plot regions distribution over genomic partitions.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_expected_partitions"
-[1] "Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_expected_partitions"
-Successfully calculated and plot expected distribution over genomic partitions.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_cumulative_partitions"
-[1] "Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_cumulative_partitions"
-Successfully calculated and plot cumulative distribution over genomic partitions.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_widths_histogram"
-[1] "Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_widths_histogram"
-Successfully calculated and plot quantile-trimmed histogram of widths.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_neighbor_distances"
-[1] "Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_neighbor_distances"
-Successfully calculated and plot distance between neighbor regions.
-open signal matrix not provided. Skipping tissue specificity plot ... 
-[?25h[?25h
-Command completed. Elapsed time: 0:00:20. Running peak memory: 1.358GB. - PID: 530529; Command: Rscript; Return code: 0; Memory used: 1.358GB - -These results exist for 'c557c915a9901ce377ef724806ff7a2c': bedfile, genome - -### Pipeline completed. Epilogue -* Elapsed time (this run): 0:00:20 -* Total elapsed time (all runs): 0:00:20 -* Peak memory (this run): 1.3577 GB -* Pipeline completed time: 2023-02-28 15:47:12 - -``` - -After plots and statistics were produced, we can look at them - - -```bash -ls test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c -``` - -```.output -sample1_chrombins.pdf sample1_neighbor_distances.png -sample1_chrombins.png sample1_paritions.pdf -sample1_cumulative_partitions.pdf sample1_paritions.png -sample1_cumulative_partitions.png sample1_plots.json -sample1_expected_partitions.pdf sample1_tssdist.pdf -sample1_expected_partitions.png sample1_tssdist.png -sample1.json sample1_widths_histogram.pdf -sample1_neighbor_distances.pdf sample1_widths_histogram.png - -``` - - -```bash -cat test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1.json -``` - -```.output -{ - "name": ["sample1"], - "regions_no": [300000], - "mean_region_width": [663.9], - "md5sum": ["c557c915a9901ce377ef724806ff7a2c"], - "median_TSS_dist": [48580], - "exon_frequency": [14871], - "exon_percentage": [0.0496], - "fiveUTR_frequency": [8981], - "fiveUTR_percentage": [0.0299], - "intergenic_frequency": [141763], - "intergenic_percentage": [0.4725], - "intron_frequency": [106638], - "intron_percentage": [0.3555], - "promoterCore_frequency": [10150], - "promoterCore_percentage": [0.0338], - "promoterProx_frequency": [6851], - "promoterProx_percentage": [0.0228], - "threeUTR_frequency": [10746], - "threeUTR_percentage": [0.0358] -} - -``` diff --git a/docs/bedboss/code/tutorial-all.md b/docs/bedboss/code/tutorial-all.md deleted file mode 100644 index 450b9f6..0000000 --- a/docs/bedboss/code/tutorial-all.md +++ /dev/null @@ -1,499 +0,0 @@ 
-jupyter:True -# Bedboss-all tutorial - -This tutorial is attended to show base exaple of using bedboss all function that inclueds all 3 pipelines: bedmake, bedqc and bedstat - -### 1. First let's create new working repository - - -```bash -mkdir all_tutorial ; cd all_tutorial -``` - -### 2. To run our pipelines we need to check if we have installed all dependencies. To do so we can run dependencies check script that can be found in docs. - - -```bash -wget -O req_test.sh https://raw.githubusercontent.com/bedbase/bedboss/68910f5142a95d92c27ef53eafb9c35599af2fbd/test/bash_requirements_test.sh -``` - -```.output ---2023-08-11 06:58:27-- https://raw.githubusercontent.com/bedbase/bedboss/68910f5142a95d92c27ef53eafb9c35599af2fbd/test/bash_requirements_test.sh -Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ... -Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: 3927 (3.8K) [text/plain] -Saving to: ‘req_test.sh’ - -req_test.sh 100%[===================>] 3.83K --.-KB/s in 0.006s - -2023-08-11 06:58:28 (608 KB/s) - ‘req_test.sh’ saved [3927/3927] - - -``` - - -```bash -chmod u+x ./req_test.sh -``` - - -```bash -./req_test.sh -``` - -```.output ------------------------------------------------------------ - - bedboss installation check - ------------------------------------------------------------ -Checking native installation... -Language compilers... ------------------------------------------------------------ -✔ python is installed correctly -✔ R is installed correctly ------------------------------------------------------------ -Checking bedmaker dependencies... 
------------------------------------------------------------ -✔ package bedboss @ file:///home/bnt4me/virginia/repos/bedbase_all/bedboss -✔ package refgenconf==0.12.2 -✔ bedToBigBed is installed correctly -⚠ WARNING: 'bigBedToBed' is not installed. To install 'bigBedToBed' check bedboss documentation: https://bedboss.databio.org/ -⚠ WARNING: 'bigWigToBedGraph' is not installed. To install 'bigWigToBedGraph' check bedboss documentation: https://bedboss.databio.org/ -⚠ WARNING: 'wigToBigWig' is not installed. To install 'wigToBigWig' check bedboss documentation: https://bedboss.databio.org/ ------------------------------------------------------------ -Checking required R packages for bedstat... ------------------------------------------------------------ -✔ SUCCESS: R package: optparse -✔ SUCCESS: R package: ensembldb -✔ SUCCESS: R package: ExperimentHub -✔ SUCCESS: R package: AnnotationHub -✔ SUCCESS: R package: AnnotationFilter -✔ SUCCESS: R package: BSgenome -✔ SUCCESS: R package: GenomicFeatures -✔ SUCCESS: R package: GenomicDistributions -✔ SUCCESS: R package: GenomicDistributionsData -✔ SUCCESS: R package: GenomeInfoDb -✔ SUCCESS: R package: ensembldb -✔ SUCCESS: R package: tools -✔ SUCCESS: R package: R.utils -✔ SUCCESS: R package: LOLA -Number of WARNINGS: 3 - -``` - -### 3. All requirements are installed, now lets run our pipeline - -To run pipeline, we need to provide few required arguments: -1. sample_name -2. input_file -3. input_type -4. outfolder -5. genome -6. 
bedbase_config - -If you don't have bedbase config file, or initialized bedbase db you can check documnetation how to do it: https://bedboss.databio.org/ - - -```bash -pip install bedboss==0.1.0a2 -``` - -```.output -Requirement already satisfied: bedboss==0.1.0a2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (0.1.0a2) -Requirement already satisfied: piper>=0.13.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.13.2) -Requirement already satisfied: pandas>=1.5.3 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.0.3) -Requirement already satisfied: peppy>=0.35.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.35.7) -Requirement already satisfied: requests>=2.28.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.28.2) -Requirement already satisfied: logmuse>=0.2.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.2.7) -Requirement already satisfied: yacman>=0.8.4 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.9.1) -Requirement already satisfied: refgenconf>=0.12.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.12.2) -Requirement already satisfied: bbconf==0.4.0a1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.4.0a1) -Requirement already satisfied: ubiquerg>=0.6.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.6.2) -Requirement already satisfied: pipestat>=0.4.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bbconf==0.4.0a1->bedboss==0.1.0a2) (0.4.1) -Requirement already satisfied: sqlalchemy<2.0.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bbconf==0.4.0a1->bedboss==0.1.0a2) (1.4.41) 
-Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2023.3) -Requirement already satisfied: pytz>=2020.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2022.7.1) -Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2.8.2) -Requirement already satisfied: numpy>=1.21.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (1.24.1) -Requirement already satisfied: attmap>=0.13.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (0.13.2) -Requirement already satisfied: pyyaml in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (6.0) -Requirement already satisfied: rich>=10.3.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (13.3.0) -Requirement already satisfied: psutil in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from piper>=0.13.2->bedboss==0.1.0a2) (5.9.4) -Requirement already satisfied: tqdm in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.64.1) -Requirement already satisfied: pyfaidx in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (0.7.1) -Requirement already satisfied: future in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (0.18.3) -Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.17.3) -Requirement already satisfied: charset-normalizer<4,>=2 in 
/home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.0.1) -Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (1.26.14) -Requirement already satisfied: idna<4,>=2.5 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.4) -Requirement already satisfied: certifi>=2017.4.17 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (2022.12.7) -Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from yacman>=0.8.4->bedboss==0.1.0a2) (1.0) -Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (22.2.0) -Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (0.19.3) -Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.9.5) -Requirement already satisfied: eido in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.2.1) -Requirement already satisfied: sqlmodel>=0.0.8 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.8) -Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (1.10.12) -Requirement already satisfied: six>=1.5 in 
/home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=1.5.3->bedboss==0.1.0a2) (1.16.0) -Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.1.0) -Requirement already satisfied: pygments<3.0.0,>=2.14.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.14.0) -Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from sqlalchemy<2.0.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.0.1) -Requirement already satisfied: setuptools>=0.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pyfaidx->refgenconf>=0.12.2->bedboss==0.1.0a2) (65.5.1) -Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (0.1.2) -Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (4.4.0) -Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.2a35) - -[notice] A new release of pip available: 22.3.1 -> 23.2.1 -[notice] To update, run: pip install --upgrade pip - -``` - - -```bash -bedboss all -``` - -```.output -usage: bedboss all [-h] --outfolder OUTFOLDER -s SAMPLE_NAME -f INPUT_FILE -t - INPUT_TYPE -g GENOME [-r RFG_CONFIG] - [--chrom-sizes CHROM_SIZES] [-n] [--standard-chrom] - [--check-qc] [--open-signal-matrix OPEN_SIGNAL_MATRIX] - [--ensdb ENSDB] --bedbase-config BEDBASE_CONFIG - [-y SAMPLE_YAML] [--no-db-commit] [--just-db-commit] -bedboss 
all: error: the following arguments are required: --outfolder, -s/--sample-name, -f/--input-file, -t/--input-type, -g/--genome, --bedbase-config - -``` - - - -Let's download sample file. Information about this file you can find here: https://pephub.databio.org/bedbase/GSE177859?tag=default - - -```bash -wget -O sample1.bed.gz ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM5379nnn/GSM5379062/suppl/GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz -``` - -```.output ---2023-08-11 07:12:28-- ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM5379nnn/GSM5379062/suppl/GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz - => ‘sample1.bed.gz’ -Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 130.14.250.12, 130.14.250.10, 2607:f220:41f:250::229, ... -Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|130.14.250.12|:21... connected. -Logging in as anonymous ... Logged in! -==> SYST ... done. ==> PWD ... done. -==> TYPE I ... done. ==> CWD (1) /geo/samples/GSM5379nnn/GSM5379062/suppl ... done. -==> SIZE GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz ... 5470278 -==> PASV ... done. ==> RETR GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz ... done. -Length: 5470278 (5.2M) (unauthoritative) - -GSM5379062_ENCFF834 100%[===================>] 9.76M 1008KB/s in 24s - -2023-08-11 07:12:55 (419 KB/s) - ‘sample1.bed.gz’ saved [10231006] - - -``` - - -```bash - -``` - -let's create bedbase config file: - - -```bash -cat bedbase_config_test.yaml -``` - -```.output -cat: bedbase_config_test.yaml: No such file or directory - -``` - - - -Now let's run bedboss: - - -```bash -bedboss all --sample-name tutorial_f1 \ ---input-file sample1.bed.gz \ ---input-type bed \ ---outfolder ./tutorial \ ---genome GRCh38 \ ---bedbase-config bedbase_config_test.yaml -``` - -```.output -Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen. 
-### Pipeline run code and environment: - -* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name tutorial_f1 --input-file sample1.bed.gz --input-type bed --outfolder ./tutorial --genome GRCh38 --bedbase-config bedbase_config_test.yaml` -* Compute host: bnt4me-Precision-5560 -* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial -* Outfolder: ./tutorial/ -* Pipeline started at: (02-27 12:47:26) elapsed: 0.0 _TIME_ - -### Version log: - -* Python version: 3.10.6 -* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper` -* Pypiper version: 0.12.3 -* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin` -* Pipeline version: None - -### Arguments passed to pipeline: - - ----------------------------------------- - -Unused arguments: {'command': 'all'} -Getting Open Signal Matrix file path... -output_bed = ./tutorial/bed_files/sample1.bed.gz -output_bigbed = ./tutorial/bigbed_files -Output directory does not exist. Creating: ./tutorial/bed_files -BigBed directory does not exist. Creating: ./tutorial/bigbed_files -bedmaker logs directory doesn't exist. Creating one... -Got input type: bed -Converting sample1.bed.gz to BED format. -Target to produce: `./tutorial/bed_files/sample1.bed.gz` - -> `cp sample1.bed.gz ./tutorial/bed_files/sample1.bed.gz` (434320) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. - PID: 434320; Command: cp; Return code: 0; Memory used: 0.0GB - -Running bedqc... -Unused arguments: {} -Target to produce: `./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8` - -> `zcat ./tutorial/bed_files/sample1.bed.gz > ./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8` (434322) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 434322; Command: zcat; Return code: 0; Memory used: 0.003GB - -Targetless command, running... - -> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh ./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8 ` (434324) -
-236000
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. - PID: 434324; Command: bash; Return code: 0; Memory used: 0.0GB - -File (./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8) has passed Quality Control! -Generating bigBed files for: sample1.bed.gz -Determining path to chrom.sizes asset via Refgenie. -Creating refgenie genome config file... -Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/genome_config.yaml -/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes -Target to produce: `./tutorial/bigbed_files/vzxyqexz` - -> `zcat ./tutorial/bed_files/sample1.bed.gz | sort -k1,1 -k2,2n > ./tutorial/bigbed_files/vzxyqexz` (434335,434336) -
-
-Command completed. Elapsed time: 0:00:00. Running peak memory: 0.007GB. - PID: 434335; Command: zcat; Return code: 0; Memory used: 0.002GB - PID: 434336; Command: sort; Return code: 0; Memory used: 0.007GB - -Running: /home/bnt4me/virginia/repos/bedbase_all/bedboss/bedToBigBed -type=bed6+4 ./tutorial/bigbed_files/vzxyqexz /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes ./tutorial/bigbed_files/sample1.bigBed -Target to produce: `./tutorial/bigbed_files/sample1.bigBed` - -> `/home/bnt4me/virginia/repos/bedbase_all/bedboss/bedToBigBed -type=bed6+4 ./tutorial/bigbed_files/vzxyqexz /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes ./tutorial/bigbed_files/sample1.bigBed` (434338) -
-pass1 - making usageList (25 chroms): 27 millis
-pass2 - checking and writing primary data (222016 records, 10 fields): 413 millis
-
-Command completed. Elapsed time: 0:00:01. Running peak memory: 0.007GB. - PID: 434338; Command: /home/bnt4me/virginia/repos/bedbase_all/bedboss/bedToBigBed; Return code: 0; Memory used: 0.004GB - -Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1.json` - -> `Rscript /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/tools/regionstat.R --bedfilePath=./tutorial/bed_files/sample1.bed.gz --fileId=sample1 --openSignalMatrix=./openSignalMatrix/openSignalMatrix_hg38_percentile99_01_quantNormalized_round4d.txt.gz --outputFolder=/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5 --genome=hg38 --ensdb=None --digest=eb617f28e129c401be94069e0fdedbb5` (434343) -
-Loading required package: IRanges
-Loading required package: BiocGenerics
-
-Attaching package: ‘BiocGenerics’
-
-The following objects are masked from ‘package:stats’:
-
-    IQR, mad, sd, var, xtabs
-
-The following objects are masked from ‘package:base’:
-
-    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
-    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
-    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
-    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
-    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
-    union, unique, unsplit, which.max, which.min
-
-Loading required package: S4Vectors
-Loading required package: stats4
-
-Attaching package: ‘S4Vectors’
-
-The following objects are masked from ‘package:base’:
-
-    expand.grid, I, unname
-
-Loading required package: GenomicRanges
-Loading required package: GenomeInfoDb
-[?25hsnapshotDate(): 2021-10-19
-[?25h[?25hLoading required package: GenomicFeatures
-Loading required package: AnnotationDbi
-Loading required package: Biobase
-Welcome to Bioconductor
-
-    Vignettes contain introductory material; view with
-    'browseVignettes()'. To cite Bioconductor, see
-    'citation("Biobase")', and for packages 'citation("pkgname")'.
-
-Loading required package: AnnotationFilter
-
-Attaching package: 'ensembldb'
-
-The following object is masked from 'package:stats':
-
-    filter
-
-[?25h[?25h[?25hLoading required package: R.oo
-Loading required package: R.methodsS3
-R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
-R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.
-
-Attaching package: 'R.oo'
-
-The following object is masked from 'package:R.methodsS3':
-
-    throw
-
-The following object is masked from 'package:GenomicRanges':
-
-    trim
-
-The following object is masked from 'package:IRanges':
-
-    trim
-
-The following objects are masked from 'package:methods':
-
-    getClasses, getMethods
-
-The following objects are masked from 'package:base':
-
-    attach, detach, load, save
-
-R.utils v2.12.2 (2022-11-11 22:00:03 UTC) successfully loaded. See ?R.utils for help.
-
-Attaching package: 'R.utils'
-
-The following object is masked from 'package:utils':
-
-    timestamp
-
-The following objects are masked from 'package:base':
-
-    cat, commandArgs, getOption, isOpen, nullfile, parse, warnings
-
-[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25hsee ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation
-loading from cache
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_tssdist"
-Scale for x is already present.
-Adding another scale for x, which will replace the existing scale.
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_tssdist"
-Successfully calculated and plot TSS distance.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_chrombins"
-see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation
-loading from cache
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_chrombins"
-Successfully calculated and plot chromosomes region distribution.
-see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation
-loading from cache
-Calculating overlaps...
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_paritions"
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_paritions"
-Successfully calculated and plot regions distribution over genomic partitions.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_expected_partitions"
-see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation
-loading from cache
-see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation
-loading from cache
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_expected_partitions"
-Successfully calculated and plot expected distribution over genomic partitions.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_cumulative_partitions"
-see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation
-loading from cache
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_cumulative_partitions"
-Successfully calculated and plot cumulative distribution over genomic partitions.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_widths_histogram"
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_widths_histogram"
-Successfully calculated and plot quantile-trimmed histogram of widths.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_neighbor_distances"
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_neighbor_distances"
-Successfully calculated and plot distance between neighbor regions.
-[1] "Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_open_chromatin"
-[1] "Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_open_chromatin"
-Successfully calculated and plot cell specific enrichment for open chromatin.
-[?25h[?25h
-Command completed. Elapsed time: 0:00:49. Running peak memory: 3.843GB. - PID: 434343; Command: Rscript; Return code: 0; Memory used: 3.843GB - -These results exist for 'eb617f28e129c401be94069e0fdedbb5': name, regions_no, mean_region_width, md5sum, bedfile, genome, bigbedfile, widths_histogram, neighbor_distances -Starting cleanup: 2 files; 0 conditional files for cleanup - -Cleaning up flagged intermediate files. . . - -### Pipeline completed. Epilogue -* Elapsed time (this run): 0:00:50 -* Total elapsed time (all runs): 0:00:50 -* Peak memory (this run): 3.8432 GB -* Pipeline completed time: 2023-02-27 12:48:16 - -``` - -Now let's check if all files where saved - - -```bash -ls tutorial/bed_files -``` - -```.output -bedmaker_logs sample1.bed.gz - -``` - - -```bash -ls tutorial/bigbed_files -``` - -```.output -sample1.bigBed - -``` - - -```bash -ls tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/ -``` - -```.output -sample1_chrombins.pdf sample1_open_chromatin.pdf -sample1_chrombins.png sample1_open_chromatin.png -sample1_cumulative_partitions.pdf sample1_paritions.pdf -sample1_cumulative_partitions.png sample1_paritions.png -sample1_expected_partitions.pdf sample1_plots.json -sample1_expected_partitions.png sample1_tssdist.pdf -sample1.json sample1_tssdist.png -sample1_neighbor_distances.pdf sample1_widths_histogram.pdf -sample1_neighbor_distances.png sample1_widths_histogram.png - -``` - -Everything was ran correctly:) diff --git a/docs/bedboss/how-to-install-requirements.md b/docs/bedboss/how-to-install-requirements.md index 24ac979..f6a5ce5 100644 --- a/docs/bedboss/how-to-install-requirements.md +++ b/docs/bedboss/how-to-install-requirements.md @@ -4,14 +4,12 @@ 1. Install R: https://cran.r-project.org/bin/linux/ubuntu/fullREADME.html 2. Download this script: [installRdeps.R](https://github.com/databio/bedboss/blob/dev/scripts/installRdeps.R) 3. Install dependencies by running this command in your terminal: ```Rscript installRdeps.R``` -4. 
Run `bedboss requirements-check` to check if everything was installed correctly. +4. Run `bedboss check-requirements` to check if everything was installed correctly. # How to install regionset conversion tools: -- bedToBigBed: http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed -- bigBedToBed: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigBedToBed -- bigWigToBedGraph: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigWigToBedGraph -- wigToBigWig: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/wigToBigWig - - +- **bedToBigBed**: http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed +- **bigBedToBed**: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigBedToBed +- **bigWigToBedGraph**: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigWigToBedGraph +- **wigToBigWig**: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/wigToBigWig diff --git a/docs/bedboss/notebooks/bedbase-tutorial.ipynb b/docs/bedboss/notebooks/bedbase-tutorial.ipynb deleted file mode 100644 index ac7cf8d..0000000 --- a/docs/bedboss/notebooks/bedbase-tutorial.ipynb +++ /dev/null @@ -1,3224 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# BEDBASE workflow tutorial\n", - "\n", - "This demo demonstrates how to process, analyze, visualize, and serve BED files. The process has 5 steps: First, the [bedmaker](https://github.com/databio/bedmaker) pipeline converts different region data files (bed, bedGraph, bigBed, bigWig, and wig) into BED format and generates bigBed format for each file for visualization in Genome Browser. An optional step, the [bedqc](https://github.com/databio/bedqc) pipline, flags the BED files that you might not want to include in the downstream analysis. Second, individual BED files are analyzed using the [bedstat](https://github.com/databio/bedstat) pipeline. 
Third, BED files are grouped and then analyzed as groups using the [bedbuncher](https://github.com/databio/bedbuncher) pipeline. Fourth, [bedembed](https://github.com/databio/bedembed) uses the StarSpace method to embed the bed files and the meta data, and the distances between the file labels and trained search terms will be calculated with cosine distance. Finally, the BED files, along with statistics, plots, and grouping information, is served via a web interface and RESTful API using the [bedhost](https://github.com/databio/bedhost) package.\n", - "\n", - "**Glossary of terms:**\n", - "\n", - "- *bedfile*: a tab-delimited file with one genomic region per line. Each genomic region is decribed by 3 required columns: chrom, start and end.\n", - "- *bedset*: a collection of BED files grouped by with a shared biological, experimental, or logical criterion.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "toc": true - }, - "source": [ - "

Table of Contents

\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Preparation \n", - "\n", - "First, we will create a tutorial directory where we'll store the bedbase pipelines and files to be processed. We'll also need to create an environment variable that points to the tutorial directory (we'll need this variable later). " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bash: cd: bedbase_tutorial: No such file or directory\n" - ] - } - ], - "source": [ - "# mkdir bedbase_tutorial\n", - "cd bedbase_tutorial\n", - "export BEDBASE_DATA_PATH_HOST=`pwd`\n", - "export CODE=`pwd`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Download some example BED files:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-08-11 08:10:02-- http://big.databio.org/example_data/bedbase_tutorial/bed_files.tar.gz\n", - "Resolving big.databio.org (big.databio.org)... 128.143.223.179\n", - "Connecting to big.databio.org (big.databio.org)|128.143.223.179|:80... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 44549692 (42M) [application/octet-stream]\n", - "Saving to: ‘bed_files.tar.gz’\n", - "\n", - "bed_files.tar.gz 100%[===================>] 42.49M 303KB/s in 95s \n", - "\n", - "2023-08-11 08:11:37 (458 KB/s) - ‘bed_files.tar.gz’ saved [44549692/44549692]\n", - "\n" - ] - } - ], - "source": [ - "wget http://big.databio.org/example_data/bedbase_tutorial/bed_files.tar.gz " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The downloaded files are compressed so we'll need to untar them:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bed_files/\n", - "bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz\n", - "bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz\n", - "bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz\n", - "bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz\n", - "bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz\n", - "bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz\n", - "bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz\n", - "bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz\n", - "bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n" - ] - } - ], - "source": [ - "tar -zxvf bed_files.tar.gz && mv bed_files files" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "rm bed_files.tar.gz" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Additionally, we'll download a matrix we need to provide if we wish to plot the tissue specificity of our set of genomic ranges:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lastly, we'll 
download the core pipelines and tools needed to complete this tutorial: `bedmaker`, `bedqc`, `bedstat`, `bedbuncher` , `bedhost`, and `bedhost-ui`" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting looper==1.5.1\n", - " Downloading looper-1.5.1-py3-none-any.whl (121 kB)\n", - "\u001b[2K \u001b[38;5;107m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.8/121.8 KB\u001b[0m \u001b[31m734.8 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m kB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting pipestat>=0.5.1\n", - " Downloading pipestat-0.5.1-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[38;5;107m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 KB\u001b[0m \u001b[31m724.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m kB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pandas>=2.0.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (2.0.3)\n", - "Requirement already satisfied: logmuse>=0.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.2.7)\n", - "Requirement already satisfied: ubiquerg>=0.5.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.6.2)\n", - "Requirement already satisfied: pyyaml>=3.12 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (6.0)\n", - "Requirement already satisfied: eido>=0.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.2.1)\n", - "Requirement already satisfied: peppy>=0.35.4 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.35.7)\n", - "Requirement already satisfied: yacman>=0.9 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.9.1)\n", - "Requirement 
already satisfied: colorama>=0.3.9 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.4.6)\n", - "Requirement already satisfied: divvy>=0.5.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.6.0)\n", - "Requirement already satisfied: rich>=9.10.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (13.3.1)\n", - "Requirement already satisfied: jinja2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (3.1.2)\n", - "Requirement already satisfied: pephubclient in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from looper==1.5.1) (0.1.0)\n", - "Requirement already satisfied: attmap>=0.12.9 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from divvy>=0.5.0->looper==1.5.1) (0.13.2)\n", - "Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from eido>=0.2.0->looper==1.5.1) (4.17.3)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (2022.7.1)\n", - "Requirement already satisfied: numpy>=1.21.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (1.22.0)\n", - "Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (2023.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=2.0.2->looper==1.5.1) (2.8.2)\n", - "Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.5.1->looper==1.5.1) (1.0)\n", - "Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from 
pipestat>=0.5.1->looper==1.5.1) (2.9.5)\n", - "Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.5.1->looper==1.5.1) (1.10.7)\n", - "Requirement already satisfied: sqlmodel>=0.0.8 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.5.1->looper==1.5.1) (0.0.8)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=9.10.0->looper==1.5.1) (2.14.0)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=9.10.0->looper==1.5.1) (2.1.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jinja2->looper==1.5.1) (2.1.2)\n", - "Requirement already satisfied: requests>=2.28.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pephubclient->looper==1.5.1) (2.28.2)\n", - "Requirement already satisfied: typer>=0.7.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pephubclient->looper==1.5.1) (0.8.0)\n", - "Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->eido>=0.2.0->looper==1.5.1) (22.2.0)\n", - "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->eido>=0.2.0->looper==1.5.1) (0.19.3)\n", - "Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=9.10.0->looper==1.5.1) (0.1.2)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.5.1->looper==1.5.1) 
(4.4.0)\n", - "Requirement already satisfied: six>=1.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=2.0.2->looper==1.5.1) (1.16.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (3.0.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (1.26.14)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->pephubclient->looper==1.5.1) (2022.12.7)\n", - "Requirement already satisfied: SQLAlchemy<=1.4.41,>=1.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.5.1->looper==1.5.1) (1.4.41)\n", - "Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.5.1->looper==1.5.1) (0.0.2a35)\n", - "Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from typer>=0.7.0->pephubclient->looper==1.5.1) (8.1.3)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from SQLAlchemy<=1.4.41,>=1.4.17->sqlmodel>=0.0.8->pipestat>=0.5.1->looper==1.5.1) (2.0.2)\n", - "Installing collected packages: pipestat, looper\n", - " Attempting uninstall: pipestat\n", - " Found existing installation: pipestat 0.5.0\n", - " Uninstalling pipestat-0.5.0:\n", - " Successfully uninstalled pipestat-0.5.0\n", - " Attempting uninstall: looper\n", - " 
Found existing installation: looper 1.5.0\n", - " Uninstalling looper-1.5.0:\n", - " Successfully uninstalled looper-1.5.0\n", - "Successfully installed looper-1.5.1 pipestat-0.5.1\n", - "Requirement already satisfied: refgenie in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (0.12.1)\n", - "Requirement already satisfied: pyfaidx>=0.5.5.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.7.1)\n", - "Requirement already satisfied: refgenconf>=0.12.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.12.2)\n", - "Requirement already satisfied: piper>=0.12.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.13.2)\n", - "Requirement already satisfied: logmuse>=0.2.6 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.2.7)\n", - "Requirement already satisfied: yacman>=0.8.3 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenie) (0.9.1)\n", - "Requirement already satisfied: ubiquerg>=0.4.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (0.6.2)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: pipestat>=0.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (0.5.1)\n", - "Requirement already satisfied: pandas in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (2.0.3)\n", - "Requirement already satisfied: attmap>=0.12.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (0.13.2)\n", - "Requirement already satisfied: psutil in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.12.1->refgenie) (5.9.4)\n", - "Requirement already satisfied: setuptools>=0.7 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pyfaidx>=0.5.5.2->refgenie) (59.6.0)\n", - "Requirement already satisfied: six in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pyfaidx>=0.5.5.2->refgenie) (1.16.0)\n", - "Requirement already satisfied: future in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (0.18.3)\n", - "Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (4.17.3)\n", - "Requirement already satisfied: pyyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (6.0)\n", - "Requirement already satisfied: tqdm in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (4.64.1)\n", - "Requirement already satisfied: rich>=9.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (13.3.1)\n", - "Requirement already satisfied: requests in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->refgenie) (2.28.2)\n", - "Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from yacman>=0.8.3->refgenie) (1.0)\n", - "Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->refgenie) (22.2.0)\n", - "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->refgenie) (0.19.3)\n", - "Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (1.10.7)\n", - "Requirement already satisfied: sqlmodel>=0.0.8 
in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.0.8)\n", - "Requirement already satisfied: eido in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.2.1)\n", - "Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->piper>=0.12.1->refgenie) (2.9.5)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=9.0.1->refgenconf>=0.12.2->refgenie) (2.14.0)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=9.0.1->refgenconf>=0.12.2->refgenie) (2.1.0)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (2022.7.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (2.8.2)\n", - "Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (2023.3)\n", - "Requirement already satisfied: numpy>=1.21.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas->piper>=0.12.1->refgenie) (1.22.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (2022.12.7)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (1.26.14)\n", - "Requirement already satisfied: idna<4,>=2.5 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (3.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests->refgenconf>=0.12.2->refgenie) (3.0.1)\n", - "Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=9.0.1->refgenconf>=0.12.2->refgenie) (0.1.2)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.4.0->piper>=0.12.1->refgenie) (4.4.0)\n", - "Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.0.2a35)\n", - "Requirement already satisfied: SQLAlchemy<=1.4.41,>=1.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->piper>=0.12.1->refgenie) (1.4.41)\n", - "Requirement already satisfied: peppy>=0.35.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from eido->pipestat>=0.4.0->piper>=0.12.1->refgenie) (0.35.7)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from SQLAlchemy<=1.4.41,>=1.4.17->sqlmodel>=0.0.8->pipestat>=0.4.0->piper>=0.12.1->refgenie) (2.0.2)\n" - ] - } - ], - "source": [ - "pip install looper==1.5.1\n", - "pip install refgenie" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cloning into 'bedbase'...\n", - "remote: Enumerating objects: 664, done.\u001b[K\n", - "remote: Counting objects: 100% (317/317), done.\u001b[K\n", - "remote: Compressing objects: 100% (159/159), done.\u001b[K\n", - "remote: 
Total 664 (delta 188), reused 250 (delta 148), pack-reused 347\u001b[K\n", - "Receiving objects: 100% (664/664), 695.03 KiB | 386.00 KiB/s, done.\n", - "Resolving deltas: 100% (337/337), done.\n", - "Collecting bedboss==0.1.0a2\n", - " Downloading bedboss-0.1.0a2-py3-none-any.whl (24 kB)\n", - "Requirement already satisfied: requests>=2.28.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.28.2)\n", - "Collecting bbconf==0.4.0a1\n", - " Using cached bbconf-0.4.0a1-py3-none-any.whl (11 kB)\n", - "Requirement already satisfied: refgenconf>=0.12.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.12.2)\n", - "Requirement already satisfied: yacman>=0.8.4 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.9.1)\n", - "Collecting piper>=0.13.2\n", - " Using cached piper-0.13.2-py3-none-any.whl (72 kB)\n", - "Requirement already satisfied: peppy>=0.35.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.35.7)\n", - "Requirement already satisfied: pandas>=1.5.3 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.0.0)\n", - "Requirement already satisfied: ubiquerg>=0.6.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.6.2)\n", - "Requirement already satisfied: logmuse>=0.2.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.2.7)\n", - "Requirement already satisfied: sqlalchemy<2.0.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bbconf==0.4.0a1->bedboss==0.1.0a2) (1.4.41)\n", - "Requirement already satisfied: pipestat>=0.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from bbconf==0.4.0a1->bedboss==0.1.0a2) (0.4.1)\n", - "Requirement already satisfied: pytz>=2020.1 in 
/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2022.7.1)\n", - "Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2023.3)\n", - "Requirement already satisfied: numpy>=1.21.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (1.22.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2.8.2)\n", - "Requirement already satisfied: pyyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (6.0)\n", - "Requirement already satisfied: attmap>=0.13.2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (0.13.2)\n", - "Requirement already satisfied: rich>=10.3.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (13.3.1)\n", - "Requirement already satisfied: psutil in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from piper>=0.13.2->bedboss==0.1.0a2) (5.9.4)\n", - "Requirement already satisfied: future in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (0.18.3)\n", - "Requirement already satisfied: pyfaidx in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (0.7.1)\n", - "Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.17.3)\n", - "Requirement already satisfied: tqdm in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.64.1)\n", - "Requirement already satisfied: 
charset-normalizer<4,>=2 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.0.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (2022.12.7)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (1.26.14)\n", - "Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from yacman>=0.8.4->bedboss==0.1.0a2) (1.0)\n", - "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (0.19.3)\n", - "Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (22.2.0)\n", - "Requirement already satisfied: eido in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.2.1)\n", - "Requirement already satisfied: sqlmodel>=0.0.8 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.8)\n", - "Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.9.5)\n", - "Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (1.10.7)\n", - "Requirement 
already satisfied: six>=1.5 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=1.5.3->bedboss==0.1.0a2) (1.16.0)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.1.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.14.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.14.0)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlalchemy<2.0.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.0.2)\n", - "Requirement already satisfied: setuptools>=0.7 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pyfaidx->refgenconf>=0.12.2->bedboss==0.1.0a2) (59.6.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (0.1.2)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (4.4.0)\n", - "Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.2a35)\n", - "Installing collected packages: piper, bbconf, bedboss\n", - " Attempting uninstall: piper\n", - " Found existing installation: piper 0.12.3\n", - " Uninstalling piper-0.12.3:\n", - " Successfully uninstalled piper-0.12.3\n", - " Attempting uninstall: bbconf\n", - " Found existing installation: bbconf 0.4.0\n", - " Uninstalling bbconf-0.4.0:\n", - " Successfully uninstalled bbconf-0.4.0\n", - " Attempting uninstall: bedboss\n", 
- " Found existing installation: bedboss 0.1.0.dev2\n", - " Uninstalling bedboss-0.1.0.dev2:\n", - " Successfully uninstalled bedboss-0.1.0.dev2\n", - "Successfully installed bbconf-0.4.0a1 bedboss-0.1.0a2 piper-0.13.2\n" - ] - } - ], - "source": [ - "git clone -b dev-bedboss git@github.com:databio/bedbase.git\n", - "pip install bedboss==0.1.0a2\n", - "# git clone -b validate_genome_assembly git@github.com:databio/bedbuncher\n", - "# git clone git@github.com:databio/bedembed\n", - "# git clone -b dev git@github.com:databio/bedhost\n", - "# git clone git@github.com:databio/bedhost-ui" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Let's install this packages!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " I have problems with bedtoBigBed script, so I am downloading it too, and seting in bedmaker path to this script :/" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-08-11 07:51:37-- http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed\n", - "Resolving hgdownload.soe.ucsc.edu (hgdownload.soe.ucsc.edu)... 128.114.119.163\n", - "Connecting to hgdownload.soe.ucsc.edu (hgdownload.soe.ucsc.edu)|128.114.119.163|:80... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 9632264 (9.2M)\n", - "Saving to: ‘bedToBigBed’\n", - "\n", - "bedToBigBed 100%[===================>] 9.19M 740KB/s in 18s \n", - "\n", - "2023-08-11 07:51:56 (524 KB/s) - ‘bedToBigBed’ saved [9632264/9632264]\n", - "\n" - ] - } - ], - "source": [ - "wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed\n", - "chmod a+x bedToBigBed" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n" - ] - } - ], - "source": [ - "pwd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - - "Check if we have all tutorial files:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bedbase_configuration_compose_local.yaml \u001b[0m\u001b[01;34mbedboss\u001b[0m \u001b[01;34mscripts\u001b[0m\n", - "bedbase_configuration_compose.yaml \u001b[01;34mPEPs\u001b[0m\n" - ] - } - ], - "source": [ - "ls bedbase/tutorial_files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 2. 
BEDBOSS: ALL TOGETHER" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[01;31m\u001b[Kbbconf\u001b[m\u001b[K 0.4.0a1\n" - ] - } - ], - "source": [ - "pip list | grep bbconf" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[01;31m\u001b[Kbedboss\u001b[m\u001b[K 0.1.0a2\n" - ] - } - ], - "source": [ - "pip list | grep bedboss" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Check and update config files" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bedboss_pep_config.yaml looper_config_bedboss.yaml sample_table.csv\n", - "config_db_local.yaml pipeline_interface.yaml\n" - ] - } - ], - "source": [ - "ls bedbase/tutorial_files/bedboss" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's create additional metadata for our database:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pep_version: 2.1.0\n", - "sample_table: sample_table.csv\n", - "\n", - "sample_modifiers:\n", - " append:\n", - " input_file_path: INPUT\n", - " output_folder: \"$BEDBASE_DATA_PATH_HOST/outputs\"\n", - " narrowpeak: TRUE\n", - " rfg_config_path: RFG\n", - " bedbase_config: \"$BEDBASE_DATA_PATH_HOST/bedbase/tutorial_files/bedboss/config_db_local.yaml\"\n", - " yaml_file: YAMLFILE\n", - " derive:\n", - " attributes: [input_file_path, rfg_config_path, yaml_file]\n", - " sources:\n", - " INPUT: \"$BEDBASE_DATA_PATH_HOST/files/{file_name}\"\n", - " RFG: \"$REFGENIE\"\n", - " YAMLFILE: 
\"$BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/{sample_name}_sample.yaml\"\n", - " imply:\n", - " - if:\n", - " antibody: [ H3K4me3, H3K27me3, H3K27ac, H3K9ac, H4K5ac, H3K4me, H3K36me3, H4K5ac, H3K9ac ]\n", - " then:\n", - " narrowpeak: FALSE\n" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/bedboss/bedboss_pep_config.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Config for local db and bedstat" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "path:\n", - " pipeline_output_path: $BEDBASE_DATA_PATH_HOST/outputs\n", - " bedstat_dir: outputs/bedstat_output\n", - " bedbuncher_dir: outputs/bedbuncher_output\n", - " remote_url_base: null\n", - "database:\n", - " host: $DB_HOST_URL\n", - " port: $POSTGRES_PORT\n", - " password: $POSTGRES_PASSWORD\n", - " user: $POSTGRES_USER\n", - " name: $POSTGRES_DB\n", - " dialect: postgresql\n", - " driver: psycopg2\n", - "server:\n", - " host: 0.0.0.0\n", - " port: 8080\n" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/bedboss/config_db_local.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "looper for bedboss" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pipeline_name: BEDBOSS\n", - "pipeline_type: sample\n", - "pre_submit:\n", - " python_functions:\n", - " - looper.write_sample_yaml\n", - "command_template: >\n", - " bedboss all\n", - " --sample-name {sample.sample_name}\n", - " --input-file {sample.input_file_path}\n", - " --input-type {sample.format}\n", - " --genome {sample.genome}\n", - " --sample-yaml {sample.yaml_file}\n", - " --output_folder {sample.output_folder}\n", - " --narrowpeak {sample.narrowpeak}\n", - " --rfg-config {sample.rfg_config_path}\n", - " {% if 
sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %}\n", - " {% if sample.chrom_sizes is defined %} --chrom-sizes {sample.chrom_sizes} {% endif %}\n", - " {% if sample.open_signal_matrix is defined %} --open-signal-matrix {sample.open_signal_matrix} {% endif %}\n", - " {% if sample.ensdb is defined %} --ensdb {sample.ensdb} {% endif %}\n", - " {% if sample.fasta is defined %} --fasta {sample.fasta} {% endif %}\n", - " --outfolder $BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/bedboss//pipeline_interface.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Looper config file:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bedboss_pep_config.yaml looper_config_bedboss.yaml sample_table.csv\n", - "config_db_local.yaml pipeline_interface.yaml\n" - ] - } - ], - "source": [ - "ls bedbase/tutorial_files/bedboss" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pep_config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/bedboss_pep_config.yaml\n", - "output_dir: $BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "\n", - "pipeline_interfaces:\n", - " sample: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss//pipeline_interface.yaml\n" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/bedboss/looper_config_bedboss.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Start DB (bedbase-postgres)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inititiate a local PostgreSQL instance\n", - 
"\n", - "In addition to generate statistics and plots, `bedstat` inserts JSON formatted metadata into relational [PostgreSQL] database. \n", - "\n", - "If you don't have docker installed, you can install it with `sudo apt-get update && apt-get install docker-engine -y`.\n", - "\n", - "Now, create a persistent volume to house PostgreSQL data:" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "postgres-data\n" - ] - } - ], - "source": [ - "docker volume create postgres-data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Spin up a `postgres` container. Provide required environment variables (need to match the settings in bedbase configuration file) and bind the created docker volume to `/var/lib/postgresql/data` path in the container:" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "42ed2028444042c3ceef801c0828ce016dde87f1c0ac0d9494ffb6274374f262\n", - "docker: Error response from daemon: driver failed programming external connectivity on endpoint bedbase-postgres (fe853ffbf2fa584785686c319c5a657021a860dce6c9e81f67f5e805ef2133a0): Bind for 0.0.0.0:5432 failed: port is already allocated.\n" - ] - }, - { - "ename": "", - "evalue": "125", - "output_type": "error", - "traceback": [] - } - ], - "source": [ - "docker run -d --name bedbase-postgres -p 5432:5432 -e POSTGRES_PASSWORD=bedbasepassword -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres -v postgres-data:/var/lib/postgresql/data postgres" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If environment variables are not initialized with function above, We have to initialize them manually " - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "export 
DB_HOST_URL=localhost\n", - "export POSTGRES_PORT=5432\n", - "export POSTGRES_PASSWORD=docker\n", - "export POSTGRES_USER=postgres\n", - "export POSTGRES_DB=pep-db" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### RUN BEDBoss" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Additionally, we have to initialize environment variable $REFGENIE - the path to the refgenie configuration file. If Refgenie is not initialize, we will have to initialize it localy. use `pip install --user refgenie` to install and add to the PATH with `export PATH=~/.local/bin:$PATH`" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/refgenie\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/refgenie/cli.py\", line 133, in main\n", - " rgc.initialize_config_file(os.path.abspath(gencfg))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/refgenconf/refgenconf.py\", line 290, in initialize_config_file\n", - " _write_fail_err(\"file exists\")\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/refgenconf/refgenconf.py\", line 281, in _write_fail_err\n", - " raise OSError(\"Can't initialize, {}: {} \".format(reason, filepath))\n", - "OSError: Can't initialize, file exists: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml \n" - ] - }, - { - "ename": "", - "evalue": "1", - "output_type": "error", - "traceback": [] - } - ], - "source": [ - "export REFGENIE='genome_config.yaml'\n", - "refgenie init -c $REFGENIE" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n" - ] - } - ], - "source": [ - "pwd" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bedboss_pep_config.yaml looper_config_bedboss.yaml sample_table.csv\n", - "config_db_local.yaml pipeline_interface.yaml\n" - ] - } - ], - "source": [ - "ls bedbase/tutorial_files/bedboss" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "##### Run bedboss" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "looper 1.5.1\n", - "\u001b[0m\n" - ] - } - ], - "source": [ - "looper --version" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: looper run [-h] [-i] [-d] [-t S] [-x S] [-y S] [-f] [--divvy DIVCFG] [-p P] [-s S]\n", - " [-c K [K ...]] [-u X] [-n N] [--looper-config LOOPER_CONFIG]\n", - " [-S YAML [YAML ...]] [-P YAML [YAML ...]] [-l N] [-k N]\n", - " [--sel-attr ATTR] [--sel-excl [E ...] | --sel-incl [I ...]]\n", - " [-a A [A ...]]\n", - " [config_file]\n", - "\n", - "Run or submit sample jobs.\n", - "\n", - "positional arguments:\n", - " config_file Project configuration file (YAML) or pephub registry\n", - " path.\n", - "\n", - "options:\n", - " -h, --help show this help message and exit\n", - " -i, --ignore-flags Ignore run status flags? Default=False\n", - " -d, --dry-run Don't actually submit the jobs. 
Default=False\n", - " -t S, --time-delay S Time delay in seconds between job submissions\n", - " -x S, --command-extra S String to append to every command\n", - " -y S, --command-extra-override S Same as command-extra, but overrides values in PEP\n", - " -f, --skip-file-checks Do not perform input file checks\n", - " -u X, --lump X Total input file size (GB) to batch into one job\n", - " -n N, --lumpn N Number of commands to batch into one job\n", - " --looper-config LOOPER_CONFIG Looper configuration file (YAML)\n", - " -S YAML [YAML ...], --sample-pipeline-interfaces YAML [YAML ...]\n", - " Path to looper sample config file\n", - " -P YAML [YAML ...], --project-pipeline-interfaces YAML [YAML ...]\n", - " Path to looper project config file\n", - " -a A [A ...], --amend A [A ...] List of amendments to activate\n", - "\n", - "divvy arguments:\n", - " Configure divvy to change computing settings\n", - "\n", - " --divvy DIVCFG Path to divvy configuration file. Default=$DIVCFG env\n", - " variable. Currently: not set\n", - " -p P, --package P Name of computing resource package to use\n", - " -s S, --settings S Path to a YAML settings file with compute settings\n", - " -c K [K ...], --compute K [K ...] List of key-value pairs (k1=v1)\n", - "\n", - "sample selection arguments:\n", - " Specify samples to include or exclude based on sample attribute values\n", - "\n", - " -l N, --limit N Limit to n samples\n", - " -k N, --skip N Skip samples by numerical index\n", - " --sel-attr ATTR Attribute for sample exclusion OR inclusion\n", - " --sel-excl [E ...] Exclude samples with these values\n", - " --sel-incl [I ...] 
Include only samples with these values\n", - "\u001b[0m\n" - ] - } - ], - "source": [ - "looper run --help" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n" - ] - } - ], - "source": [ - "pwd" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bash: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial: Is a directory\n" - ] - }, - { - "ename": "", - "evalue": "126", - "output_type": "error", - "traceback": [] - } - ], - "source": [ - "$BEDBASE_DATA_PATH_HOST" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looper version: 1.5.1\n", - "Command: run\n", - "Using default config. No config found in env var: ['DIVCFG']\n", - "Pipestat compatible: False\n", - "\u001b[36m## [1 of 11] sample: bedbase_demo_db1; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db1.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db1.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:35\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. 
This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db1 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db1_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:35) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "output_bigbed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5` \n", - "\n", - "> `zcat 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5` (24312)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24312;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5 `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db1/33xf84g5) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File 
\"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:35) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [2 of 11] sample: bedbase_demo_db2; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db2.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db2.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:35\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db2 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db2_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:36) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe` (24344)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 24344;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db2/lypwq5fe) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " return create_model(\n", - "Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - "Traceback (most recent call last):\n", - " File 
\"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File 
\"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:36) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [3 of 11] sample: bedbase_demo_db3; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db3.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db3.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:36\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db3 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db3_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:37) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p` (24374)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 24374;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db3/_5zvvg7p) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", 
line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:37) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [4 of 11] sample: bedbase_demo_db4; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db4.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db4.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:37\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db4 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db4_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:37) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd` (24404)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24404;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db4/gig106fd) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE105977_ENCFF937CGY_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", 
line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:38) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [5 of 11] sample: bedbase_demo_db5; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db5.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db5.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:38\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db5 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db5_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "* Compute host: 
bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:38) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k` (24435)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24435;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db5/ix1s2r3k) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF316ASR_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", 
line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:39) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [6 of 11] sample: bedbase_demo_db6; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db6.sub\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db6.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:39\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db6 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db6_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", 
- "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:40) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n` (24466)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24466;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db6/jrhj1l5n) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", 
line 309, in warned\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:40) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [7 of 11] sample: bedbase_demo_db7; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db7.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db7.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:40\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db7 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db7_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:40) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410` (24496)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24496;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410 `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db7/9r0q9410) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File 
\"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:40) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [8 of 11] sample: bedbase_demo_db8; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db8.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db8.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:40\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db8 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db8_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:41) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01` (24527)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24527;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01 `\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db8/ny2pxb01) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423312_ENCFF155HVK_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Using default schema: /home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - "Traceback (most recent call last):\n", - " File 
\"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File 
\"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:41) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [9 of 11] sample: bedhost_demo_db9; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedhost_demo_db9.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedhost_demo_db9.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:41\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedhost_demo_db9 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedhost_demo_db9_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:42) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0` (24559)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24559;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0 `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedhost_demo_db9/h6i4w9_0) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2423313_ENCFF722AOG_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", 
line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:42) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [10 of 11] sample: bedbase_demo_db10; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db10.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db10.sub\n", - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:42\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db10 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db10_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "* Pipeline started at: (08-14 09:27:43) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx` (24590)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24590;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db10/l3b3cyqx) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827349_ENCFF196DNQ_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", 
line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:43) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\u001b[36m## [11 of 11] sample: bedbase_demo_db11; pipeline: BEDBOSS\u001b[0m\n", - "Calling pre-submit function: looper.write_sample_yaml\n", - "Writing script to /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db11.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/BEDBOSS_bedbase_demo_db11.sub\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compute node: bnt4me-Precision-5560\n", - "Start time: 2023-08-14 09:27:43\n", - "Using default config. No config found in env var: PIPESTAT_CONFIG\n", - "Config: None.\n", - "No schema supplied.\n", - "Initialize FileBackend\n", - "Warning: You're running an interactive python session. This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name bedbase_demo_db11 --input-file /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz --input-type bed --genome hg38 --sample-yaml /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/submission/bedbase_demo_db11_sample.yaml --output_folder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs --narrowpeak True --rfg-config genome_config.yaml --bedbase-config /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml --outfolder /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs`\n", - "* Compute host: 
bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial\n", - "* Outfolder: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/\n", - "* Pipeline started at: (08-14 09:27:44) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.12\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.13.2\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0a2\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "### Initialized Pipestat Object:\n", - "\n", - "* PipestatManager (bedboss-pipeline)\n", - "* Backend: File\n", - "* - results: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/stats.yaml\n", - "* - status: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs\n", - "* Multiple Pipelines Allowed: True\n", - "* Pipeline name: bedboss-pipeline\n", - "* Pipeline type: sample\n", - "* Status Schema key: None\n", - "* Results formatter: default_formatter\n", - "* Results schema source: None\n", - "* Status schema source: None\n", - "* Records count: 2\n", - "* Sample name: DEFAULT_SAMPLE_NAME\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all', 'silent': False, 'verbosity': None, 'logdev': False}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz\n", - "output_bigbed = 
/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bigbed_files\n", - "Got input type: bed\n", - "Converting /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz to BED format.\n", - "Target exists: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz` \n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0` \n", - "\n", - "> `zcat /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz > /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0` (24621)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 24621;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0 `\n", - "File (/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/outputs/outputs/bedstat_output/bedstat_pipeline_logs/bed_files/bedmaker_logs/bedbase_demo_db11/2pfkxwx0) has passed Quality Control!\n", - "Generating bigBed files for: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/files/GSM2827350_ENCFF928JXU_peaks_GRCh38.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Config: /home/bnt4me/virginia/repos/bedbase_all/bedbase/docs_jupyter/bedbase_tutorial/bedbase/tutorial_files/bedboss/config_db_local.yaml.\n", - "Initialize DBBackend\n", - "/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/parsed_schema.py:284: RuntimeWarning: fields may not start with an underscore, ignoring \"_pipeline_name\"\n", - " return create_model(\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 689, in _engine\n", - "Using default schema: 
/home/bnt4me/virginia/venv/jupyter/bin/pipestat_output_schema.yaml\n", - " return self.db_engine_key\n", - "AttributeError: 'DBBackend' object has no attribute 'db_engine_key'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/bin/bedboss\", line 8, in \n", - " sys.exit(main())\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 180, in main\n", - " run_all(pm=pm, **args_dict)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedboss.py\", line 138, in run_all\n", - " bedstat(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/bedstat.py\", line 103, in bedstat\n", - " bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bbconf/bbconf.py\", line 72, in __init__\n", - " BED_TABLE: pipestat.PipestatManager(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/pipestat.py\", line 161, in __init__\n", - " self.backend = DBBackend(\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 63, in __init__\n", - " SQLModel.metadata.create_all(self._engine)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pipestat/backends/dbbackend.py\", line 694, in _engine\n", - " self.db_engine_key = create_engine(self.db_url, echo=self.show_db_logs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlmodel/engine/create.py\", line 139, in create_engine\n", - " return _create_engine(url, **current_kwargs) # type: ignore\n", - " File \"\", line 2, in create_engine\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/util/deprecations.py\", 
line 309, in warned\n", - " return fn(*args, **kwargs)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/create.py\", line 518, in create_engine\n", - " u = _url.make_url(url)\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 725, in make_url\n", - " return _parse_rfc1738_args(name_or_url)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/sqlalchemy/engine/url.py\", line 781, in _parse_rfc1738_args\n", - " components[\"port\"] = int(components[\"port\"])\n", - "ValueError: invalid literal for int() with base 10: '%24POSTGRES_PORT'\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline failed at: (08-14 09:27:44) elapsed: 0.0 _TIME_\n", - "\n", - "Total time: 0:00:00\n", - "Failure reason: Pipeline failure. See details above.\n", - "Exception ignored in atexit callback: >\n", - "Traceback (most recent call last):\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2191, in _exit_handler\n", - " self.fail_pipeline(Exception(\"Pipeline failure. See details above.\"))\n", - " File \"/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper/manager.py\", line 2036, in fail_pipeline\n", - " raise exc\n", - "Exception: Pipeline failure. 
See details above.\n", - "\n", - "Looper finished\n", - "Samples valid for job generation: 11 of 11\n", - "Commands submitted: 11 of 11\n", - "Jobs submitted: 11\n", - "\u001b[0m\n" - ] - } - ], - "source": [ - "looper run --looper-config ./bedbase/tutorial_files/bedboss/looper_config_bedboss.yaml --output-dir $BEDBASE_DATA_PATH_HOST/outputs/outputs/bedstat_output/bedstat_pipeline_logs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ❗❗ If You have errors in bedstat requirements:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "ename": "", - "evalue": "1", - "output_type": "error", - "traceback": [] - } - ], - "source": [ - "pip install -r bedstat/requirements.txt --user > requirements_log.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install R dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "Rscript bedstat/scripts/installRdeps.R > R_deps.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In case there is an issue installing `GenomicDistributionsData`, try:\n", - "```\n", - "wget http://big.databio.org/GenomicDistributionsData/GenomicDistributionsData_0.0.2.tar.gz\n", - "Rscript -e 'install.packages(\"GenomicDistributionsData_0.0.2.tar.gz\", type=\"source\", repos=NULL)'\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There's an additional dependency needed by `bedstat` if we wish to calculate and plot the GC content of our bedfiles. Depending on the genome assemblies of the files listed on a PEP, the appropriate BSgenome packages should be installed. 
The following is an example of how we can do so:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "if (!requireNamespace(\"BiocManager\", quietly = TRUE))\n", - " install.packages(\"BiocManager\")\n", - "\n", - "BiocManager::install(\"BSgenome.Hsapiens.UCSC.hg38.masked\")" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/scripts/BSgenome_install.R" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "Rscript bedbase/tutorial_files/scripts/BSgenome_install.R > BSgenome.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll need to create a directory where we can store the stats and plots generated by `bedstat`. Additionally, we'll create a directory where we can store log and metadata files that we'll need later on." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. BEDBUNCHER: Create bedsets and their respective statistics \n", - "\n", - "### Create a new PEP describing the bedset name and specific JSON query \n", - "\n", - "Now that we've processed several individual BED files, we'll turn to the next task: grouping them together into collections of BED files, which we call *bedsets*. For this, we use the `bedbuncher` pipeline, which produces outputs for each bedset, such as a bedset PEP, bedset-level statistics and plots, and an `IGD` database. To run `bedbuncher`, we will need another PEP describing each bedset. Though the annotation sheet below specifies attributes for one bedset, you can create as many as you wish using additional rows. For each bedset, you need to provide the query to retrieve certain collection BED files. 
\n", - "\n", - "The following example PEP shows the attributes we need to provide for each bedset and the config.yaml file that will grab the files needed to run `bedbuncher`:" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sample_name,bedset_name,genome,query,operator,query_val,bbconfig_name,bedbase_config\n", - "sample1,bedsetOver1kRegions,hg38,'regions_no',gt,\"\"\"1000\"\"\",bedbase_configuration_compose,source1\n", - "sample2,bedsetOver50GCContent,hg38,'gc_content',gt,\"\"\"0.5\"\"\",bedbase_configuration_compose,source1\n", - "sample3,bedsetUnder500MeanWidth,hg38,'mean_region_width',lt,\"\"\"500\"\"\",bedbase_configuration_compose,source1\n", - "sample4,bedsetTestSelectCellType,hg38,\"\"\"other::text~~:str_1 or other::text~~:str_2\"\"\",\"\"\"str_1,str_2\"\"\",\"\"\"%GM12878%,%HEK293%\"\"\",bedbase_configuration_compose,source1\n", - "sample5,bedsetTestSelectGenome,hg38,\"\"\"name=:name_1 or name=:name_2\"\"\",\"\"\"name_1,name_2\"\"\",\"\"\"GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38,GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38\"\"\",bedbase_configuration_compose,source1\n", - "sample6,bedsetTestCellType,hg38,\"\"\"other\"\"\",contains,\"\"\"\"\"{\\\"\"cell_type\\\"\":\\ \\\"\"K562\\\"\"}\"\"\"\"\",bedbase_configuration_compose,source1\n", - "sample7,bedsetTestSpace,hg38,\"\"\"other\"\"\",contains,\"\"\"\"\"{\\\"\"description\\\"\":\\ \\\"\"IKZF1\\ ChIP-seq\\ on\\ human\\ GM12878\\\"\"}\"\"\"\"\",bedbase_configuration_compose,source1\n", - "sample8,bedsetTestsSpaceMult,hg38,\"\"\"other::text~~:str_1 or other::text~~:str_2\"\"\",\"\"\"str_1,str_2\"\"\",\"\"\"%IKZF1 ChIP-seq on human GM12878%,%ZEB2 ChIP-seq on human K562 (ENCODE)%\"\"\",bedbase_configuration_compose,source1\n", - "sample9,bedsetTestSpace2,hg38,\"\"\"other\"\"\",contains,\"\"\"\"\"{\\\"\"description\\\"\":\\ \\\"\"HEK293\\ cell\\ line\\ stably\\ 
expressing\\ N-terminal\\ tagged\\ eGFP-GLI2\\ under\\ the\\ control\\ of\\ a\\ CMV\\ promoter\\\"\"}\"\"\"\"\",bedbase_configuration_compose,source1\n", - "sample10,bedsetTestsSpaceMult2,hg38,\"\"\"other::text~~:str_1 or other::text~~:str_2\"\"\",\"\"\"str_1,str_2\"\"\",\"\"\"%ZEB2 ChIP-seq on human K562 (ENCODE)%,%HEK293 cell line stably expressing N-terminal tagged eGFP-GLI2 under the control of a CMV promoter %\"\"\",bedbase_configuration_compose,source1\n" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/PEPs/bedbuncher_query.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pep_version: 2.0.0\n", - "sample_table: bedbuncher_query.csv\n", - "\n", - "looper:\n", - " output_dir: $BEDBASE_DATA_PATH_HOST/outputs/bedbuncher_output/bedbuncher_pipeline_logs\n", - "\n", - "sample_modifiers:\n", - " append:\n", - " pipeline_interfaces: $CODE/bedbuncher/pipeline_interface.yaml \n", - " derive:\n", - " attributes: [bedbase_config]\n", - " sources:\n", - " source1: $CODE/bedbase/tutorial_files/{bbconfig_name}.yaml\n" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/PEPs/bedbuncher_config.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Running `bedbuncher` with arguments defined in the example PEP above will result in a bedset with bedfiles that consist of at least 1000 regions." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create outputs directory and install bedbuncher command line dependencies\n", - "\n", - "We need a folder where we can store bedset related outputs. Though not required, we'll also create a directory where we can store the `bedbuncher` pipeline logs. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "mkdir -p outputs/bedbuncher_output/bedbuncher_pipeline_logs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "One of the feats of `bedbuncher` includes [IGD](https://github.com/databio/IGD) database creation from the files in the bedset. `IGD` can be installed by cloning the repository from github, executing the make file to create the binary, and pointing the binary location with the `$PATH` environment variable. " - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cloning into 'IGD'...\n", - "remote: Enumerating objects: 1297, done.\u001b[K\n", - "remote: Counting objects: 100% (67/67), done.\u001b[K\n", - "remote: Compressing objects: 100% (50/50), done.\u001b[K\n", - "remote: Total 1297 (delta 35), reused 40 (delta 17), pack-reused 1230\u001b[K\n", - "Receiving objects: 100% (1297/1297), 949.45 KiB | 10.79 MiB/s, done.\n", - "Resolving deltas: 100% (804/804), done.\n" - ] - } - ], - "source": [ - "git clone git@github.com:databio/IGD\n", - "cd IGD\n", - "make > igd_make_log.txt 2>&1\n", - "cd ..\n", - "\n", - "export PATH=$BEDBASE_DATA_PATH_HOST/IGD/bin/:$PATH" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run bedbuncher using Looper \n", - "\n", - "Once we have cloned the `bedbuncher` repository, set our local Postgres cluster and created the `iGD` binary, we can run the pipeline by pointing `looper run` to the appropriate `PEP` config file. As mentioned earlier, if the path to the bedbase configuration file has been stored in the `$BEDBASE` environment variable, it's not neccesary to pass the `--bedbase-config` argument. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looper version: 1.3.1\n", - "Command: run\n", - "/home/bnt4me/.local/lib/python3.8/site-packages/divvy/compute.py:150: UserWarning: The '_file_path' property is deprecated and will be removed in a future release. Use ComputingConfiguration[\"__internal\"][\"_file_path\"] instead.\n", - " os.path.dirname(self._file_path),\n", - "/home/bnt4me/.local/lib/python3.8/site-packages/divvy/compute.py:58: UserWarning: The '_file_path' property is deprecated and will be removed in a future release. Use ComputingConfiguration[\"__internal\"][\"_file_path\"] instead.\n", - " self.config_file = self._file_path\n", - "Activating compute package 'local'\n", - "\u001b[36m## [1 of 10] sample: sample1; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample1.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample1.sub\n", - "\u001b[36m## [2 of 10] sample: sample2; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample2.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample2.sub\n", - "\u001b[36m## [3 of 10] sample: sample3; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample3.sub\n", - "Job script (n=1; 0.00Gb): 
/home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample3.sub\n", - "\u001b[36m## [4 of 10] sample: sample4; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample4.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample4.sub\n", - "\u001b[36m## [5 of 10] sample: sample5; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample5.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample5.sub\n", - "\u001b[36m## [6 of 10] sample: sample6; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample6.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample6.sub\n", - "\u001b[36m## [7 of 10] sample: sample7; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample7.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample7.sub\n", - "\u001b[36m## [8 of 10] sample: sample8; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample8.sub\n", - "Job script 
(n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample8.sub\n", - "\u001b[36m## [9 of 10] sample: sample9; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample9.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample9.sub\n", - "\u001b[36m## [10 of 10] sample: sample10; pipeline: BEDBUNCHER\u001b[0m\n", - "Writing script to /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample10.sub\n", - "Job script (n=1; 0.00Gb): /home/bnt4me/Virginia/bed_maker/bedbase_tutorial/outputs/bedbuncher_output/bedbuncher_pipeline_logs/submission/BEDBUNCHER_sample10.sub\n", - "\n", - "Looper finished\n", - "Samples valid for job generation: 10 of 10\n", - "Commands submitted: 10 of 10\n", - "Jobs submitted: 10\n" - ] - } - ], - "source": [ - "looper run bedbase/tutorial_files/PEPs/bedbuncher_config.yaml --package local \\\n", - "--command-extra=\"-R\" > outputs/bedbuncher_output/bedbuncher_pipeline_logs/looper_logs.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. BEDEMBED: \n", - "\n", - "### bedembed_train: Uses the StarSpace method to embed the bed files and the meta data." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need to install [StarSpace](https://github.com/facebookresearch/StarSpace) first. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mkdir -p bedembed/tools" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need to install [Boost](http://www.boost.org/) library and specify the path of boost library in makefile in order to run StarSpace." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "wget https://boostorg.jfrog.io/artifactory/main/release/1.78.0/source/boost_1_78_0.zip\n", - "unzip boost_1_78_0.zip\n", - "sudo mv boost_1_78_0 /usr/local/bin\n", - "cd /usr/local/bin/boost_1_78_0\n", - "./bootstrap.sh\n", - "./b2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to build StarSpace on Mac OS or Linux, use the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cd $BEDBASE_DATA_PATH_HOST/bedembed/tools\n", - "git clone https://github.com/facebookresearch/Starspace.git\n", - "cd Starspace\n", - "make\n", - "make embed_doc" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We need a folder where we can store bedembed related outputs. Though not required, we'll also create a directory where we can store the bedembed pipeline logs." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mkdir -p outputs/bedembed_output/bedembed_pipeline_logs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "path_starspace=$BEDBASE_DATA_PATH_HOST'/bedembed/tools/Starspace/starspace'\n", - "path_meta=$BEDBASE_DATA_PATH_HOST'/bedbase/tutorial_files/PEPs/bedstat_annotation_sheet.csv'\n", - "# download Universe file from rivanna\n", - "path_universe=$BEDBASE_DATA_PATH_HOST'/tiles1000.hg19.bed'\n", - "path_output=$BEDBASE_DATA_PATH_HOST'/outputs/bedembed_output/'\n", - "assembly='hg38'\n", - "path_data=$BEDBASE_DATA_PATH_HOST'/bed_files/'\n", - "labels=\"exp_protocol,cell_type,tissue,antibody,treatment\"\n", - "no_files=10\n", - "start_line=0\n", - "dim=50\n", - "epochs=20\n", - "learning_rate=0.001\n", - "\n", - "python ./bedembed/pipeline/bedembed_train.py -star $path_starspace -i $path_data -g $assembly -meta $path_meta -univ $path_universe \\\n", - "-l $labels -nof $no_files -o $path_output -startline $start_line -dim $dim -epochs $epochs -lr $learning_rate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### bedembed_test: calculate the distances between file labels and trained search terms\n", - "\n", - "### Get a PEP describing the bedfiles to process \n", - "\n", - "We'll use the standard [PEP](http://pep.databio.org) format for the annotation, which consists of 1) a sample table (.csv) that annotates the files, and 2) a project config.yaml file that points to the sample annotation sheet. The config file also has other components, such as derived attributes, that in this case point to the bedfiles to be processed. 
Here is the PEP config file for this example project:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bedembed_version: 0.0.0\n", - "sample_table: bedstat_annotation_sheet.csv\n", - "\n", - "looper:\n", - " output-dir: $BEDBASE_DATA_PATH_HOST/outputs/bedembed_output/bedembed_pipeline_logs \n", - "sample_modifiers:\n", - " append:\n", - " bedbase_config: $BEDBASE_DATA_PATH_HOST/bedbase/tutorial_files/bedbase_configuration_compose.yaml\n", - " pipeline_interfaces: $BEDBASE_DATA_PATH_HOST/bedembed/pipeline_interface_test.yaml\n", - " universe: /project/shefflab/data/StarSpace/universe/universe_tilelen1000.bed\n", - " input_file_path: INPUT\n", - " output_file_path: $BEDBASE_DATA_PATH_HOST/outputs/bedembed_output\n", - " yaml_file: SAMPLE_YAML\n", - " derive:\n", - " attributes: [yaml_file, input_file_path]\n", - " sources:\n", - " INPUT: \"/project/shefflab/data/encode/{file_name}\"\n", - " SAMPLE_YAML: \"$BEDBASE_DATA_PATH_HOST/outputs/bedembed_output/bedembed_pipeline_logs/submission/{sample_name}_sample.yaml\"\n" - ] - } - ], - "source": [ - "cat bedbase/tutorial_files/PEPs/bedembed_test_config.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run bedembed using Looper \n", - "\n", - "Once we have cloned the `bedembed` repository, set our local postgres cluster, we can run the pipeline by pointing `looper run` to the appropriate `PEP` config file. As mentioned earlier, if the path to the bedbase configuration file is provided, the calculated distances will report to the postgres database, if not it will save as a csv file in the `output_file_path`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "looper run bedbase/tutorial_files/PEPs/bedembed_test_config.yaml --package local" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. 
BEDHOST: Serve BED files and API to explore pipeline outputs\n", - "\n", - "The last part of the tutorial consists on running a local instance of `bedhost` (a REST API for `bedstat` and `bedbuncher` produced outputs) in order to explore plots, statistics and download pipeline outputs. \n", - "To run `bedhost`, frist use `bedhost-ui` to built the bedhost user interface with React." - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "cd bedhost-ui\n", - "# Install node modules defined in package.json\n", - "npm install \n", - "# Build the app for production to the ./build folder\n", - "npm run build\n", - "# copy the contents of the ./build directory to bedhost/bedhost/static/bedhost-ui\n", - "cp -avr ./build ../bedhost/bedhost/static/bedhost-ui\n", - "\n", - "cd .." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To run `bedhost`, we'll pip install the package from the previously cloned repository:" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "pip install bedhost/. --user > bedhost_log.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To start `bedhost`, we simply need to run the following command passing the location of the bedbase configuration file to the `-c` flag. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving data for columns: ['md5sum']\n", - "Serving data for columns: ['md5sum']\n", - "Generating GraphQL schema\n", - "running bedhost app\n", - "\u001b[32mINFO\u001b[0m: Started server process [\u001b[36m648505\u001b[0m]\n", - "\u001b[32mINFO\u001b[0m: Waiting for application startup.\n", - "\u001b[32mINFO\u001b[0m: Application startup complete.\n", - "\u001b[32mINFO\u001b[0m: Uvicorn running on \u001b[1mhttp://0.0.0.0:8000\u001b[0m (Press CTRL+C to quit)\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47532 - \"\u001b[1mGET / HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47532 - \"\u001b[1mGET /ui/static/css/2.fa6c921b.chunk.css HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47534 - \"\u001b[1mGET /ui/static/css/main.4620a2c9.chunk.css HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47536 - \"\u001b[1mGET /ui/static/js/2.b0639060.chunk.js HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47534 - \"\u001b[1mGET /ui/static/js/main.56118e82.chunk.js HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47536 - \"\u001b[1mGET /api/bed/all/data/count HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "[(None,), ({'alias': 'hg38', 'digest': '2230c535660fb4774114bfa966a62f823fdb6d21acf138d4'},)]\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47532 - \"\u001b[1mGET /api/bed/genomes HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47534 - \"\u001b[1mGET /api/versions HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47538 - \"\u001b[1mGET /ui/bedbase_logo.svg HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47538 
- \"\u001b[1mGET /api/bedset/all/data/count HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "Serving data for columns: ['md5sum']\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47538 - \"\u001b[1mGET /api/bed/all/data?ids=md5sum&limit=1 HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "Serving data for columns: ['md5sum']\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47538 - \"\u001b[1mGET /api/bedset/all/data?ids=md5sum&limt=1 HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47538 - \"\u001b[1mGET /openapi.json HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n", - "\u001b[32mINFO\u001b[0m: 127.0.0.1:47538 - \"\u001b[1mGET /ui/favicon.ico HTTP/1.1\u001b[0m\" \u001b[32m200 OK\u001b[0m\n" - ] - } - ], - "source": [ - "bedhost serve -c $BEDBASE_DATA_PATH_HOST/bedbase/tutorial_files/bedbase_configuration_compose.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we have stored the path to the bedbase config in the environment variable `$BEDBASE` (suggested), it's not neccesary to use said flag. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bedhost serve " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `bedhost` API can be opened in the url [http://0.0.0.0:8000](http://0.0.0.0:8000). We can now explore the plots and statistics generated by the `bedstat` and `bedbuncher` pipelines." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## or optionally run BEDHOST using containers\n", - "\n", - "Alternatively, you can run the application inside a container.\n", - "\n", - "For that we'll use [docker compose](https://docs.docker.com/compose/), a tool that makes running multi-contaier Docker applications possible. 
The `docker-compose.yaml` file defines two services: \n", - "- `fastapi-api`: runs the fastAPI server \n", - "- `postgres-db`: runs the PostgeSQL database used by the server\n" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "cd $BEDBASE_DATA_PATH_HOST" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use the `BEDBASE_DATA_PATH_HOST` environment variable to point to the host directory with the pipeline results that will be mounted in the container as a volume. \n", - "\n", - "The environment variables are passed to the container via `.env` file, which the `docker-compose.yaml` points to for each service. Additionally, you can just export the environment variables before issuing the `docker-compose` command.\n", - "When you set the same environment variable in multiple files, here’s the priority used by Compose to choose which value to use:\n", - "\n", - "1. Compose file\n", - "2. Shell environment variables\n", - "3. Environment file\n", - "4. Dockerfile\n", - "4. Variable is not defined" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Pulling postgres-db (postgres:)...\n", - "latest: Pulling from library/postgres\n", - "Digest: sha256:8f7c3c9b61d82a4a021da5d9618faf056633e089302a726d619fa467c73609e4\n", - "Status: Downloaded newer image for postgres:latest\n", - "Recreating postgreSQL-bedbase ... \n", - "\u001b[1BRecreating fastAPI-bedbase ... 
mdone\u001b[0m\n", - "\u001b[1BAttaching to postgreSQL-bedbase, fastAPI-bedbase\n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m \n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m PostgreSQL Database directory appears to contain a database; Skipping initialization\n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m \n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m 2020-11-02 23:10:28.883 UTC [1] LOG: starting PostgreSQL 13.0 (Debian 13.0-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit\n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m 2020-11-02 23:10:28.885 UTC [1] LOG: listening on IPv4 address \"0.0.0.0\", port 5432\n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m 2020-11-02 23:10:28.885 UTC [1] LOG: listening on IPv6 address \"::\", port 5432\n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m 2020-11-02 23:10:28.891 UTC [1] LOG: listening on Unix socket \"/var/run/postgresql/.s.PGSQL.5432\"\n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m 2020-11-02 23:10:28.901 UTC [25] LOG: database system was shut down at 2020-11-02 23:03:14 UTC\n", - "\u001b[33mpostgreSQL-bedbase |\u001b[0m 2020-11-02 23:10:28.909 UTC [1] LOG: database system is ready to accept connections\n", - "\u001b[36mfastAPI-bedbase |\u001b[0m wait-for-it.sh: waiting 60 seconds for postgres-db:5432\n", - "\u001b[36mfastAPI-bedbase |\u001b[0m wait-for-it.sh: postgres-db:5432 is available after 0 seconds\n", - "\u001b[36mfastAPI-bedbase |\u001b[0m DEBU 2020-11-02 23:10:30,246 | bedhost:est:265 > Configured logger 'bedhost' using logmuse v0.2.6 \n", - "\u001b[36mfastAPI-bedbase |\u001b[0m DEBU 23:10:30 | bbconf:est:265 > Configured logger 'bbconf' using logmuse v0.2.6 \n", - "\u001b[36mfastAPI-bedbase |\u001b[0m DEBU 23:10:30 | bbconf:bbconf:105 > Established connection with PostgreSQL: postgres-db \n", - "\u001b[36mfastAPI-bedbase |\u001b[0m DEBU 2020-11-02 23:10:30,299 | bedhost:main:503 > Determined React UI path: /app/bedhost/static/bedhost-ui \n", - "\u001b[36mfastAPI-bedbase |\u001b[0m 
INFO 2020-11-02 23:10:30,299 | bedhost:main:510 > running bedhost app \n", - "\u001b[36mfastAPI-bedbase |\u001b[0m INFO: Started server process [1]\n", - "\u001b[36mfastAPI-bedbase |\u001b[0m INFO: Waiting for application startup.\n", - "\u001b[36mfastAPI-bedbase |\u001b[0m INFO: Application startup complete.\n", - "\u001b[36mfastAPI-bedbase |\u001b[0m INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)\n", - "Gracefully stopping... (press Ctrl+C again to force)\n", - "Stopping fastAPI-bedbase ... \n", - "Stopping postgreSQL-bedbase ... \n", - "\u001b[1Bping postgreSQL-bedbase ... \u001b[32mdone\u001b[0m" - ] - } - ], - "source": [ - "cd bedhost; docker-compose up" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": true, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "329.797px" - }, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/bedboss/notebooks/bedmaker-tutorial.ipynb b/docs/bedboss/notebooks/bedmaker-tutorial.ipynb deleted file mode 100644 index ee5af3d..0000000 --- a/docs/bedboss/notebooks/bedmaker-tutorial.ipynb +++ /dev/null @@ -1,300 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a7e9d7a1", - "metadata": {}, - "source": [ - "# bedmaker tutorial" - ] - }, - { - "cell_type": "markdown", - "id": "2890b6ba", - "metadata": {}, - "source": [ - "To create bed and bigbed files we will need to use 
bedmaker: `bedboss make`" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "5fc290aa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "version: 0.1.0-dev1\n", - "usage: bedboss [-h] [--version] {all,make,qc,stat} ...\n", - "\n", - "Warehouse of pipelines for BED-like files: bedmaker, bedstat, and bedqc.\n", - "\n", - "positional arguments:\n", - " {all,make,qc,stat}\n", - " all Run all bedboss pipelines and insert data into bedbase\n", - " make A pipeline to convert bed, bigbed, bigwig or bedgraph\n", - " files into bed and bigbed formats\n", - " qc Run quality control on bed file (bedqc)\n", - " stat A pipeline to read a file in BED format and produce\n", - " metadata in JSON format.\n", - "\n", - "options:\n", - " -h, --help show this help message and exit\n", - " --version show program's version number and exit\n" - ] - } - ], - "source": [ - "bedboss --help" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "caf5dfec", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: bedboss make [-h] -f INPUT_FILE [-n NARROWPEAK] -t INPUT_TYPE -g GENOME\n", - " -r RFG_CONFIG -o OUTPUT_BED --output-bigbed OUTPUT_BIGBED\n", - " -s SAMPLE_NAME [--chrom-sizes CHROM_SIZES]\n", - " [--standard-chrom]\n", - "\n", - "options:\n", - " -h, --help show this help message and exit\n", - " -f INPUT_FILE, --input-file INPUT_FILE\n", - " path to the input file\n", - " -n NARROWPEAK, --narrowpeak NARROWPEAK\n", - " whether the regions are narrow (transcription factor\n", - " implies narrow, histone mark implies broad peaks)\n", - " -t INPUT_TYPE, --input-type INPUT_TYPE\n", - " a bigwig or a bedgraph file that will be converted\n", - " into BED format\n", - " -g GENOME, --genome GENOME\n", - " reference genome\n", - " -r RFG_CONFIG, --rfg-config RFG_CONFIG\n", - " file path to the genome config file\n", - " -o OUTPUT_BED, --output-bed OUTPUT_BED\n", 
- " path to the output BED files\n", - " --output-bigbed OUTPUT_BIGBED\n", - " path to the folder of output bigBed files\n", - " -s SAMPLE_NAME, --sample-name SAMPLE_NAME\n", - " name of the sample used to systematically build the\n", - " output name\n", - " --chrom-sizes CHROM_SIZES\n", - " a full path to the chrom.sizes required for the\n", - " bedtobigbed conversion\n", - " --standard-chrom Standardize chromosome names. Default: False\n" - ] - } - ], - "source": [ - "bedboss make --help" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f397de3d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Output directory does not exist. Creating: ./bed\n", - "BigBed directory does not exist. Creating: ./bigbed\n", - "bedmaker logs directory doesn't exist. Creating one...\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss make --sample-name test_bed --input-file ../test/data/bed/hg19/correct/hg19_example1.bed --input-type bed --genome hg19 --output-bed ./bed --output-bigbed ./bigbed`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter\n", - "* Outfolder: ./bed/bedmaker_logs/test_bed/\n", - "* Pipeline started at: (02-08 15:39:09) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.6\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.12.3\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: None\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Got input type: bed\n", - "Converting ../test/data/bed/hg19/correct/hg19_example1.bed to BED format.\n", - "Target to produce: `./bed/hg19_example1.bed.gz` \n", - "\n", - "> `cp 
../test/data/bed/hg19/correct/hg19_example1.bed ./bed/hg19_example1.bed` (2477650)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 2477650;\tCommand: cp;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "\n", - "> `gzip ./bed/hg19_example1.bed ` (2477652)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 2477652;\tCommand: gzip;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "Running bedqc...\n", - "Target to produce: `./bed/bedmaker_logs/test_bed/xl67fcgi` \n", - "\n", - "> `zcat ./bed/hg19_example1.bed.gz > ./bed/bedmaker_logs/test_bed/xl67fcgi` (2477654)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 2477654;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "Targetless command, running... \n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh ./bed/bedmaker_logs/test_bed/xl67fcgi ` (2477656)\n", - "
\n",
-      "1000
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 2477656;\tCommand: bash;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline completed. Epilogue\n", - "* Elapsed time (this run): 0:00:00\n", - "* Total elapsed time (all runs): 0:00:00\n", - "* Peak memory (this run): 0 GB\n", - "* Pipeline completed time: 2023-02-08 15:39:09\n", - "Generating bigBed files for: ../test/data/bed/hg19/correct/hg19_example1.bed\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Creating refgenie genome config file...\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes\n", - "Target to produce: `./bigbed/jckj3p1d` \n", - "\n", - "> `zcat ./bed/hg19_example1.bed.gz | sort -k1,1 -k2,2n > ./bigbed/jckj3p1d` (2477666,2477667)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 2477666;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.0GB \n", - " PID: 2477667;\tCommand: sort;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "Running: bedToBigBed -type=bed6+3 ./bigbed/jckj3p1d /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes ./bigbed/hg19_example1.bigBed\n", - "Target to produce: `./bigbed/hg19_example1.bigBed` \n", - "\n", - "> `bedToBigBed -type=bed6+3 ./bigbed/jckj3p1d /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/alias/hg19/fasta/default/hg19.chrom.sizes ./bigbed/hg19_example1.bigBed` (2477669)\n", - "
\n",
-      "pass1 - making usageList (1 chroms): 1 millis\n",
-      "pass2 - checking and writing primary data (175 records, 9 fields): 0 millis\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 2477669;\tCommand: bedToBigBed;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "Starting cleanup: 2 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline completed. Epilogue\n", - "* Elapsed time (this run): 0:00:00\n", - "* Total elapsed time (all runs): 0:00:00\n", - "* Peak memory (this run): 0 GB\n", - "* Pipeline completed time: 2023-02-08 15:39:09\n" - ] - } - ], - "source": [ - " bedboss make --sample-name test_bed \\\n", - " --input-file ../test/data/bed/hg19/correct/hg19_example1.bed \\\n", - " --input-type bed \\\n", - " --genome hg19 \\\n", - " --output-bed ./bed \\\n", - " --output-bigbed ./bigbed \n" - ] - }, - { - "cell_type": "markdown", - "id": "6b175141", - "metadata": {}, - "source": [ - "### Let's check if bed file was created (or copied)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "70ee37f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001B[0m\u001B[01;34mbedmaker_logs\u001B[0m \u001B[01;31mhg19_example1.bed.gz\u001B[0m\n" - ] - } - ], - "source": [ - "ls bed" - ] - }, - { - "cell_type": "markdown", - "id": "49f19d08", - "metadata": {}, - "source": [ - "### Let's check if bigbed file was created" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "cfd3c9f7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "hg19_example1.bigBed\n" - ] - } - ], - "source": [ - "ls bigbed" - ] - }, - { - "cell_type": "markdown", - "id": "5c4837b0", - "metadata": {}, - "source": [ - "### everything was finished successfuly and files are ready for further analysis!" 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/bedboss/notebooks/bedqc-tutorial.ipynb b/docs/bedboss/notebooks/bedqc-tutorial.ipynb deleted file mode 100644 index 3935965..0000000 --- a/docs/bedboss/notebooks/bedqc-tutorial.ipynb +++ /dev/null @@ -1,124 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c35a64ab", - "metadata": {}, - "source": [ - "# bedqc tutorial" - ] - }, - { - "cell_type": "markdown", - "id": "2b642ffb", - "metadata": {}, - "source": [ - "To check Quality of bed file use this command: `badboss qc`" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b67214fe", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: bedboss qc [-h] --bedfile BEDFILE --outfolder OUTFOLDER\n", - "\n", - "options:\n", - " -h, --help show this help message and exit\n", - " --bedfile BEDFILE a full path to bed file to process\n", - " --outfolder OUTFOLDER\n", - " a full path to output log folder.\n" - ] - } - ], - "source": [ - "bedboss qc --help" - ] - }, - { - "cell_type": "markdown", - "id": "eab75d79", - "metadata": {}, - "source": [ - "bedqc example:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "1488b255", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running bedqc...\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss qc --bedfile ../test/data/bed/hg19/correct/hg19_example1.bed --outfolder .`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter\n", - "* Outfolder: ./\n", - "* Pipeline started at: (02-08 
15:44:57) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.6\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.12.3\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: None\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Target exists: `../test/data/bed/hg19/correct/hg19_example1.bed` \n", - "Targetless command, running... \n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh ../test/data/bed/hg19/correct/hg19_example1.bed ` (2478311)\n", - "
\n",
-      "1000
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 2478311;\tCommand: bash;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "Starting cleanup: 1 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline completed. Epilogue\n", - "* Elapsed time (this run): 0:00:00\n", - "* Total elapsed time (all runs): 0:00:00\n", - "* Peak memory (this run): 0 GB\n", - "* Pipeline completed time: 2023-02-08 15:44:57\n" - ] - } - ], - "source": [ - "bedboss qc --bedfile ../test/data/bed/hg19/correct/hg19_example1.bed --outfolder ." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/bedboss/notebooks/bedstat-tutorial.ipynb b/docs/bedboss/notebooks/bedstat-tutorial.ipynb deleted file mode 100644 index 60d448b..0000000 --- a/docs/bedboss/notebooks/bedstat-tutorial.ipynb +++ /dev/null @@ -1,528 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c35a64ab", - "metadata": {}, - "source": [ - "# bedboss stat" - ] - }, - { - "cell_type": "markdown", - "id": "2b642ffb", - "metadata": {}, - "source": [ - "This tutorial is intended to introduce you to bedstat, pipeline that produces statistics and plots based on bed and bigbed files" - ] - }, - { - "cell_type": "markdown", - "id": "a5f49a8c", - "metadata": {}, - "source": [ - "### 1. 
Install all dependencies and initialize database for it" - ] - }, - { - "cell_type": "markdown", - "id": "7392c92e", - "metadata": {}, - "source": [ - "- Install dependecies: [How to install R dependencies](./how_to_install_r_dep/)\n", - "- Initialize database: [How to initialize database](./how_to_create_database/)\n", - "- Create config file: [How to create config file](./how_to_bedbase_config/)" - ] - }, - { - "cell_type": "markdown", - "id": "668c260f", - "metadata": {}, - "source": [ - "### 2. Create working repository" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "95ff14bf", - "metadata": {}, - "outputs": [], - "source": [ - "mkdir stat_tutorial ; cd stat_tutorial " - ] - }, - { - "cell_type": "markdown", - "id": "edbecd02", - "metadata": {}, - "source": [ - "Create config file by downloading it and configuring it" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "1daff328", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "path:\n", - " pipeline_output_path: $BEDBOSS_OUTPUT_PATH # do not change it\n", - " bedstat_dir: bedstat_output\n", - " remote_url_base: null\n", - " bedbuncher_dir: bedbucher_output\n", - "database:\n", - " host: localhost\n", - " port: 5432\n", - " password: docker\n", - " user: postgres\n", - " name: pep-db\n", - " dialect: postgresql\n", - " driver: psycopg2\n", - "server:\n", - " host: 0.0.0.0\n", - " port: 8000\n", - "remotes:\n", - " http:\n", - " prefix: https://data.bedbase.org/\n", - " description: HTTP compatible path\n", - " s3:\n", - " prefix: s3://data.bedbase.org/\n", - " description: S3 compatible path\n" - ] - } - ], - "source": [ - "cat bedbase_config_test.yaml" - ] - }, - { - "cell_type": "markdown", - "id": "0ee154a8", - "metadata": {}, - "source": [ - "### 3. 
Download bed and bigbed files" - ] - }, - { - "cell_type": "markdown", - "id": "6010e161", - "metadata": {}, - "source": [ - "Bed file" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "53346258", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-02-28 15:32:57-- https://github.com/bedbase/bedboss/raw/dev/test/data/bed/hg19/correct/sample1.bed.gz\n", - "Resolving github.com (github.com)... 140.82.113.3\n", - "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n", - "HTTP request sent, awaiting response... 302 Found\n", - "Location: https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bed/hg19/correct/sample1.bed.gz [following]\n", - "--2023-02-28 15:32:57-- https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bed/hg19/correct/sample1.bed.gz\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 7087126 (6.8M) [application/octet-stream]\n", - "Saving to: ‘sample1.bed.gz’\n", - "\n", - "sample1.bed.gz 100%[===================>] 6.76M --.-KB/s in 0.07s \n", - "\n", - "2023-02-28 15:32:58 (95.8 MB/s) - ‘sample1.bed.gz’ saved [7087126/7087126]\n", - "\n" - ] - } - ], - "source": [ - "wget -O sample1.bed.gz https://github.com/bedbase/bedboss/raw/dev/test/data/bed/hg19/correct/sample1.bed.gz\n" - ] - }, - { - "cell_type": "markdown", - "id": "6e933bd6", - "metadata": {}, - "source": [ - "BigBed file" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "8df43a61", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-02-28 15:33:00-- https://github.com/bedbase/bedboss/raw/dev/test/data/bigbed/hg19/correct/sample1.bigBed\n", - "Resolving github.com (github.com)... 140.82.113.3\n", - "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n", - "HTTP request sent, awaiting response... 302 Found\n", - "Location: https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bigbed/hg19/correct/sample1.bigBed [following]\n", - "--2023-02-28 15:33:00-- https://raw.githubusercontent.com/bedbase/bedboss/dev/test/data/bigbed/hg19/correct/sample1.bigBed\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 13092350 (12M) [application/octet-stream]\n", - "Saving to: ‘sample1.bigBed’\n", - "\n", - "sample1.bigBed 100%[===================>] 12.49M --.-KB/s in 0.1s \n", - "\n", - "2023-02-28 15:33:00 (101 MB/s) - ‘sample1.bigBed’ saved [13092350/13092350]\n", - "\n" - ] - } - ], - "source": [ - "wget -O sample1.bigBed https://github.com/bedbase/bedboss/raw/dev/test/data/bigbed/hg19/correct/sample1.bigBed\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "540122c5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bedbase_config_test.yaml \u001B[0m\u001B[01;31msample1.bed.gz\u001B[0m sample1.bigBed\n" - ] - } - ], - "source": [ - "ls" - ] - }, - { - "cell_type": "markdown", - "id": "7e8e007a", - "metadata": {}, - "source": [ - "### 4. Run statistics:" - ] - }, - { - "cell_type": "markdown", - "id": "9a69ec14", - "metadata": {}, - "source": [ - "Additionally we need some metadata about files. 1) genome assembly, config file and know output folder." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "628234aa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: bedboss stat [-h] --bedfile BEDFILE --outfolder OUTFOLDER\n", - " [--open-signal-matrix OPEN_SIGNAL_MATRIX] [--ensdb ENSDB]\n", - " [--bigbed BIGBED] --bedbase-config BEDBASE_CONFIG\n", - " [-y SAMPLE_YAML] --genome GENOME_ASSEMBLY [--no-db-commit]\n", - " [--just-db-commit]\n", - "\n", - "options:\n", - " -h, --help show this help message and exit\n", - " --bedfile BEDFILE a full path to bed file to process [Required]\n", - " --outfolder OUTFOLDER\n", - " Pipeline output folder [Required]\n", - " --open-signal-matrix OPEN_SIGNAL_MATRIX\n", - " a full path to the openSignalMatrix required for the\n", - " tissue specificity plots\n", - " --ensdb ENSDB a full path to the ensdb gtf file required for genomes\n", - " not in GDdata\n", - " --bigbed BIGBED a full path to the bigbed files\n", - " --bedbase-config BEDBASE_CONFIG\n", - " a path to the bedbase configuration file [Required]\n", - " -y SAMPLE_YAML, --sample-yaml SAMPLE_YAML\n", - " a yaml config file with sample attributes to pass on\n", - " more metadata into the database\n", - " --genome GENOME_ASSEMBLY\n", - " genome assembly of the sample [Required]\n", - " --no-db-commit whether the JSON commit to the database should be\n", - " skipped\n", - " --just-db-commit whether just to commit the JSON to the database\n" - ] - } - ], - "source": [ - "bedboss stat --help" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "468f5508", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: You're running an interactive python session. 
This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss stat --bedfile ./sample1.bed.gz --bigbed ./sample1.bigBed --outfolder ./test_output --genome hg19 --bedbase-config ./bedbase_config_test.yaml`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial\n", - "* Outfolder: ./test_output/\n", - "* Pipeline started at: (02-28 15:46:52) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.6\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.12.3\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: 0.1.0-dev1\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1.json` \n", - "\n", - "> `Rscript /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/tools/regionstat.R --bedfilePath=./sample1.bed.gz --fileId=sample1 --openSignalMatrix=None --outputFolder=/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c --genome=hg19 --ensdb=None --digest=c557c915a9901ce377ef724806ff7a2c` (530529)\n", - "
\n",
-      "Loading required package: IRanges\n",
-      "Loading required package: BiocGenerics\n",
-      "\n",
-      "Attaching package: ‘BiocGenerics’\n",
-      "\n",
-      "The following objects are masked from ‘package:stats’:\n",
-      "\n",
-      "    IQR, mad, sd, var, xtabs\n",
-      "\n",
-      "The following objects are masked from ‘package:base’:\n",
-      "\n",
-      "    anyDuplicated, append, as.data.frame, basename, cbind, colnames,\n",
-      "    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\n",
-      "    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\n",
-      "    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\n",
-      "    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\n",
-      "    union, unique, unsplit, which.max, which.min\n",
-      "\n",
-      "Loading required package: S4Vectors\n",
-      "Loading required package: stats4\n",
-      "\n",
-      "Attaching package: ‘S4Vectors’\n",
-      "\n",
-      "The following objects are masked from ‘package:base’:\n",
-      "\n",
-      "    expand.grid, I, unname\n",
-      "\n",
-      "Loading required package: GenomicRanges\n",
-      "Loading required package: GenomeInfoDb\n",
-      "\u001B[?25hsnapshotDate(): 2021-10-19\n",
-      "\u001B[?25h\u001B[?25hLoading required package: GenomicFeatures\n",
-      "Loading required package: AnnotationDbi\n",
-      "Loading required package: Biobase\n",
-      "Welcome to Bioconductor\n",
-      "\n",
-      "    Vignettes contain introductory material; view with\n",
-      "    'browseVignettes()'. To cite Bioconductor, see\n",
-      "    'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n",
-      "\n",
-      "Loading required package: AnnotationFilter\n",
-      "\n",
-      "Attaching package: 'ensembldb'\n",
-      "\n",
-      "The following object is masked from 'package:stats':\n",
-      "\n",
-      "    filter\n",
-      "\n",
-      "\u001B[?25h\u001B[?25h\u001B[?25hLoading required package: R.oo\n",
-      "Loading required package: R.methodsS3\n",
-      "R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.\n",
-      "R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.\n",
-      "\n",
-      "Attaching package: 'R.oo'\n",
-      "\n",
-      "The following object is masked from 'package:R.methodsS3':\n",
-      "\n",
-      "    throw\n",
-      "\n",
-      "The following object is masked from 'package:GenomicRanges':\n",
-      "\n",
-      "    trim\n",
-      "\n",
-      "The following object is masked from 'package:IRanges':\n",
-      "\n",
-      "    trim\n",
-      "\n",
-      "The following objects are masked from 'package:methods':\n",
-      "\n",
-      "    getClasses, getMethods\n",
-      "\n",
-      "The following objects are masked from 'package:base':\n",
-      "\n",
-      "    attach, detach, load, save\n",
-      "\n",
-      "R.utils v2.12.2 (2022-11-11 22:00:03 UTC) successfully loaded. See ?R.utils for help.\n",
-      "\n",
-      "Attaching package: 'R.utils'\n",
-      "\n",
-      "The following object is masked from 'package:utils':\n",
-      "\n",
-      "    timestamp\n",
-      "\n",
-      "The following objects are masked from 'package:base':\n",
-      "\n",
-      "    cat, commandArgs, getOption, isOpen, nullfile, parse, warnings\n",
-      "\n",
-      "\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h\u001B[?25h[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_tssdist\"\n",
-      "\u001B[1m\u001B[22mScale for \u001B[32mx\u001B[39m is already present.\n",
-      "Adding another scale for \u001B[32mx\u001B[39m, which will replace the existing scale.\n",
-      "[1] \"Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_tssdist\"\n",
-      "Successfully calculated and plot TSS distance.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_chrombins\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_chrombins\"\n",
-      "Successfully calculated and plot chromosomes region distribution.\n",
-      "Calculating overlaps...\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_paritions\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_paritions\"\n",
-      "Successfully calculated and plot regions distribution over genomic partitions.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_expected_partitions\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_expected_partitions\"\n",
-      "Successfully calculated and plot expected distribution over genomic partitions.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_cumulative_partitions\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_cumulative_partitions\"\n",
-      "Successfully calculated and plot cumulative distribution over genomic partitions.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_widths_histogram\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_widths_histogram\"\n",
-      "Successfully calculated and plot quantile-trimmed histogram of widths.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/stat_tutorial/test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_neighbor_distances\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1_neighbor_distances\"\n",
-      "Successfully calculated and plot distance between neighbor regions.\n",
-      "open signal matrix not provided. Skipping tissue specificity plot ... \n",
-      "\u001B[?25h\u001B[?25h
\n", - "Command completed. Elapsed time: 0:00:20. Running peak memory: 1.358GB. \n", - " PID: 530529;\tCommand: Rscript;\tReturn code: 0;\tMemory used: 1.358GB\n", - "\n", - "These results exist for 'c557c915a9901ce377ef724806ff7a2c': bedfile, genome\n", - "\n", - "### Pipeline completed. Epilogue\n", - "* Elapsed time (this run): 0:00:20\n", - "* Total elapsed time (all runs): 0:00:20\n", - "* Peak memory (this run): 1.3577 GB\n", - "* Pipeline completed time: 2023-02-28 15:47:12\n" - ] - } - ], - "source": [ - "bedboss stat \\\n", - "--bedfile ./sample1.bed.gz \\\n", - "--bigbed ./sample1.bigBed \\\n", - "--outfolder ./test_output \\\n", - "--genome hg19 \\\n", - "--bedbase-config ./bedbase_config_test.yaml \n" - ] - }, - { - "cell_type": "markdown", - "id": "c745d9b1", - "metadata": {}, - "source": [ - "After plots and statistics were produced, we can look at them" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "208bfa9b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sample1_chrombins.pdf \u001B[0m\u001B[01;35msample1_neighbor_distances.png\u001B[0m\n", - "\u001B[01;35msample1_chrombins.png\u001B[0m sample1_paritions.pdf\n", - "sample1_cumulative_partitions.pdf \u001B[01;35msample1_paritions.png\u001B[0m\n", - "\u001B[01;35msample1_cumulative_partitions.png\u001B[0m sample1_plots.json\n", - "sample1_expected_partitions.pdf sample1_tssdist.pdf\n", - "\u001B[01;35msample1_expected_partitions.png\u001B[0m \u001B[01;35msample1_tssdist.png\u001B[0m\n", - "sample1.json sample1_widths_histogram.pdf\n", - "sample1_neighbor_distances.pdf \u001B[01;35msample1_widths_histogram.png\u001B[0m\n" - ] - } - ], - "source": [ - "ls test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "fe670243", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"name\": 
[\"sample1\"],\n", - " \"regions_no\": [300000],\n", - " \"mean_region_width\": [663.9],\n", - " \"md5sum\": [\"c557c915a9901ce377ef724806ff7a2c\"],\n", - " \"median_TSS_dist\": [48580],\n", - " \"exon_frequency\": [14871],\n", - " \"exon_percentage\": [0.0496],\n", - " \"fiveUTR_frequency\": [8981],\n", - " \"fiveUTR_percentage\": [0.0299],\n", - " \"intergenic_frequency\": [141763],\n", - " \"intergenic_percentage\": [0.4725],\n", - " \"intron_frequency\": [106638],\n", - " \"intron_percentage\": [0.3555],\n", - " \"promoterCore_frequency\": [10150],\n", - " \"promoterCore_percentage\": [0.0338],\n", - " \"promoterProx_frequency\": [6851],\n", - " \"promoterProx_percentage\": [0.0228],\n", - " \"threeUTR_frequency\": [10746],\n", - " \"threeUTR_percentage\": [0.0358]\n", - "}\n" - ] - } - ], - "source": [ - "cat test_output/output/bedstat_output/c557c915a9901ce377ef724806ff7a2c/sample1.json" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/bedboss/notebooks/tutorial-all.ipynb b/docs/bedboss/notebooks/tutorial-all.ipynb deleted file mode 100644 index 823e9fc..0000000 --- a/docs/bedboss/notebooks/tutorial-all.ipynb +++ /dev/null @@ -1,691 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "5ed57409", - "metadata": {}, - "source": [ - "# Bedboss-all tutorial" - ] - }, - { - "cell_type": "markdown", - "id": "e9e494b7", - "metadata": {}, - "source": [ - "This tutorial is attended to show base exaple of using bedboss all function that inclueds all 3 pipelines: bedmake, bedqc and bedstat" - ] - }, - { - "cell_type": "markdown", - "id": "3169c5cf", - "metadata": {}, - "source": [ - "### 1. 
First let's create new working repository" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "b089c6f1", - "metadata": {}, - "outputs": [], - "source": [ - "mkdir all_tutorial ; cd all_tutorial " - ] - }, - { - "cell_type": "markdown", - "id": "ecf10dee", - "metadata": {}, - "source": [ - "### 2. To run our pipelines we need to check if we have installed all dependencies. To do so we can run dependencies check script that can be found in docs." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "221c24cb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-08-11 06:58:27-- https://raw.githubusercontent.com/bedbase/bedboss/68910f5142a95d92c27ef53eafb9c35599af2fbd/test/bash_requirements_test.sh\n", - "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", - "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 3927 (3.8K) [text/plain]\n", - "Saving to: ‘req_test.sh’\n", - "\n", - "req_test.sh 100%[===================>] 3.83K --.-KB/s in 0.006s \n", - "\n", - "2023-08-11 06:58:28 (608 KB/s) - ‘req_test.sh’ saved [3927/3927]\n", - "\n" - ] - } - ], - "source": [ - "wget -O req_test.sh https://raw.githubusercontent.com/bedbase/bedboss/68910f5142a95d92c27ef53eafb9c35599af2fbd/test/bash_requirements_test.sh" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "32c7757a", - "metadata": {}, - "outputs": [], - "source": [ - "chmod u+x ./req_test.sh" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "c4df6265", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------------------------\n", - " \n", - " bedboss installation check \n", - " \n", - "-----------------------------------------------------------\n", - "Checking native installation... \n", - "Language compilers... \n", - "-----------------------------------------------------------\n", - "\u001b[0;32m✔ python is installed correctly\u001b[0m\n", - "\u001b[0;32m✔ R is installed correctly\u001b[0m\n", - "-----------------------------------------------------------\n", - "Checking bedmaker dependencies... \n", - "-----------------------------------------------------------\n", - "\u001b[0;32m✔ package bedboss @ file:///home/bnt4me/virginia/repos/bedbase_all/bedboss\u001b[0m\n", - "\u001b[0;32m✔ package refgenconf==0.12.2\u001b[0m\n", - "\u001b[0;32m✔ bedToBigBed is installed correctly\u001b[0m\n", - "\u001b[0;33m⚠ WARNING: 'bigBedToBed' is not installed. To install 'bigBedToBed' check bedboss documentation: https://bedboss.databio.org/\u001b[0m\n", - "\u001b[0;33m⚠ WARNING: 'bigWigToBedGraph' is not installed. To install 'bigWigToBedGraph' check bedboss documentation: https://bedboss.databio.org/\u001b[0m\n", - "\u001b[0;33m⚠ WARNING: 'wigToBigWig' is not installed. 
To install 'wigToBigWig' check bedboss documentation: https://bedboss.databio.org/\u001b[0m\n", - "-----------------------------------------------------------\n", - "Checking required R packages for bedstat... \n", - "-----------------------------------------------------------\n", - "\u001b[0;32m✔ SUCCESS: R package: optparse\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: ensembldb\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: ExperimentHub\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: AnnotationHub\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: AnnotationFilter\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: BSgenome\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: GenomicFeatures\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: GenomicDistributions\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: GenomicDistributionsData\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: GenomeInfoDb\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: ensembldb\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: tools\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: R.utils\u001b[0m\n", - "\u001b[0;32m✔ SUCCESS: R package: LOLA\u001b[0m\n", - "Number of WARNINGS: 3\n" - ] - } - ], - "source": [ - "./req_test.sh" - ] - }, - { - "cell_type": "markdown", - "id": "44aa2dcd", - "metadata": {}, - "source": [ - "### 3. All requirements are installed, now lets run our pipeline" - ] - }, - { - "cell_type": "markdown", - "id": "50549ec4", - "metadata": {}, - "source": [ - "To run pipeline, we need to provide few required arguments:\n", - "1. sample_name\n", - "2. input_file\n", - "3. input_type\n", - "4. outfolder\n", - "5. genome\n", - "6. 
bedbase_config\n", - "\n", - "If you don't have bedbase config file, or initialized bedbase db you can check documnetation how to do it: https://bedboss.databio.org/" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "b71f7610", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: bedboss==0.1.0a2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (0.1.0a2)\n", - "Requirement already satisfied: piper>=0.13.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.13.2)\n", - "Requirement already satisfied: pandas>=1.5.3 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.0.3)\n", - "Requirement already satisfied: peppy>=0.35.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.35.7)\n", - "Requirement already satisfied: requests>=2.28.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (2.28.2)\n", - "Requirement already satisfied: logmuse>=0.2.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.2.7)\n", - "Requirement already satisfied: yacman>=0.8.4 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.9.1)\n", - "Requirement already satisfied: refgenconf>=0.12.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.12.2)\n", - "Requirement already satisfied: bbconf==0.4.0a1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.4.0a1)\n", - "Requirement already satisfied: ubiquerg>=0.6.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bedboss==0.1.0a2) (0.6.2)\n", - "Requirement already satisfied: pipestat>=0.4.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from 
bbconf==0.4.0a1->bedboss==0.1.0a2) (0.4.1)\n", - "Requirement already satisfied: sqlalchemy<2.0.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from bbconf==0.4.0a1->bedboss==0.1.0a2) (1.4.41)\n", - "Requirement already satisfied: tzdata>=2022.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2023.3)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2022.7.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (2.8.2)\n", - "Requirement already satisfied: numpy>=1.21.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pandas>=1.5.3->bedboss==0.1.0a2) (1.24.1)\n", - "Requirement already satisfied: attmap>=0.13.2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (0.13.2)\n", - "Requirement already satisfied: pyyaml in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (6.0)\n", - "Requirement already satisfied: rich>=10.3.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from peppy>=0.35.7->bedboss==0.1.0a2) (13.3.0)\n", - "Requirement already satisfied: psutil in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from piper>=0.13.2->bedboss==0.1.0a2) (5.9.4)\n", - "Requirement already satisfied: tqdm in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.64.1)\n", - "Requirement already satisfied: pyfaidx in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (0.7.1)\n", - "Requirement already satisfied: future in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from 
refgenconf>=0.12.2->bedboss==0.1.0a2) (0.18.3)\n", - "Requirement already satisfied: jsonschema>=3.0.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from refgenconf>=0.12.2->bedboss==0.1.0a2) (4.17.3)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.0.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (1.26.14)\n", - "Requirement already satisfied: idna<4,>=2.5 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from requests>=2.28.2->bedboss==0.1.0a2) (2022.12.7)\n", - "Requirement already satisfied: oyaml in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from yacman>=0.8.4->bedboss==0.1.0a2) (1.0)\n", - "Requirement already satisfied: attrs>=17.4.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (22.2.0)\n", - "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from jsonschema>=3.0.1->refgenconf>=0.12.2->bedboss==0.1.0a2) (0.19.3)\n", - "Requirement already satisfied: psycopg2-binary in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.9.5)\n", - "Requirement already satisfied: eido in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.2.1)\n", - "Requirement already satisfied: sqlmodel>=0.0.8 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from 
pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.8)\n", - "Requirement already satisfied: pydantic<2.0.0,>=1.10.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (1.10.12)\n", - "Requirement already satisfied: six>=1.5 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas>=1.5.3->bedboss==0.1.0a2) (1.16.0)\n", - "Requirement already satisfied: markdown-it-py<3.0.0,>=2.1.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.1.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.14.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (2.14.0)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from sqlalchemy<2.0.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (2.0.1)\n", - "Requirement already satisfied: setuptools>=0.7 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pyfaidx->refgenconf>=0.12.2->bedboss==0.1.0a2) (65.5.1)\n", - "Requirement already satisfied: mdurl~=0.1 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from markdown-it-py<3.0.0,>=2.1.0->rich>=10.3.0->peppy>=0.35.7->bedboss==0.1.0a2) (0.1.2)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from pydantic<2.0.0,>=1.10.7->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (4.4.0)\n", - "Requirement already satisfied: sqlalchemy2-stubs in /home/bnt4me/virginia/venv/bedboss/lib/python3.10/site-packages (from sqlmodel>=0.0.8->pipestat>=0.4.0->bbconf==0.4.0a1->bedboss==0.1.0a2) (0.0.2a35)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: 
\u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "pip install bedboss==0.1.0a2" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "627ee6a3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: bedboss all [-h] --outfolder OUTFOLDER -s SAMPLE_NAME -f INPUT_FILE -t\n", - " INPUT_TYPE -g GENOME [-r RFG_CONFIG]\n", - " [--chrom-sizes CHROM_SIZES] [-n] [--standard-chrom]\n", - " [--check-qc] [--open-signal-matrix OPEN_SIGNAL_MATRIX]\n", - " [--ensdb ENSDB] --bedbase-config BEDBASE_CONFIG\n", - " [-y SAMPLE_YAML] [--no-db-commit] [--just-db-commit]\n", - "bedboss all: error: the following arguments are required: --outfolder, -s/--sample-name, -f/--input-file, -t/--input-type, -g/--genome, --bedbase-config\n" - ] - }, - { - "ename": "", - "evalue": "2", - "output_type": "error", - "traceback": [] - } - ], - "source": [ - "bedboss all" - ] - }, - { - "cell_type": "markdown", - "id": "e9a7acf1", - "metadata": {}, - "source": [ - "Let's download sample file. Information about this file you can find here: https://pephub.databio.org/bedbase/GSE177859?tag=default" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "107b36af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-08-11 07:12:28-- ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM5379nnn/GSM5379062/suppl/GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz\n", - " => ‘sample1.bed.gz’\n", - "Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 130.14.250.12, 130.14.250.10, 2607:f220:41f:250::229, ...\n", - "Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|130.14.250.12|:21... connected.\n", - "Logging in as anonymous ... 
Logged in!\n", - "==> SYST ... done. ==> PWD ... done.\n", - "==> TYPE I ... done. ==> CWD (1) /geo/samples/GSM5379nnn/GSM5379062/suppl ... done.\n", - "==> SIZE GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz ... 5470278\n", - "==> PASV ... done. ==> RETR GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz ... done.\n", - "Length: 5470278 (5.2M) (unauthoritative)\n", - "\n", - "GSM5379062_ENCFF834 100%[===================>] 9.76M 1008KB/s in 24s \n", - "\n", - "2023-08-11 07:12:55 (419 KB/s) - ‘sample1.bed.gz’ saved [10231006]\n", - "\n" - ] - } - ], - "source": [ - "wget -O sample1.bed.gz ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM5379nnn/GSM5379062/suppl/GSM5379062_ENCFF834LRN_peaks_GRCh38.bed.gz" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d961bcd", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "c873a853", - "metadata": {}, - "source": [ - "let's create bedbase config file:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "127df991", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cat: bedbase_config_test.yaml: No such file or directory\n" - ] - }, - { - "ename": "", - "evalue": "1", - "output_type": "error", - "traceback": [] - } - ], - "source": [ - "cat bedbase_config_test.yaml" - ] - }, - { - "cell_type": "markdown", - "id": "45d79641", - "metadata": {}, - "source": [ - "Now let's run bedboss:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "0daa1402", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: You're running an interactive python session. 
This works, but pypiper cannot tee the output, so results are only logged to screen.\n", - "### Pipeline run code and environment:\n", - "\n", - "* Command: `/home/bnt4me/virginia/venv/jupyter/bin/bedboss all --sample-name tutorial_f1 --input-file sample1.bed.gz --input-type bed --outfolder ./tutorial --genome GRCh38 --bedbase-config bedbase_config_test.yaml`\n", - "* Compute host: bnt4me-Precision-5560\n", - "* Working dir: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial\n", - "* Outfolder: ./tutorial/\n", - "* Pipeline started at: (02-27 12:47:26) elapsed: 0.0 _TIME_\n", - "\n", - "### Version log:\n", - "\n", - "* Python version: 3.10.6\n", - "* Pypiper dir: `/home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/pypiper`\n", - "* Pypiper version: 0.12.3\n", - "* Pipeline dir: `/home/bnt4me/virginia/venv/jupyter/bin`\n", - "* Pipeline version: None\n", - "\n", - "### Arguments passed to pipeline:\n", - "\n", - "\n", - "----------------------------------------\n", - "\n", - "Unused arguments: {'command': 'all'}\n", - "Getting Open Signal Matrix file path...\n", - "output_bed = ./tutorial/bed_files/sample1.bed.gz\n", - "output_bigbed = ./tutorial/bigbed_files\n", - "Output directory does not exist. Creating: ./tutorial/bed_files\n", - "BigBed directory does not exist. Creating: ./tutorial/bigbed_files\n", - "bedmaker logs directory doesn't exist. Creating one...\n", - "Got input type: bed\n", - "Converting sample1.bed.gz to BED format.\n", - "Target to produce: `./tutorial/bed_files/sample1.bed.gz` \n", - "\n", - "> `cp sample1.bed.gz ./tutorial/bed_files/sample1.bed.gz` (434320)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0GB. \n", - " PID: 434320;\tCommand: cp;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "Running bedqc...\n", - "Unused arguments: {}\n", - "Target to produce: `./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8` \n", - "\n", - "> `zcat ./tutorial/bed_files/sample1.bed.gz > ./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8` (434322)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 434322;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.003GB\n", - "\n", - "Targetless command, running... \n", - "\n", - "> `bash /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedqc/est_line.sh ./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8 ` (434324)\n", - "
\n",
-      "236000
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.003GB. \n", - " PID: 434324;\tCommand: bash;\tReturn code: 0;\tMemory used: 0.0GB\n", - "\n", - "File (./tutorial/bed_files/bedmaker_logs/tutorial_f1/rigumni8) has passed Quality Control!\n", - "Generating bigBed files for: sample1.bed.gz\n", - "Determining path to chrom.sizes asset via Refgenie.\n", - "Creating refgenie genome config file...\n", - "Reading refgenie genome configuration file from file: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/genome_config.yaml\n", - "/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Determined path to chrom.sizes asset: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes\n", - "Target to produce: `./tutorial/bigbed_files/vzxyqexz` \n", - "\n", - "> `zcat ./tutorial/bed_files/sample1.bed.gz | sort -k1,1 -k2,2n > ./tutorial/bigbed_files/vzxyqexz` (434335,434336)\n", - "
\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:00. Running peak memory: 0.007GB. \n", - " PID: 434335;\tCommand: zcat;\tReturn code: 0;\tMemory used: 0.002GB \n", - " PID: 434336;\tCommand: sort;\tReturn code: 0;\tMemory used: 0.007GB\n", - "\n", - "Running: /home/bnt4me/virginia/repos/bedbase_all/bedboss/bedToBigBed -type=bed6+4 ./tutorial/bigbed_files/vzxyqexz /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes ./tutorial/bigbed_files/sample1.bigBed\n", - "Target to produce: `./tutorial/bigbed_files/sample1.bigBed` \n", - "\n", - "> `/home/bnt4me/virginia/repos/bedbase_all/bedboss/bedToBigBed -type=bed6+4 ./tutorial/bigbed_files/vzxyqexz /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/alias/hg38/fasta/default/hg38.chrom.sizes ./tutorial/bigbed_files/sample1.bigBed` (434338)\n", - "
\n",
-      "pass1 - making usageList (25 chroms): 27 millis\n",
-      "pass2 - checking and writing primary data (222016 records, 10 fields): 413 millis\n",
-      "
\n", - "Command completed. Elapsed time: 0:00:01. Running peak memory: 0.007GB. \n", - " PID: 434338;\tCommand: /home/bnt4me/virginia/repos/bedbase_all/bedboss/bedToBigBed;\tReturn code: 0;\tMemory used: 0.004GB\n", - "\n", - "Target to produce: `/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1.json` \n", - "\n", - "> `Rscript /home/bnt4me/virginia/venv/jupyter/lib/python3.10/site-packages/bedboss/bedstat/tools/regionstat.R --bedfilePath=./tutorial/bed_files/sample1.bed.gz --fileId=sample1 --openSignalMatrix=./openSignalMatrix/openSignalMatrix_hg38_percentile99_01_quantNormalized_round4d.txt.gz --outputFolder=/home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5 --genome=hg38 --ensdb=None --digest=eb617f28e129c401be94069e0fdedbb5` (434343)\n", - "
\n",
-      "Loading required package: IRanges\n",
-      "Loading required package: BiocGenerics\n",
-      "\n",
-      "Attaching package: ‘BiocGenerics’\n",
-      "\n",
-      "The following objects are masked from ‘package:stats’:\n",
-      "\n",
-      "    IQR, mad, sd, var, xtabs\n",
-      "\n",
-      "The following objects are masked from ‘package:base’:\n",
-      "\n",
-      "    anyDuplicated, append, as.data.frame, basename, cbind, colnames,\n",
-      "    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\n",
-      "    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\n",
-      "    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\n",
-      "    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\n",
-      "    union, unique, unsplit, which.max, which.min\n",
-      "\n",
-      "Loading required package: S4Vectors\n",
-      "Loading required package: stats4\n",
-      "\n",
-      "Attaching package: ‘S4Vectors’\n",
-      "\n",
-      "The following objects are masked from ‘package:base’:\n",
-      "\n",
-      "    expand.grid, I, unname\n",
-      "\n",
-      "Loading required package: GenomicRanges\n",
-      "Loading required package: GenomeInfoDb\n",
-      "\u001b[?25hsnapshotDate(): 2021-10-19\n",
-      "\u001b[?25h\u001b[?25hLoading required package: GenomicFeatures\n",
-      "Loading required package: AnnotationDbi\n",
-      "Loading required package: Biobase\n",
-      "Welcome to Bioconductor\n",
-      "\n",
-      "    Vignettes contain introductory material; view with\n",
-      "    'browseVignettes()'. To cite Bioconductor, see\n",
-      "    'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n",
-      "\n",
-      "Loading required package: AnnotationFilter\n",
-      "\n",
-      "Attaching package: 'ensembldb'\n",
-      "\n",
-      "The following object is masked from 'package:stats':\n",
-      "\n",
-      "    filter\n",
-      "\n",
-      "\u001b[?25h\u001b[?25h\u001b[?25hLoading required package: R.oo\n",
-      "Loading required package: R.methodsS3\n",
-      "R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.\n",
-      "R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.\n",
-      "\n",
-      "Attaching package: 'R.oo'\n",
-      "\n",
-      "The following object is masked from 'package:R.methodsS3':\n",
-      "\n",
-      "    throw\n",
-      "\n",
-      "The following object is masked from 'package:GenomicRanges':\n",
-      "\n",
-      "    trim\n",
-      "\n",
-      "The following object is masked from 'package:IRanges':\n",
-      "\n",
-      "    trim\n",
-      "\n",
-      "The following objects are masked from 'package:methods':\n",
-      "\n",
-      "    getClasses, getMethods\n",
-      "\n",
-      "The following objects are masked from 'package:base':\n",
-      "\n",
-      "    attach, detach, load, save\n",
-      "\n",
-      "R.utils v2.12.2 (2022-11-11 22:00:03 UTC) successfully loaded. See ?R.utils for help.\n",
-      "\n",
-      "Attaching package: 'R.utils'\n",
-      "\n",
-      "The following object is masked from 'package:utils':\n",
-      "\n",
-      "    timestamp\n",
-      "\n",
-      "The following objects are masked from 'package:base':\n",
-      "\n",
-      "    cat, commandArgs, getOption, isOpen, nullfile, parse, warnings\n",
-      "\n",
-      "\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25hsee ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation\n",
-      "loading from cache\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_tssdist\"\n",
-      "\u001b[1m\u001b[22mScale for \u001b[32mx\u001b[39m is already present.\n",
-      "Adding another scale for \u001b[32mx\u001b[39m, which will replace the existing scale.\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_tssdist\"\n",
-      "Successfully calculated and plot TSS distance.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_chrombins\"\n",
-      "see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation\n",
-      "loading from cache\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_chrombins\"\n",
-      "Successfully calculated and plot chromosomes region distribution.\n",
-      "see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation\n",
-      "loading from cache\n",
-      "Calculating overlaps...\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_paritions\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_paritions\"\n",
-      "Successfully calculated and plot regions distribution over genomic partitions.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_expected_partitions\"\n",
-      "see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation\n",
-      "loading from cache\n",
-      "see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation\n",
-      "loading from cache\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_expected_partitions\"\n",
-      "Successfully calculated and plot expected distribution over genomic partitions.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_cumulative_partitions\"\n",
-      "see ?GenomicDistributionsData and browseVignettes('GenomicDistributionsData') for documentation\n",
-      "loading from cache\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_cumulative_partitions\"\n",
-      "Successfully calculated and plot cumulative distribution over genomic partitions.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_widths_histogram\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_widths_histogram\"\n",
-      "Successfully calculated and plot quantile-trimmed histogram of widths.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_neighbor_distances\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_neighbor_distances\"\n",
-      "Successfully calculated and plot distance between neighbor regions.\n",
-      "[1] \"Plotting: /home/bnt4me/virginia/repos/bedbase_all/bedboss/docs_jupyter/all_tutorial/tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_open_chromatin\"\n",
-      "[1] \"Writing plot json: output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/sample1_open_chromatin\"\n",
-      "Successfully calculated and plot cell specific enrichment for open chromatin.\n",
-      "\u001b[?25h\u001b[?25h
\n", - "Command completed. Elapsed time: 0:00:49. Running peak memory: 3.843GB. \n", - " PID: 434343;\tCommand: Rscript;\tReturn code: 0;\tMemory used: 3.843GB\n", - "\n", - "These results exist for 'eb617f28e129c401be94069e0fdedbb5': name, regions_no, mean_region_width, md5sum, bedfile, genome, bigbedfile, widths_histogram, neighbor_distances\n", - "Starting cleanup: 2 files; 0 conditional files for cleanup\n", - "\n", - "Cleaning up flagged intermediate files. . .\n", - "\n", - "### Pipeline completed. Epilogue\n", - "* Elapsed time (this run): 0:00:50\n", - "* Total elapsed time (all runs): 0:00:50\n", - "* Peak memory (this run): 3.8432 GB\n", - "* Pipeline completed time: 2023-02-27 12:48:16\n" - ] - } - ], - "source": [ - "bedboss all --sample-name tutorial_f1 \\\n", - "--input-file sample1.bed.gz \\\n", - "--input-type bed \\\n", - "--outfolder ./tutorial \\\n", - "--genome GRCh38 \\\n", - "--bedbase-config bedbase_config_test.yaml" - ] - }, - { - "cell_type": "markdown", - "id": "63d83f3c", - "metadata": {}, - "source": [ - "Now let's check if all files where saved" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "7a50535d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0m\u001b[01;34mbedmaker_logs\u001b[0m \u001b[01;31msample1.bed.gz\u001b[0m\n" - ] - } - ], - "source": [ - "ls tutorial/bed_files" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "9a826059", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sample1.bigBed\n" - ] - } - ], - "source": [ - "ls tutorial/bigbed_files" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "aa8609fb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sample1_chrombins.pdf sample1_open_chromatin.pdf\n", - "\u001b[0m\u001b[01;35msample1_chrombins.png\u001b[0m 
\u001b[01;35msample1_open_chromatin.png\u001b[0m\n", - "sample1_cumulative_partitions.pdf sample1_paritions.pdf\n", - "\u001b[01;35msample1_cumulative_partitions.png\u001b[0m \u001b[01;35msample1_paritions.png\u001b[0m\n", - "sample1_expected_partitions.pdf sample1_plots.json\n", - "\u001b[01;35msample1_expected_partitions.png\u001b[0m sample1_tssdist.pdf\n", - "sample1.json \u001b[01;35msample1_tssdist.png\u001b[0m\n", - "sample1_neighbor_distances.pdf sample1_widths_histogram.pdf\n", - "\u001b[01;35msample1_neighbor_distances.png\u001b[0m \u001b[01;35msample1_widths_histogram.png\u001b[0m\n" - ] - } - ], - "source": [ - "ls tutorial/output/bedstat_output/eb617f28e129c401be94069e0fdedbb5/" - ] - }, - { - "cell_type": "markdown", - "id": "2208d244", - "metadata": {}, - "source": [ - "Everything was ran correctly:)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/bedboss/tutorials/bedbuncher_tutorial.md b/docs/bedboss/tutorials/bedbuncher_tutorial.md index 761cf98..66b164d 100644 --- a/docs/bedboss/tutorials/bedbuncher_tutorial.md +++ b/docs/bedboss/tutorials/bedbuncher_tutorial.md @@ -3,6 +3,10 @@ Bedbuncher is used to create bedset of bed files in the bedbase database. ### 1) Create bedbase config file + +How to create config file: [configuration section](../how-to-configure.md). + + ### 2) Create pep with bed file record identifiers. To do so, you need to create a PEP with the following fields: sample_name (where sample_name is record_identifier), or `sample_name` + `record_identifier` e.g. 
sample_table: diff --git a/docs/bedboss/tutorials/bedindex_tutorial.md b/docs/bedboss/tutorials/bedindex_tutorial.md index 1902796..8f93c5e 100644 --- a/docs/bedboss/tutorials/bedindex_tutorial.md +++ b/docs/bedboss/tutorials/bedindex_tutorial.md @@ -1,6 +1,10 @@ ### Indexing to qdrant database ### 1. Create bedbase config file + +How to create a BEDbase configuration file is described in the [configuration section](../how-to-configure.md). + + ### 2. Run bedboss index #### From command line diff --git a/docs/bedboss/tutorials/tutorial_all.md b/docs/bedboss/tutorials/tutorial_all.md index e642a75..b14bd55 100644 --- a/docs/bedboss/tutorials/tutorial_all.md +++ b/docs/bedboss/tutorials/tutorial_all.md @@ -1,6 +1,6 @@ ## Bedboss run-all -Bedboss run-all is intended to run on ONE sample (bed file) and run all bedboss pipelines: +Bedboss run-all is intended to run on **ONE** sample (bed file) and run all bedboss pipelines: bedmaker (+ bedclassifier + bedqc) -> bedstat. After that optionally it can run bedbuncher, qdrant indexing and upload metadata to PEPhub. ### Step 1: Install all dependencies @@ -14,7 +14,7 @@ If requirements are not satisfied, you will see the list of missing packages. ### Step 2: Create bedconf.yaml file To run bedboss, you need to create a bedconf.yaml file with configuration. -Detail instructions are in the configuration section. +Detailed instructions are in the [configuration section](../how-to-configure.md). ### Step 3: Run bedboss To run bedboss, you need to run the next command: @@ -32,6 +32,7 @@ Above command will run bedboss on the bed file and create a bedstat file in the It contains only required parameters. For more details, please check the usage section. By default, results will be uploaded only to the PostgreSQL database. + - To upload results to PEPhub, you need to make the `databio` org available on GitHub, then login to PEPhub, and add the `--upload-pephub` flag to the command. 
- To upload results to Qdrant, you need to add the `--upload-qdrant` flag to the command. - To upload actual files to S3, you need to add the `--upload-s3` flag to the command, and before uploading, you have to set up all necessary environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_ENDPOINT_URL. diff --git a/docs/bedboss/tutorials/tutorial_run_pep.md b/docs/bedboss/tutorials/tutorial_run_pep.md index 71df030..f497084 100644 --- a/docs/bedboss/tutorials/tutorial_run_pep.md +++ b/docs/bedboss/tutorials/tutorial_run_pep.md @@ -15,7 +15,7 @@ If requirements are not satisfied, you will see the list of missing packages. ### Step 2: Create bedconf.yaml file To run bedboss run-pep, you need to create a bedconf.yaml file with configuration. -Detailed instructions are in the configuration section. +Detailed instructions are in the [configuration section](../how-to-configure.md). ### Step 3: Create PEP with bed files. BEDboss PEP should contain next fields: sample_name, input_file, input_type, genome. @@ -36,6 +36,7 @@ Above command will run bedboss on the bed file and create a file with statistics It contains only required parameters. For more details, please check the usage section. By default, results will be uploaded only to the PostgreSQL database. + - To upload results to PEPhub, you need to make the `databio` org available on GitHub, then login to PEPhub, and add the `--upload-pephub` flag to the command. - To upload results to Qdrant, you need to add the `--upload-qdrant` flag to the command. - To upload actual files to S3, you need to add the `--upload-s3` flag to the command, and before uploading, you have to set up all necessary environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_ENDPOINT_URL. 
diff --git a/mkdocs.yml b/mkdocs.yml index d33825a..326fbf8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -57,13 +57,13 @@ nav: - Changelog: bbconf/changelog.md - Reference: - How to cite: citations.md - - Usage: bedboss_usage.md + - Usage: bedboss/usage.md - Support: https://github.com/bedbase/bedboss/issues - Contributing: contributing.md - Changelog: changelog.md - BEDboss: - BEDboss: bedboss/README.md - - Tutorial: + - Tutorials: - BEDboss run-pep: bedboss/tutorials/tutorial_run_pep.md - BEDboss-all pipeline: bedboss/tutorials/tutorial_all.md - BEDmaker tutorial: bedboss/tutorials/bedmaker_tutorial.md From 87289d7144236969fdc89b4316b0eaf21ee416fe Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Apr 2024 12:57:00 -0400 Subject: [PATCH 6/7] updated bbconf docs --- docs/bbconf/README.md | 37 +- docs/bbconf/bbc_api.md | 192 +- docs/bbconf/changelog.md | 39 +- docs/bbconf/code/config.md | 15301 ------------------------- docs/bbconf/code/demo.md | 16030 --------------------------- docs/bbconf/demo.md | 1 + docs/bbconf/notebooks/config.ipynb | 149 - docs/bbconf/notebooks/demo.ipynb | 552 - docs/bedbase/README.md | 27 +- docs/bedhost/README.md | 6 + docs/bedhost/changelog.md | 5 + mkdocs.yml | 4 +- 12 files changed, 105 insertions(+), 32238 deletions(-) delete mode 100644 docs/bbconf/code/config.md delete mode 100644 docs/bbconf/code/demo.md create mode 100644 docs/bbconf/demo.md delete mode 100644 docs/bbconf/notebooks/config.ipynb delete mode 100644 docs/bbconf/notebooks/demo.ipynb diff --git a/docs/bbconf/README.md b/docs/bbconf/README.md index a22df26..d213de3 100644 --- a/docs/bbconf/README.md +++ b/docs/bbconf/README.md @@ -1,3 +1,36 @@ -# bbconf +

bbconf

+ +![Run pytests](https://github.com/databio/bbconf/workflows/Run%20pytests/badge.svg) +[![pypi-badge](https://img.shields.io/pypi/v/bbconf?color=%2334D058)](https://pypi.org/project/bbconf/) +[![pypi-version](https://img.shields.io/pypi/pyversions/bbconf.svg?color=%2334D058)](https://pypi.org/project/bbconf) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Github badge](https://img.shields.io/badge/source-github-354a75?logo=github)](https://github.com/databio/bbconf) +[![coverage](https://coverage-badge.samuelcolvin.workers.dev/databio/bbconf.svg)](https://coverage-badge.samuelcolvin.workers.dev/redirect/databio/bbconf) + + +*BEDBASE* project configuration package (agent) + +## What is this? + +`bbconf` is a configuration and management tool for BEDbase, facilitating the reading of configuration files, +setting up connections to PostgreSQL, PEPhub, S3, and Qdrant databases, managing file paths, and storing transformer models. +It formalizes communication pathways for pipelines and downstream tools, ensuring seamless interaction. + +--- + +**Documentation**: https://docs.bedbase.org/bedboss + +**Source Code**: https://github.com/databio/bbconf + +--- + +## Installation +To install `bbconf` use this command: +``` +pip install bbconf +``` +or install the latest version from the GitHub repository: +``` +pip install git+https://github.com/databio/bbconf.git +``` -`bbconf` defines `BedBaseConf` class which is an in-memory representation of the configuration file for the *BEDBASE* project. This is the source of the project-wide constant variables. Additionally it implements multiple convenience methods for interacting with the database backend, i.e. 
[PostgreSQL](https://www.postgresql.org/) \ No newline at end of file diff --git a/docs/bbconf/bbc_api.md b/docs/bbconf/bbc_api.md index e933672..e92401c 100644 --- a/docs/bbconf/bbc_api.md +++ b/docs/bbconf/bbc_api.md @@ -1,191 +1 @@ -Final targets: BedBaseConf, get_bedbase_cfg - - - - - -# Package `bbconf` Documentation - -## Class `BedBaseConf` -This class standardizes reporting of bedstat and bedbuncher results. It formalizes a way for these pipelines and downstream tools to communicate -- the produced results can easily and reliably become an input for the server. The object exposes API for interacting with the results and is backed by a [PostgreSQL](https://www.postgresql.org/) database. - - -```python -def __init__(self, config_path=None, database_only=False) -``` - -Initialize the object -#### Parameters: - -- `config_path` (`str`): path to the bedbase configuration file -- `database_only` (`bool`): whether the database managers should notkeep an in-memory copy of the data in the database - - - - -```python -def bed(self) -``` - -PipestatManager of the bedfiles table -#### Returns: - -- `pipestat.PipestatManager`: manager of the bedfiles table - - - - -```python -def bedset(self) -``` - -PipestatManager of the bedsets table -#### Returns: - -- `pipestat.PipestatManager`: manager of the bedsets table - - - - -```python -def config(self) -``` - -Config used to initialize the object -#### Returns: - -- `yacman.YacAttMap`: bedbase configuration file contents - - - - -```python -def get_bedbuncher_output_path(self, remote=False) -``` - -Get path to the output of the bedbuncher pipeline -#### Parameters: - -- `remote` (`bool`): whether to use remote url base - - -#### Returns: - -- `str`: path to the bedbuncher pipeline output - - - - -```python -def get_bedstat_output_path(self, remote=False) -``` - -Get path to the output of the bedstat pipeline -#### Parameters: - -- `remote` (`bool`): whether to use remote url base - - -#### Returns: - -- `str`: path to the 
bedstat pipeline output - - - - -```python -def remove_relationship(self, bedset_id, bedfile_ids=None) -``` - -Remove entries from the relationships table -#### Parameters: - -- `bedset_id` (`str`): id of the bedset to remove -- `bedfile_ids` (`list[str]`): ids of the bedfiles to remove for theselected bedset. If none provided, all the relationsips for the selected bedset will be removed. - - - - -```python -def report_relationship(self, bedset_id, bedfile_id) -``` - -Report a bedfile for bedset. - -Inserts the ID pair into the relationship table, which allows to -manage many to many bedfile bedset relationships -#### Parameters: - -- `bedset_id` (`int`): id of the bedset to report bedfile for -- `bedfile_id` (`int`): id of the bedfile to report - - - - -```python -def select_bedfiles_for_bedset(self, condition=None, condition_val=None, bedfile_col=None) -``` - -Select bedfiles that are part of a bedset that matches the query -#### Parameters: - -- `condition` (`str`): bedsets table query to restrict the results with,for instance `"id=%s"` -- `condition_val` (`list[str]`): values to populate the condition stringwith -- `bedfile_col` (`list[str] | str`): bedfile columns to include in theresult, if none specified all columns will be included - - -#### Returns: - -- `list[psycopg2.extras.DictRow]`: matched bedfiles table contents - - - - -```python -def get_bedbase_cfg(cfg=None) -``` - -Determine path to the bedbase configuration file - -The path can be either explicitly provided -or read from a $BEDBASE environment variable -#### Parameters: - -- `cfg` (`str`): path to the config file.Optional, the $BEDBASE config env var will be used if not provided - - -#### Returns: - -- `str`: configuration file path - - - - - - - -*Version Information: `bbconf` v0.1.0, generated by `lucidoc` v0.4.3* +### 🚧 Docs in progress! Stay tuned for updates. We're working hard to bring you valuable content soon! 
\ No newline at end of file diff --git a/docs/bbconf/changelog.md b/docs/bbconf/changelog.md index 91d205c..e50d2b6 100644 --- a/docs/bbconf/changelog.md +++ b/docs/bbconf/changelog.md @@ -2,12 +2,45 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.4.0] - 2023-10-17 +## [0.5.1] - 2024-04-09 +### Changed + +- updated qdrant uploader +- bedset bedfile list query improvement +- other minor fixes in uploading + +## [0.5.0] - 2024-04-08 +### Changed + +- Rebuild bbconf +- Introduced new DB schema +- Added bbagent that will be used to interact with the database +- Updated config schema +- Added new functionality to the bbagent +- New tests + + +## [0.4.2] - 2024-03-12 +### Change +- Updated logger +- Updated requirements +- Added `upload_status` column to the `bedfile` table + + +## [0.4.1] - 2024-01-01 +### Fix +- Requirements + + +## [0.4.0] - 2023-12-18 ### Change -- bbconf to use pipestat v0.4.1 and SQLModel -- Added qdrant search, and insert methods +- bbconf to use pipestat v0.6.0 and SQLModel - Fixed tests +### Added +- `qdrant` search, insert and update functionality +- functions that return results in the DRS format for both bed and bedhost. 
[DRS](https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.2.0/docs/) + ## [0.3.0] - 2022-08-18 ### Change - update select_bedfiles_for_distance diff --git a/docs/bbconf/code/config.md b/docs/bbconf/code/config.md deleted file mode 100644 index fd25f1f..0000000 --- a/docs/bbconf/code/config.md +++ /dev/null @@ -1,15301 +0,0 @@ - - - - - -config - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/bbconf/code/demo.md b/docs/bbconf/code/demo.md deleted file mode 100644 index 1b363e2..0000000 --- a/docs/bbconf/code/demo.md +++ /dev/null @@ -1,16030 +0,0 @@ - - - - - -demo - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/bbconf/demo.md b/docs/bbconf/demo.md new file mode 100644 index 0000000..e92401c --- /dev/null +++ b/docs/bbconf/demo.md @@ -0,0 +1 @@ +### 🚧 Docs in progress! Stay tuned for updates. We're working hard to bring you valuable content soon! \ No newline at end of file diff --git a/docs/bbconf/notebooks/config.ipynb b/docs/bbconf/notebooks/config.ipynb deleted file mode 100644 index 7e285d3..0000000 --- a/docs/bbconf/notebooks/config.ipynb +++ /dev/null @@ -1,149 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Bedbase configuration file documentation\n", - "\n", - "In order to start working with the `BedBaseConf` object, it has to be initialized first. 
The constuctor requires one argument, which is a path to the configuration file (in YAML format).\n", - "\n", - "## Minimal config file\n", - "\n", - "The minimal configuration must define the `path` section with 3 keys:\n", - "\n", - "- `pipeline_output_path`: path to the desired output directory for the pipelines\n", - "- `bedstat_dir`: name of the [bedstat](https://github.com/databio/bedstat) pipeline output directory\n", - "- `bedbuncher_dir`: name of the [bedbuncher](https://github.com/databio/bedbuncher) pipeline output directory\n", - "\n", - "Here's an example of a minimal bedbase configuration file:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# min config example. Refer to bbconf/const.py for key names and default values\r\n", - "\r\n", - "path:\r\n", - " pipeline_output_path: $HOME/bedbase\r\n", - " bedstat_dir: bedstat_output\r\n", - " bedbuncher_dir: bedbuncher_output" - ] - } - ], - "source": [ - "!cat ../tests/data/config_min.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example config file\n", - "\n", - "Apart from the required `path` section, there are 2 other sections that can be used to configure the PostgreSQL database, used to store the metadata about the bedfiles and bedsets (`database` section) and to configure the bedhost server that displays the pipeline results and provides an API to query them (`server` section).\n", - "\n", - "Here's an example of a complete bedbase configuration file:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "database:\r\n", - " name: pipestat-test\r\n", - " user: postgres\r\n", - " password: pipestat-password\r\n", - " host: localhost\r\n", - "# port: 5432; intentionally commented out to test the defaults setting system\r\n", - "path:\r\n", - " 
pipeline_output_path: $BEDBASE_DATA_PATH/outputs\r\n", - " bedstat_dir: bedstat_output\r\n", - " bedbuncher_dir: bedbuncher_output\r\n", - " remote_url_base: null\r\n", - "server:\r\n", - " host: 0.0.0.0\r\n", - " port: 8000" - ] - } - ], - "source": [ - "!cat ../tests/data/config.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Default values\n", - "\n", - "In case any of the values shown below is not provided in the configuration file, it will be set to a default value" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AttMap\n", - "path:\n", - " remote_url_base: null\n", - "database:\n", - " user: postgres\n", - " password: bedbasepassword\n", - " name: postgres\n", - " port: 5432\n", - " host: localhost\n", - "server:\n", - " host: 0.0.0.0\n", - " port: 80" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from bbconf.const import DEFAULT_SECTION_VALUES\n", - "from attmap import AttMap\n", - "AttMap(DEFAULT_SECTION_VALUES)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/bbconf/notebooks/demo.ipynb b/docs/bbconf/notebooks/demo.ipynb deleted file mode 100644 index 634cc11..0000000 --- a/docs/bbconf/notebooks/demo.ipynb +++ /dev/null @@ -1,552 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# `BedBaseConf` object usage demonstration\n", - "\n", - "`bbconf` standardizes reporting of [bedstat](https://github.com/databio/bedstat) and 
[bedbuncher](https://github.com/databio/bedsbuncher) results. It formalizes a way for these pipelines and downstream tools communicate -- the produced results can easily and reliably become an\n", - "input for the server ([bedhost](https://github.com/databio/bedhost)). The object exposes API for interacting with the results and is backed by a [PostgreSQL](https://www.postgresql.org/) database.\n", - "\n", - "\n", - "`bbconf` provides a way to easily determine a path to the required configuration file. The file can be pointed to by the `$BEDBASE` environment variable. `get_bedbase_cfg` function returns a path which can be either excplicitly provided as an argument or read from the environment variable." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "DEBU 10:09:08 | bbconf:est:266 > Configured logger 'bbconf' using logmuse v0.2.6 \n" - ] - } - ], - "source": [ - "import logmuse\n", - "logmuse.init_logger(\"bbconf\", \"DEBUG\")\n", - "from bbconf import *\n", - "\n", - "bbc = BedBaseConf(config_path=\"../tests/data/config.yaml\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can see above, missing entries are populated with default values.\n", - "\n", - "## Object contents" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`BedBaseConf` objects consist of two [`PipestatManager`](http://pipestat.databio.org/) instances. These objects are responsible for bedfiles and bedsets metadata management. Additionally, `BedBaseConf` maintains a \"relationship table\" that stores the information regarding the bedfile-bedset relationsips, i.e. 
which bedfile is a part of which bedset.\n", - "\n", - "The `PipestatManager` instances for bedfiles and bedsets can be accessed via the object properties: `BedBaseConf.bed` and `BedBaseConf.bedset`, respectively:\n", - "\n", - "### `BedBaseConf.bed`:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PipestatManager (bedfiles)\n", - "Backend: PostgreSQL\n", - "Results schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/bbconf/schemas/bedfiles_schema.yaml\n", - "Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml\n", - "Records count: 11\n" - ] - } - ], - "source": [ - "print(bbc.bed)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `BedBaseConf.bedset`:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PipestatManager (bedsets)\n", - "Backend: PostgreSQL\n", - "Results schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/bbconf/schemas/bedsets_schema.yaml\n", - "Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml\n", - "Records count: 3\n" - ] - } - ], - "source": [ - "print(bbc.bedset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `BedBaseConf.config`:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Additionally, there's a `BedBaseConf.config` property, that can be used to retrieve the bedbase project configuration values, which include both ones declared in the configuration file and default ones:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "database:\n", - " name: pipestat-test\n", - " user: postgres\n", - " password: pipestat-password\n", - " host: localhost\n", - " port: 5432\n", - "path:\n", - " pipeline_output_path: $BEDBASE_DATA_PATH/outputs\n", - " bedstat_dir: bedstat_output\n", - " bedbuncher_dir: bedbuncher_output\n", - " remote_url_base: null\n", - "server:\n", - " host: 0.0.0.0\n", - " port: 8000\n" - ] - } - ], - "source": [ - "print(bbc.config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running a database" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Before we start interacting with the database, we need to establish the connection. The required database information is sourced from the object itself. Obviously, the PostgreSQL database instance has to be launched before and running in the background. For example, to run the database in a Docker container, execute these two lines:\n", - "\n", - "```\n", - "docker volume create postgres-data\n", - "docker run -d --name bedbase-postgres -p 5432:5432 -e POSTGRES_PASSWORD=bedbasepassword -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres -v postgres-data:/var/lib/postgresql/data postgres\n", - "```\n", - "The environment variables passed to the container need to match the settings in `BedBaseConf` object." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Standardized metadata specification\n", - "\n", - "`bbconf` package comes with a predefined schemas, that describe the required bed and bedset metadata including the identifiers and types. 
For example, name of the bedfile, that will be stored in the column `\"name\"` has to be a string, whereas columns `\"widths_histogram\"` expects an image:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'type': 'string', 'description': 'BED file name'}\n", - "{'type': 'image', 'description': 'Quantile-trimmed histogram of widths'}\n" - ] - } - ], - "source": [ - "print(bbc.bed.schema[\"name\"])\n", - "print(bbc.bed.schema[\"widths_histogram\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A result of type `image` is in fact a mapping with three required elements: `path`, `thumbnail_path` and `title`. The actual jsonschema schemas can be accessed as `result_schemas` property for both tables:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'type': 'object',\n", - " 'description': 'Quantile-trimmed histogram of widths',\n", - " 'properties': {'path': {'type': 'string'},\n", - " 'thumbnail_path': {'type': 'string'},\n", - " 'title': {'type': 'string'}},\n", - " 'required': ['path', 'thumbnail_path', 'title']}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.bed.result_schemas[\"widths_histogram\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Convenient metadata management and exploration\n", - "\n", - "Building on `PipestatManager`s `BedBaseConf` offers multiple methods for bedfile and bedset metadata management and exploration. 
Here are some examples:\n", - "\n", - "### Get the number of reported bedfiles" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "11" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.bed.record_count" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Report metadata for a bedfile or bedset" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "These results exist for '78c0e4753d04b238fc07e4ebe5a02984': ['name']\n" - ] - }, - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.bed.report(record_identifier=\"78c0e4753d04b238fc07e4ebe5a02984\", values={\"name\": \"some_name\"})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Oops, `name` for this bedfile has been reported already. `BedBaseConf`, does not allow reporting results overwriting, unless it's explicitly forced with `force_overwrite=True`.\n", - "\n", - "Let's try reporting a different value:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "'test' is not a known result. 
Results defined in the schema are: ['name', 'md5sum', 'bedfile', 'bigbedfile', 'regions_no', 'gc_content', 'mean_absolute_tss_dist', 'mean_region_width', 'exon_frequency', 'intron_frequency', 'promoterprox_frequency', 'intergenic_frequency', 'promotercore_frequency', 'fiveutr_frequency', 'threeutr_frequency', 'fiveutr_percentage', 'threeutr_percentage', 'promoterprox_percentage', 'exon_percentage', 'intron_percentage', 'intergenic_percentage', 'promotercore_percentage', 'tssdist', 'chrombins', 'gccontent', 'paritions', 'expected_partitions', 'cumulative_partitions', 'widths_histogram', 'neighbor_distances', 'open_chromatin', 'other'].", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbbc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreport\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrecord_identifier\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"78c0e4753d04b238fc07e4ebe5a02984\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"test\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"some_value\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/pipestat.py\u001b[0m in \u001b[0;36mreport\u001b[0;34m(self, values, record_identifier, force_overwrite, strict_type, return_id)\u001b[0m\n\u001b[1;32m 764\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mSchemaNotFoundError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"report results\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 765\u001b[0m \u001b[0mresult_identifiers\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 766\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massert_results_defined\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresult_identifiers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 767\u001b[0m existing = self._check_which_results_exist(\n\u001b[1;32m 768\u001b[0m rid=record_identifier, results=result_identifiers)\n", - "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/pipestat.py\u001b[0m in \u001b[0;36massert_results_defined\u001b[0;34m(self, results)\u001b[0m\n\u001b[1;32m 1029\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1030\u001b[0m assert r in known_results, SchemaError(\n\u001b[0;32m-> 1031\u001b[0;31m \u001b[0;34mf\"'{r}' is not a known result. Results defined in the \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1032\u001b[0m f\"schema are: {list(known_results)}.\")\n\u001b[1;32m 1033\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAssertionError\u001b[0m: 'test' is not a known result. 
Results defined in the schema are: ['name', 'md5sum', 'bedfile', 'bigbedfile', 'regions_no', 'gc_content', 'mean_absolute_tss_dist', 'mean_region_width', 'exon_frequency', 'intron_frequency', 'promoterprox_frequency', 'intergenic_frequency', 'promotercore_frequency', 'fiveutr_frequency', 'threeutr_frequency', 'fiveutr_percentage', 'threeutr_percentage', 'promoterprox_percentage', 'exon_percentage', 'intron_percentage', 'intergenic_percentage', 'promotercore_percentage', 'tssdist', 'chrombins', 'gccontent', 'paritions', 'expected_partitions', 'cumulative_partitions', 'widths_histogram', 'neighbor_distances', 'open_chromatin', 'other']." - ] - } - ], - "source": [ - "bbc.bed.report(record_identifier=\"78c0e4753d04b238fc07e4ebe5a02984\", values={\"test\": \"some_value\"})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Oops, the result `test` is not allowed, since it hasn't been specified in the schema. Results that are allowed are prinded in the error message above.\n", - "\n", - "Let's try reporting a new bedfile then:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Reported records for '78c1e4111d04b238fc11e4ebe5a02984' in 'bedfiles' namespace:\n", - " - name: some_name\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.bed.report(record_identifier=\"78c1e4111d04b238fc11e4ebe5a02984\", values={\"name\": \"some_name\"})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Success, the name for the bedfile identified by `78c1e4111d04b238fc11e4ebe5a02984` has been reported.\n", - "\n", - "Therefore, we can retrieve this result:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'some_name'" - ] - }, - 
"execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.bed.retrieve(record_identifier=\"78c1e4111d04b238fc11e4ebe5a02984\", result_identifier=\"name\") " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or all the reported results:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'name': 'some_name'}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.bed.retrieve(record_identifier=\"78c1e4111d04b238fc11e4ebe5a02984\") " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Naturally, a record can be removed:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Removing '78c1e4111d04b238fc11e4ebe5a02984' record\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.bed.remove(record_identifier=\"78c1e4111d04b238fc11e4ebe5a02984\") " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Report bedfile-bedset relationships" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another useful feature of `BedBaseConf` is conveninent many to many bedfile-bedset relationships handling. To report one use `BedBaseConf.report_relationship` method:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "bbc.report_relationship(bedfile_id=3, bedset_id=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can select bedfiles that are part of a bedsets with name \"bedsetOver1kRegions\". Therefore they need to match the following query: `name='bedsetOver1kRegions'`. 
With `bedfile_col` argument we select the bedfile table columns we're interested in:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[1, 'GSE105587_ENCFF018NNF_conservative_idr_thresholded_peaks_GRCh38'],\n", - " [2, 'GSE105977_ENCFF617QGK_optimal_idr_thresholded_peaks_GRCh38'],\n", - " [3, 'GSE105977_ENCFF793SZW_conservative_idr_thresholded_peaks_GRCh38'],\n", - " [4, 'GSE105977_ENCFF937CGY_peaks_GRCh38'],\n", - " [5, 'GSE91663_ENCFF316ASR_peaks_GRCh38'],\n", - " [6, 'GSE91663_ENCFF319TPR_conservative_idr_thresholded_peaks_GRCh38'],\n", - " [7, 'GSE91663_ENCFF553KIK_optimal_idr_thresholded_peaks_GRCh38'],\n", - " [8, 'GSM2423312_ENCFF155HVK_peaks_GRCh38'],\n", - " [9, 'GSM2423313_ENCFF722AOG_peaks_GRCh38'],\n", - " [10, 'GSM2827349_ENCFF196DNQ_peaks_GRCh38'],\n", - " [11, 'GSM2827350_ENCFF928JXU_peaks_GRCh38']]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbc.select_bedfiles_for_bedset(condition=\"name=%s\", condition_val=[\"bedsetOver1kRegions\"], bedfile_col=[\"id\", \"name\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The unwanted relationships can be removed with `BedBaseConf.remove_relationship` method:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "bbc.remove_relationship(bedfile_ids=[3], bedset_id=2)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/bedbase/README.md b/docs/bedbase/README.md index d89360a..ccbb9f1 
100644 --- a/docs/bedbase/README.md +++ b/docs/bedbase/README.md @@ -10,11 +10,22 @@ BEDbase is a unifying platform for aggregating, analyzing and serving genomic re ## Services -- API, production: -- API, dev: -- Front-end, production: (alias ) -- Front-end, dev: -- Object store, production: - base URL for cloudflare/backblaze +--- +**Deployed public instance**: https://bedbase.org/ + +**Documentation**: https://docs.bedbase.org/bedhost + +**API**: https://api.bedbase.org/ + +**API dev**: https://api-dev.bedbase.org/ + +**UI**: https://bedbase.org/ + +**UI dev**: https://dev.bedhost.pages.dev/ + +**Source Code**: https://github.com/databio/bedhost/ + +**Object store, production** - base URL for cloudflare/backblaze ## Tutorial @@ -22,10 +33,10 @@ There's a tutorial for bedbase in the [docs_jupyter](/docs_jupyter) folder (prob ## Components -- [bedboss](https://github.com/databio/bedboss): Main BEDbase processing pipeline, combining bedqc, bedmaker, bedstat, and bedbuncher -- [bbconf](http://github.com/databio/bbconf): BEDbase configuration +- [bedboss](https://github.com/databio/bedboss): Main BEDbase processing pipeline and managing tool, combining bedqc, bedmaker, bedstat, and bedbuncher +- [bbconf](http://github.com/databio/bbconf): BEDbase configuration package (core of the BEDbase stack) - [bedhost](http://github.com/databio/bedhost): FastAPI application with API for accessing data -- [bedhost-ui](http://github.com/databio/bedhost-ui): Front-end user interface built with React +- [bedhost-ui](http://github.com/databio/bedhost): Front-end user interface built with React - [bedbase.org repository](https://github.com/databio/bedbase.org): Repository for deploying the bedhost container to AWS. 
- [all_geo_beds](all_geo_beds): A subfolder, is the scripts to download all bed files on GEO using geofetch and build a backend to host the metadata using bedstat - [geniml](https://github.com/databio/geniml): Machine learning for genomic intervals diff --git a/docs/bedhost/README.md b/docs/bedhost/README.md index f3235fa..d2bd03f 100644 --- a/docs/bedhost/README.md +++ b/docs/bedhost/README.md @@ -15,6 +15,12 @@ It needs a path to the *bedbase configuration file*, which can be provided eithe **API**: https://api.bedbase.org/ +**API dev**: https://api-dev.bedbase.org/ + +**UI**: https://bedbase.org/ + +**UI dev**: https://dev.bedhost.pages.dev/ + **Source Code**: https://github.com/databio/bedhost/ --- diff --git a/docs/bedhost/changelog.md b/docs/bedhost/changelog.md index b17414d..16151f1 100644 --- a/docs/bedhost/changelog.md +++ b/docs/bedhost/changelog.md @@ -2,6 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.4.0] +- Support of new bbconf. +- Updated endpoints. 
+ + ## [0.3.0] -- 2023-03-01 ### change - switch to pydantic2 diff --git a/mkdocs.yml b/mkdocs.yml index 326fbf8..d3bd227 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -51,8 +51,8 @@ nav: - Changelog: bedhost/changelog.md - BBConf: - BBConf: bbconf/README.md - - Configuring: bbconf/code/config.md - - Demo: bbconf/code/demo.md + - Configuring: bedboss/how-to-configure.md + - Demo: bbconf/demo.md - Guide: bbconf/bbc_api.md - Changelog: bbconf/changelog.md - Reference: From 140691f8601a5a4ee3f5da8980d507cacd538bb6 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Apr 2024 13:16:59 -0400 Subject: [PATCH 7/7] updated bbconf overview --- docs/bbconf/demo.md | 1 - docs/bbconf/overview.md | 67 +++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 2 +- 3 files changed, 68 insertions(+), 2 deletions(-) delete mode 100644 docs/bbconf/demo.md create mode 100644 docs/bbconf/overview.md diff --git a/docs/bbconf/demo.md b/docs/bbconf/demo.md deleted file mode 100644 index e92401c..0000000 --- a/docs/bbconf/demo.md +++ /dev/null @@ -1 +0,0 @@ -### 🚧 Docs in progress! Stay tuned for updates. We're working hard to bring you valuable content soon! \ No newline at end of file diff --git a/docs/bbconf/overview.md b/docs/bbconf/overview.md new file mode 100644 index 0000000..853d428 --- /dev/null +++ b/docs/bbconf/overview.md @@ -0,0 +1,67 @@ +# DEMO of the bbconf module + +`bbconf` is a configuration and management tool for BEDbase, facilitating the reading of the configuration file, +setting up connections to PostgreSQL, PEPhub, S3, and Qdrant databases, managing file paths, and storing transformer models. + +### Introduction + +`bbconf` is divided into 2 main modules: +- `bbconf.config_parser` - reads the configuration file and sets up connections to databases. +`BedBaseConfig` class is used to store the passwords, configuration, connection objects, and other information. 
+ +- `bbconf.modules` - contains modules for managing `bed_files`, `bedsets`, and other common functionalities. +Users will mainly use this module because it provides classes with methods for managing the database. + +### Example: + +#### 1) Init the `BedBaseAgent` class + +```python +from bbconf.bbagent import BedBaseAgent + +bbagent = BedBaseAgent(bbconf_file_path) +``` +Where `bbconf_file_path` is the path to the configuration file. How to create a configuration file is described in the configuration section. + +#### Upload a bedfile to the database + +```python + + bbagent.bed.add( + identifier=bed_metadata.bed_digest, + stats=stats.model_dump(exclude_unset=True), + metadata=other_metadata, + plots=plots.model_dump(exclude_unset=True), + files=files.model_dump(exclude_unset=True), + classification=classification.model_dump(exclude_unset=True), + upload_qdrant=upload_qdrant, + upload_pephub=upload_pephub, + upload_s3=upload_s3, + local_path=outfolder, + overwrite=force_overwrite, + nofail=True, + ) +``` + +#### Get a bedfile from the database + +```python +bed = bbagent.bed.get(identifier=bed_id, full=True,) +``` + +#### Get a bedset from the database + +```python +bedset = bbagent.bedset.get(identifier=bedset_id, full=True,) +``` + +#### User can access credentials and other configurations from the `BedBaseConfig` class + +e.g. to get pephub namespace used in config you can use the following code: + +```python +bbagent.config._config["pephub"]["namespace"] +``` + + +Full API of bbconf can be found [here](./bbc_api.md) \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index d3bd227..2af8af6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -52,7 +52,7 @@ nav: - BBConf: - BBConf: bbconf/README.md - Configuring: bedboss/how-to-configure.md - - Demo: bbconf/demo.md + - Overview: bbconf/overview.md - Guide: bbconf/bbc_api.md - Changelog: bbconf/changelog.md - Reference: