-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
355 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,264 @@ | ||
{ | ||
"$schema": "http://json-schema.org/draft-07/schema#", | ||
"type": "array", | ||
"items": { | ||
"anyOf": [ | ||
{ | ||
"$ref": "#/definitions/PipelineProvenanceRecord", | ||
"description": "A record of the pipeline that was run." | ||
}, | ||
{ | ||
"$ref": "#/definitions/ProcessProvenanceRecord", | ||
"description": "A record of a process that was run." | ||
}, | ||
{ | ||
"$ref": "#/definitions/InputFileProvenanceRecord", | ||
"description": "A record of an input file that was used." | ||
} | ||
] | ||
}, | ||
"definitions": { | ||
"PipelineProvenanceRecord": { | ||
"title": "PipelineProvenanceRecord", | ||
"type": "object", | ||
"properties": { | ||
"pipeline_name": { | ||
"type": "string", | ||
"description": "Name of the pipeline.", | ||
"examples": [ | ||
"BCCDC-PHL/routine-assembly", | ||
"BCCDC-PHL/plasmid-screen" | ||
] | ||
}, | ||
"pipeline_version": { | ||
"type": "string", | ||
"description": "Version of the pipeline.", | ||
"examples": [ | ||
"v0.1.0", | ||
"0.1.0", | ||
"1", | ||
"2.0-beta" | ||
] | ||
}, | ||
"timestamp_analysis_start": { | ||
"type": "string", | ||
"description": "Timestamp for the start of a pipeline run. ISO-8601-formatted date, followed by 'T' and a 24-hour timestamp, assumed to be in the local timezone if not specified. Timezone may be specified with an offset from UTC. Timestamp precision is not guaranteed.", | ||
"format": "date-time", | ||
"examples": [ | ||
"2021-12-06T16:12:31.252055", | ||
"2022-01-12T01:22:51-08:00", | ||
"2022-02-04T16:55:03.182-08:00" | ||
] | ||
} | ||
}, | ||
"required": [ | ||
"pipeline_name", | ||
"pipeline_version" | ||
] | ||
}, | ||
"ProcessProvenanceRecord": { | ||
"title": "ProcessProvenanceRecord", | ||
"type": "object", | ||
"properties": { | ||
"process_name": { | ||
"type": "string", | ||
"description": "Name of the process.", | ||
"examples": [ | ||
"fastp", | ||
"bwa_mem", | ||
"samtools_mpileup", | ||
"align_reads_to_ref", | ||
"trim_reads", | ||
"CALL_VARIANTS" | ||
] | ||
}, | ||
"tools": { | ||
"type": "array", | ||
"description": "The tools used to run the process.", | ||
"items": { | ||
"$ref": "#/definitions/Tool" | ||
}, | ||
"examples": [ | ||
[ | ||
{ | ||
"tool_name": "fastp", | ||
"tool_version": "0.20.0", | ||
"subcommand": "trim", | ||
"parameters": [ | ||
{ | ||
"parameter": "cut_tail", | ||
"value": null | ||
} | ||
] | ||
} | ||
], | ||
[ | ||
{ | ||
"tool_name": "bwa", | ||
"tool_version": "0.7.17-r1188", | ||
"subcommand": "mem", | ||
"parameters": [ | ||
{ | ||
"parameter": "exclude_flags", | ||
"value": 1540 | ||
}, | ||
{ | ||
"parameter": "min_base_quality", | ||
"value": 20 | ||
} | ||
] | ||
} | ||
] | ||
] | ||
} | ||
}, | ||
"required": [ | ||
"process_name" | ||
] | ||
}, | ||
"Tool": { | ||
"title": "Tool", | ||
"type": "object", | ||
"properties": { | ||
"tool_name": { | ||
"type": "string", | ||
"description": "Name of the tool.", | ||
"examples": [ | ||
"fastp", | ||
"bwa", | ||
"samtools", | ||
"bcftools", | ||
"medaka" | ||
] | ||
}, | ||
"tool_version": { | ||
"type": "string", | ||
"description": "A number or string associated with a specific snapshot of the development state of a tool. Should (but may not always) map to a tagged release on GitHub or another version control system.", | ||
"examples": [ | ||
"0.1.0", | ||
"v0.1.1", | ||
"1.1", | ||
"2", | ||
"0.7.17-r1188" | ||
] | ||
}, | ||
"subcommand": { | ||
"type": "string", | ||
"description": "Subcommand of the tool.", | ||
"examples": [ | ||
"mem", | ||
"mpileup", | ||
"filter" | ||
] | ||
}, | ||
"parameters": { | ||
"type": "array", | ||
"items": { | ||
"$ref": "#/definitions/ToolParameter" | ||
}, | ||
"description": "The specific invocation of a process may depend on values that can be varied. These values are the parameters to that process. Each parameter has a name and a value.", | ||
"examples": [ | ||
[ | ||
{ | ||
"parameter": "cut_tail", | ||
"value": null | ||
} | ||
], | ||
[ | ||
{ | ||
"parameter": "exclude_flags", | ||
"value": 1540 | ||
} | ||
], | ||
[ | ||
{ | ||
"parameter": "min_base_quality", | ||
"value": 20 | ||
} | ||
], | ||
[ | ||
{ | ||
"parameter": "min_coverage", | ||
"value": 10 | ||
} | ||
] | ||
] | ||
} | ||
} | ||
}, | ||
"ToolParameter": {
  "title": "ToolParameter",
  "type": "object",
  "properties": {
    "parameter": {
      "type": "string",
      "description": "Name of the parameter.",
      "examples": [
        "cut_tail",
        "exclude_flags",
        "min_base_quality",
        "min_coverage"
      ]
    },
    "value": {
      "type": [
        "string",
        "number",
        "boolean",
        "null"
      ],
      "description": "Value of the parameter, or null if the parameter is a flag without a value. May be given as a string, number or boolean.",
      "examples": [
        null,
        1540,
        20,
        "10"
      ]
    }
  },
  "required": [
    "parameter"
  ]
},
"InputFileProvenanceRecord": { | ||
"title": "InputFileProvenanceRecord", | ||
"type": "object", | ||
"properties": { | ||
"input_filename": { | ||
"type": "string", | ||
"description": "Filename of an input file.", | ||
"examples": [ | ||
"sample-01_R1.fastq.gz", | ||
"sample-01_R2.fastq.gz", | ||
"ref.fa", | ||
"sample-01.bam", | ||
"sample-01.bam.bai" | ||
] | ||
}, | ||
"input_path": { | ||
"type": "string", | ||
"description": "Absolute path to an input file, at the time that the pipeline was invoked. May be invalid if the input file is moved or renamed after pipeline invocation.", | ||
"examples":[ | ||
"/data/ref_data/ecoli.fa", | ||
"/data/sequence/miseq/210101_M00123_0123_000000000-ABC123/Data/Intensities/BaseCalls/sample-01_S1_L001_R1_001.fastq.gz", | ||
"/data/sequence/miseq/210101_M00123_0123_000000000-ABC123/Data/Intensities/BaseCalls/sample-01_S1_L001_R2_001.fastq.gz" | ||
] | ||
}, | ||
"sha256": { | ||
"type": "string", | ||
"description": "The checksum of a file, calculated with the SHA256 algorithm. Files with identical contents have identical checksums. If a single byte differs, the checksums will be completely different.", | ||
"examples":[ | ||
"b0534592d61321243897e842a9ea655d396d4496cbf6d926b6c6fea8e06aa98d", | ||
"cc66309103da91e337143eb649196d84ed3ebe2ff08a45b197cd4151d137a167" | ||
] | ||
}, | ||
"file_size": { | ||
"type": "integer", | ||
"description": "Size of the file in bytes.", | ||
"examples": [ | ||
123456789, | ||
1234567890 | ||
] | ||
} | ||
}, | ||
"required": [ | ||
"input_filename" | ||
] | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: check-outputs | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- python=3 | ||
- jsonschema=4.20.0 | ||
- pyyaml=6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,69 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import csv | ||
import glob | ||
import json | ||
import urllib.request | ||
|
||
from jsonschema import validate | ||
import yaml | ||
|
||
|
||
def check_provenance_format_valid(provenance_files, schema):
    """
    Check that the provenance files are valid according to the schema.

    Each file is parsed as YAML and validated against ``schema`` with
    ``jsonschema.validate``. Checking stops at the first file that fails
    to parse or validate, and the reason is reported on stderr so that
    the failure can be diagnosed from the CI log.

    :param provenance_files: Paths of the provenance files to check.
    :type provenance_files: list[str]
    :param schema: Parsed JSON schema to validate each file against.
    :type schema: dict
    :return: True if every file parses and validates (also True when no
             files are given), False as soon as one file fails.
    :rtype: bool
    """
    import sys

    for provenance_file in provenance_files:
        with open(provenance_file) as f:
            try:
                # NOTE(review): BaseLoader is understood to load every scalar
                # as a plain string, so schema properties typed as integer or
                # null would not match - confirm this is intended.
                provenance = yaml.load(f, Loader=yaml.BaseLoader)
                validate(provenance, schema)
            except Exception as e:
                # Report the cause instead of silently swallowing it.
                print(
                    f"Provenance check failed for {provenance_file}: {e}",
                    file=sys.stderr,
                )
                return False

    return True
|
||
|
||
def main(args):
    """
    Validate the pipeline's provenance files and write a CSV report.

    Downloads the provenance JSON schema to ``.github/data``, finds every
    ``*_provenance.yml`` file below ``args.pipeline_outdir`` (recursively),
    validates them against the schema and writes one row per test to
    ``args.output`` with the columns ``test_name`` and ``test_result``
    (``PASS`` or ``FAIL``).

    :param args: Parsed command-line arguments; ``pipeline_outdir`` and
                 ``output`` are read.
    :type args: argparse.Namespace
    :return: None
    """
    provenance_schema_url = "https://raw.githubusercontent.com/BCCDC-PHL/pipeline-provenance-schema/main/schema/pipeline-provenance.json"
    provenance_schema_path = ".github/data/pipeline-provenance.json"
    urllib.request.urlretrieve(provenance_schema_url, provenance_schema_path)

    with open(provenance_schema_path) as f:
        provenance_schema = json.load(f)

    provenance_files_glob = f"{args.pipeline_outdir}/**/*_provenance.yml"
    provenance_files = glob.glob(provenance_files_glob, recursive=True)

    tests = [
        {
            "test_name": "provenance_format_valid",
            "test_result": check_provenance_format_valid(provenance_files, provenance_schema),
        }
    ]

    output_fields = [
        "test_name",
        "test_result"
    ]

    output_path = args.output
    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=output_fields)
        writer.writeheader()
        for test in tests:
            writer.writerow({
                "test_name": test["test_name"],
                "test_result": "PASS" if test["test_result"] else "FAIL",
            })
|
||
|
||
if __name__ == '__main__':
    # Command-line entry point: describe the two options, then hand the
    # parsed namespace straight to main().
    cli = argparse.ArgumentParser(description='Check outputs')
    cli.add_argument(
        '--pipeline-outdir',
        type=str,
        help='Path to the pipeline output directory',
    )
    cli.add_argument(
        '-o', '--output',
        type=str,
        help='Path to the output file',
    )
    main(cli.parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/env bash

# Run the output checks inside the check-outputs conda environment and
# exit non-zero if any check is reported as FAIL.

set -eo pipefail

source "${HOME}/.bashrc"

eval "$(conda shell.bash hook)"

conda activate check-outputs

# One variable for the report path, so the writer and the reader below
# cannot drift apart.
results_csv="artifacts/check_outputs_results.csv"

# The checker opens the report for writing; make sure its directory exists.
mkdir -p "$(dirname "${results_csv}")"

.github/scripts/check_outputs.py --pipeline-outdir .github/data/test_output -o "${results_csv}"

# Show the full report in the job log.
cat "${results_csv}"

# 'grep -v FAIL' succeeds whenever any non-FAIL line exists (the CSV header
# always does), so test for the presence of FAIL explicitly instead.
if grep -q 'FAIL' "${results_csv}"; then
    echo "One or more output checks failed." >&2
    exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Create the conda environment described by the check-outputs definition file.
conda env create --file .github/environments/check-outputs.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
*~ | ||
.nextflow* | ||
.github/data/assemblies | ||
.github/data/fastq | ||
.github/data/test_output | ||
artifacts | ||
work | ||
test_input | ||
test_output |