From 1dc0190d54218477070eb0d152685ba094373b4e Mon Sep 17 00:00:00 2001 From: Denis Yuen Date: Mon, 4 Jul 2016 17:09:01 -0400 Subject: [PATCH 1/6] Verified and plain descriptor formats Add fields to return verified information #12 Also add plain descriptor formats --- .../swagger/ga4gh-tool-discovery.yaml | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/main/resources/swagger/ga4gh-tool-discovery.yaml b/src/main/resources/swagger/ga4gh-tool-discovery.yaml index 11565e8a..ae2bfc11 100644 --- a/src/main/resources/swagger/ga4gh-tool-discovery.yaml +++ b/src/main/resources/swagger/ga4gh-tool-discovery.yaml @@ -125,11 +125,14 @@ paths: parameters: - name: format in: query - description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. + description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. Plain formats return the bare descriptor while the "non-plain" formats return a descriptor wrapped + with metadata type: string enum: - CWL - WDL + - plain-CWL + - plain-WDL - name: id in: path description: A unique identifier of the tool, scoped to this registry, for example `123456` @@ -159,11 +162,14 @@ paths: parameters: - name: format in: query - description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. + description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. 
Plain formats return the bare descriptor while the "non-plain" formats return a descriptor wrapped + with metadata type: string enum: - CWL - WDL + - plain-CWL + - plain-WDL - name: id in: path description: A unique identifier of the tool, scoped to this registry, for example `123456` @@ -281,6 +287,12 @@ definitions: type: array items: type: string + verified: + type: boolean + description: Reports whether this tool has been verified by a specific organization or individual + verified-source: + type: string + description: Source of metadata that can support a verified tool, such as an email or URL versions: description: A list of versions for this tool type: array @@ -314,6 +326,12 @@ definitions: meta-version: type: string description: 'The version of this tool version in the registry. Iterates when fields like the description, author, etc. are updated.' + verified: + type: boolean + description: Reports whether this tool has been verified by a specific organization or individual + verified-source: + type: string + description: Source of metadata that can support a verified tool, such as an email or URL ToolDescriptor: description: A tool descriptor is a metadata document that describes one or more tools. required: From edab31aa37f99f1926287979e6f9a39c701d71ad Mon Sep 17 00:00:00 2001 From: Denis Yuen Date: Mon, 4 Jul 2016 17:28:12 -0400 Subject: [PATCH 2/6] Add tool-type listing --- .../resources/swagger/ga4gh-tool-discovery.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/resources/swagger/ga4gh-tool-discovery.yaml b/src/main/resources/swagger/ga4gh-tool-discovery.yaml index ae2bfc11..a8a3c3c6 100644 --- a/src/main/resources/swagger/ga4gh-tool-discovery.yaml +++ b/src/main/resources/swagger/ga4gh-tool-discovery.yaml @@ -233,6 +233,21 @@ paths: description: A Metadata object describing this service. 
schema: $ref: '#/definitions/Metadata' + + /tool-types: + get: + summary: List all tool types + description: > + This endpoint returns all tool-types available + tags: + - GA4GH + responses: + '200': + description: An array of all available tool types. + schema: + type: array + items: + $ref: '#/definitions/ToolType' definitions: ToolType: From bedf6eaf2bb9ed0587126ceb1bfe489bb0a2b29f Mon Sep 17 00:00:00 2001 From: Denis Yuen Date: Tue, 5 Jul 2016 11:16:31 -0400 Subject: [PATCH 3/6] v 1 * also moving format to path for convenience --- src/main/resources/swagger/ga4gh-tool-discovery.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/resources/swagger/ga4gh-tool-discovery.yaml b/src/main/resources/swagger/ga4gh-tool-discovery.yaml index a8a3c3c6..4bda08ee 100644 --- a/src/main/resources/swagger/ga4gh-tool-discovery.yaml +++ b/src/main/resources/swagger/ga4gh-tool-discovery.yaml @@ -2,7 +2,7 @@ swagger: '2.0' info: title: GA4GH Tool Discovery API description: 'Proposed API for GA4GH tool repositories. A tool consists of a (currently Docker) image paired with a document that describes how to use that image (currently CWL or WDL) and a Dockerfile that describes how to re-produce the image in the future. We use the following terminology, an "image" describes a (Docker) container as stored on a filesystem, a "tool" describes one of the triples as described above, and a "container" should only be used to describe a running image' - version: 0.2.0 + version: 1.0 produces: - application/json - text/plain @@ -116,7 +116,7 @@ paths: type: array items: $ref: '#/definitions/Tool' - /tools/{id}/versions/{version-id}/descriptor: + /tools/{id}/versions/{version-id}/{format}/descriptor: get: summary: Get the tool descriptor (CWL/WDL) for the specified tool. description: Returns the CWL or WDL descriptor for the specified tool.
@@ -124,7 +124,8 @@ paths: - GA4GH parameters: - name: format - in: query + required: true + in: path description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. Plain formats return the bare descriptor while the "non-plain" formats return a descriptor wrapped with metadata type: string @@ -153,7 +154,7 @@ paths: schema: $ref: '#/definitions/Error' - /tools/{id}/versions/{version-id}/descriptor/{relative-path}: + /tools/{id}/versions/{version-id}/{format}/descriptor/{relative-path}: get: summary: Get additional tool descriptor files (CWL/WDL) relative to the main file description: Returns additional CWL or WDL descriptors for the specified tool in the same or subdirectories @@ -161,7 +162,8 @@ paths: - GA4GH parameters: - name: format - in: query + in: path + required: true description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. Plain formats return the bare descriptor while the "non-plain" formats return a descriptor wrapped with metadata type: string From 807349c36cdb070dceb55de38bbdeec3bd4a14b8 Mon Sep 17 00:00:00 2001 From: Denis Yuen Date: Tue, 5 Jul 2016 11:18:17 -0400 Subject: [PATCH 4/6] Typo --- src/main/resources/swagger/ga4gh-tool-discovery.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/swagger/ga4gh-tool-discovery.yaml b/src/main/resources/swagger/ga4gh-tool-discovery.yaml index 4bda08ee..39833ae9 100644 --- a/src/main/resources/swagger/ga4gh-tool-discovery.yaml +++ b/src/main/resources/swagger/ga4gh-tool-discovery.yaml @@ -2,7 +2,7 @@ swagger: '2.0' info: title: GA4GH Tool Discovery API description: 'Proposed API for GA4GH tool repositories. 
A tool consists of a (currently Docker) image paired with a document that describes how to use that image (currently CWL or WDL) and a Dockerfile that describes how to re-produce the image in the future. We use the following terminology, an "image" describes a (Docker) container as stored on a filesystem, a "tool" describes one of the triples as described above, and a "container" should only be used to describe a running image' - version: 1.0 + version: "1.0" produces: - application/json - text/plain From 5042c472252cca2625ed4192bfebf7f575ee83a4 Mon Sep 17 00:00:00 2001 From: Denis Yuen Date: Tue, 5 Jul 2016 14:46:34 -0400 Subject: [PATCH 5/6] Format appears to be an undocumented protected keyword in paths --- .../resources/swagger/ga4gh-tool-discovery.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/resources/swagger/ga4gh-tool-discovery.yaml b/src/main/resources/swagger/ga4gh-tool-discovery.yaml index 39833ae9..bcd12525 100644 --- a/src/main/resources/swagger/ga4gh-tool-discovery.yaml +++ b/src/main/resources/swagger/ga4gh-tool-discovery.yaml @@ -116,17 +116,17 @@ paths: type: array items: $ref: '#/definitions/Tool' - /tools/{id}/versions/{version-id}/{format}/descriptor: + /tools/{id}/versions/{version-id}/{type}/descriptor: get: summary: Get the tool descriptor (CWL/WDL) for the specified tool. description: Returns the CWL or WDL descriptor for the specified tool. tags: - GA4GH parameters: - - name: format + - name: type required: true in: path - description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. Plain formats return the bare descriptor while the "non-plain" formats return a descriptor wrapped + description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output type to return. 
Plain types return the bare descriptor while the "non-plain" types return a descriptor wrapped with metadata type: string enum: @@ -150,21 +150,21 @@ paths: schema: $ref: '#/definitions/ToolDescriptor' '404': - description: The tool can not be output in the specified format. + description: The tool can not be output in the specified type. schema: $ref: '#/definitions/Error' - /tools/{id}/versions/{version-id}/{format}/descriptor/{relative-path}: + /tools/{id}/versions/{version-id}/{type}/descriptor/{relative-path}: get: summary: Get additional tool descriptor files (CWL/WDL) relative to the main file description: Returns additional CWL or WDL descriptors for the specified tool in the same or subdirectories tags: - GA4GH parameters: - - name: format + - name: type in: path required: true - description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output format to return. Plain formats return the bare descriptor while the "non-plain" formats return a descriptor wrapped + description: The output type of the descriptor. If not specified it is up to the underlying implementation to determine which output type to return. Plain types return the bare descriptor while the "non-plain" types return a descriptor wrapped with metadata type: string enum: @@ -193,7 +193,7 @@ paths: schema: $ref: '#/definitions/ToolDescriptor' '404': - description: The tool can not be output in the specified format. + description: The tool can not be output in the specified type. 
schema: $ref: '#/definitions/Error' From a12a1cc0afd93400cd9bf21ddb8113a0bdcf47f8 Mon Sep 17 00:00:00 2001 From: Brian O'Connor Date: Mon, 25 Jul 2016 08:34:18 -0700 Subject: [PATCH 6/6] updating the README --- README.md | 66 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index f0c01c70..f6054eae 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,16 @@ ![ga4gh logo](http://genomicsandhealth.org/files/logo_ga.png) -Schemas for the Data Working Group - Containers and Workflows Task Team -======================================================================= +Schemas for the GA4GH Tool Registry API +======================================= -![](http://online.swagger.io/validator/?url=https://raw.githubusercontent.com/ga4gh/tool-registry-schemas/develop/src/main/resources/swagger/ga4gh-tool-discovery.yaml) +This repository is the home for the schema for the GA4GH Tool Registry API. The goal of the API is to provide a standardized way to describe the availability of tools and workflows. In this way, we can have multiple repositories that share Docker-based tools and WDL/CWL-based workflows and have a consistent way to interact, search, and retrieve information from these various registries. The end goal is to make it much easier to share scientific tools and workflows, enhancing our ability to make research reproducible, sharable, and transparent. -The [Global Alliance for Genomics and Health](http://genomicsandhealth.org/) is an international +**[View in the Swagger Editor](http://editor.swagger.io/#/?import=https://raw.githubusercontent.com/ga4gh/tool-registry-schemas/develop/src/main/resources/swagger/ga4gh-tool-discovery.yaml).** *Manually load the JSON if working from a non-develop branch version.* + +The [Global Alliance for Genomics and Health](http://genomicsandhealth.org/) (GA4GH) is an international coalition, formed to enable the sharing of genomic and clinical data. 
-The [Data Working Group](http://ga4gh.org/#/) concentrates on data representation, storage, +The GA4GH [Data Working Group](http://ga4gh.org/#/) concentrates on data representation, storage, and analysis, including working with platform development partners and industry leaders to develop standards that will facilitate interoperability. @@ -18,28 +20,27 @@ Containers and Workflows Task Team The Containers & Workflows working group is an informal, multi-vendor working group born out of the BOSC 2014 codefest, consisting of various organizations and individuals that have an interest in portability of data analysis workflows. Our goal is to create specifications that enable data scientists to describe analysis tools and workflows that are powerful, easy to use, portable, and support reproducibility for a variety of problem areas including data-intensive science like bioinformatics, physics, and astronomy; and business analytics such as log analysis, data mining, and ETL. -From within this group, two approaches have emerged, resulting in the production of two distinct but complementary specifications: the Common Workflow Language, or CWL, and the Workflow Description Language, or WDL. The CWL approach emphasizes execution features and machine-readability, and serves a core target audience of software and platform developers. The WDL approach, on the other hand, emphasizes scripting and human-readability, and serves a core target audience of research scientists. +From within this group, two approaches have emerged, resulting in the production of two distinct but complementary specifications: the Common Workflow Language, or CWL, and the Workflow Description Language, or WDL. The CWL approach emphasizes execution features and machine-readability, and serves a core target audience of software and platform developers. The WDL approach, on the other hand, emphasizes scripting and human-readability, and serves a core target audience of research scientists. 
Together, these two specifications cover a very wide spectrum of analysis use cases. Work is underway to ensure interoperability through conversion and related utilities. -What is this? ------------- +What is the Tool Registry API Schema? +------------------------------------- -Currently, this is the home of the Registry API proposal. The registry API is a minimal common API describing tools that we proposal for support by multiple registries like [Dockstore](https://www.dockstore.org/), [BioShadock](https://docker-ui.genouest.org/app/#/), and [Agora](https://github.com/broadinstitute/agora) for the purposes of exchange, indexing, and searching. +This is the home of the schema for the GA4GH Tool Registry API. The GA4GH Tool Registry API is a standard for listing and describing available tools (both stand-alone, Docker-based tools as well as workflows in CWL or WDL) in a given registry. This defines a minimal, common API describing tools that we propose for support by multiple tool/workflow registries like [Dockstore](https://www.dockstore.org/), [BioShadock](https://docker-ui.genouest.org/app/#/), and [Agora](https://github.com/broadinstitute/agora) for the purposes of exchange, indexing, and searching. Our current proposal is to start with a read-only API due to potentially different views and approaches to registration/security. Key features of the current API proposal: * read-only API -* May serve up CWl, WDL, to describe a tool depending on what is available +* May serve up CWL or WDL to describe a tool or represent a workflow depending on the tool/workflow submitter * ID: globally unique across systems and also identifies the system that it came from (ex: 123456323@agora.broadinstitute.org ) -Outstanding questions: +Outstanding questions: * How do we track authorship? Should we track authorship of the tool metadata, the Docker image, or the underlying algorithm, or all of above?
-* How to describe indexing and external services like an external [sparql](https://github.com/common-workflow-language/workflows#sparql) service. -* Versioning +* How to describe indexing and external services like an external [sparql](https://github.com/common-workflow-language/workflows#sparql) service. * Terminology discussion (do we describe triples separately from tools? should we describe them as aggregations of tools for just the case that CWL documents have more than one tool? etc.) @@ -48,23 +49,42 @@ How to view See the swagger editor to view our [schema in progress](http://editor.swagger.io/#/?import=https://raw.githubusercontent.com/ga4gh/tool-registry-schemas/develop/src/main/resources/swagger/ga4gh-tool-discovery.yaml). -If the current schema fails to validate, visit [debugging](http://online.swagger.io/validator/debug?url=https://raw.githubusercontent.com/ga4gh/tool-registry-schemas/develop/src/main/resources/swagger/ga4gh-tool-discovery.yaml) - - How to contribute changes ------------------------- Take cues for now from the [ga4gh/schemas](https://github.com/ga4gh/schemas/blob/master/CONTRIBUTING.rst) document. +At the very least, create an issue in our [GitHub tracker](https://github.com/ga4gh/tool-registry-schemas/issues). + +Even better, fork the codebase, fix the issue, and create a pull request back to the project along with your ticket. + +Adding registries +----------------- + +To add a registry that supports the GA4GH Registry API: + +1. fork the repo +1. modify [registry.json](registry.json) +1. submit a pull request back to the project +1. we will confirm the site is valid then accept your pull request + +Cross indexing Tool Registry sites +---------------------------------- + +See our [registry.json](registry.json) for a list of known registries that conform to the Tool Registry API standard.
+ License ------- -See the [LICENSE] +See the [LICENSE](LICENSE) + +For more information +-------------------- - []: http://genomicsandhealth.org/files/logo_ga.png - [Global Alliance for Genomics and Health]: http://genomicsandhealth.org/ - [INSTALL.md]: INSTALL.md - [CONTRIBUTING.md]: CONTRIBUTING.md - [LICENSE]: LICENSE - [Google Forum]: https://groups.google.com/forum/#!forum/ga4gh-dwg-containers-workflows +* http://genomicsandhealth.org/ +* [INSTALL.md](INSTALL.md) +* [CONTRIBUTING.md](CONTRIBUTING.md) +* [LICENSE](LICENSE) +* [Google Groups - old](https://groups.google.com/forum/#!forum/ga4gh-dwg-containers-workflows) +* [Google Groups - new](https://groups.google.com/a/genomicsandhealth.org/forum/#!forum/ga4gh-dwg-containers-workflows)