diff --git a/docs/source/images/ex_ambiguous_linker.png b/docs/source/images/ex_ambiguous_linker.png new file mode 100644 index 00000000..f329aced Binary files /dev/null and b/docs/source/images/ex_ambiguous_linker.png differ diff --git a/docs/source/images/ex_precise_linker.png b/docs/source/images/ex_precise_linker.png new file mode 100644 index 00000000..188c33f0 Binary files /dev/null and b/docs/source/images/ex_precise_linker.png differ diff --git a/docs/source/images/ex_revcomp_breakpoint.png b/docs/source/images/ex_revcomp_breakpoint.png new file mode 100644 index 00000000..07b8cd83 Binary files /dev/null and b/docs/source/images/ex_revcomp_breakpoint.png differ diff --git a/docs/source/images/ex_sequence_homology.png b/docs/source/images/ex_sequence_homology.png new file mode 100644 index 00000000..9f7b6241 Binary files /dev/null and b/docs/source/images/ex_sequence_homology.png differ diff --git a/docs/source/images/ex_simple_breakpoint.png b/docs/source/images/ex_simple_breakpoint.png new file mode 100644 index 00000000..c4888065 Binary files /dev/null and b/docs/source/images/ex_simple_breakpoint.png differ diff --git a/docs/source/images/ex_sv_haplotype.png b/docs/source/images/ex_sv_haplotype.png new file mode 100644 index 00000000..90ffbc0b Binary files /dev/null and b/docs/source/images/ex_sv_haplotype.png differ diff --git a/docs/source/images/ex_terminal_breakend.png b/docs/source/images/ex_terminal_breakend.png new file mode 100644 index 00000000..9ba0fa1c Binary files /dev/null and b/docs/source/images/ex_terminal_breakend.png differ diff --git a/examples/ambiguous_linker.yaml b/examples/ambiguous_linker.yaml index aa2d67db..082020b1 100644 --- a/examples/ambiguous_linker.yaml +++ b/examples/ambiguous_linker.yaml @@ -3,7 +3,7 @@ type: Adjacency linker: type: LengthExpression length: 20000 -sequenceTerminals: +adjoinedSequences: - type: SequenceLocation sequenceReference: refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU diff --git 
a/examples/precise_linker.yaml b/examples/precise_linker.yaml index 1cc1d15c..69e9fba1 100644 --- a/examples/precise_linker.yaml +++ b/examples/precise_linker.yaml @@ -3,7 +3,7 @@ type: Adjacency linker: type: LiteralSequenceExpression sequence: CCCGTC -sequenceTerminals: +adjoinedSequences: - type: SequenceLocation sequenceReference: refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU diff --git a/examples/revcomp_breakpoint.yaml b/examples/revcomp_breakpoint.yaml index ef5cc74a..d5ccbba3 100644 --- a/examples/revcomp_breakpoint.yaml +++ b/examples/revcomp_breakpoint.yaml @@ -1,6 +1,6 @@ id: truthset_1_1 type: Adjacency -sequenceTerminals: +adjoinedSequences: - type: SequenceLocation sequenceReference: id: NC_000001.10 diff --git a/examples/sequence_homology.yaml b/examples/sequence_homology.yaml index d3949f16..86f1c8ab 100644 --- a/examples/sequence_homology.yaml +++ b/examples/sequence_homology.yaml @@ -1,6 +1,6 @@ id: sequence_homology type: Adjacency -sequenceTerminals: +adjoinedSequences: - type: SequenceLocation sequenceReference: refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU diff --git a/examples/simple_breakpoint.yaml b/examples/simple_breakpoint.yaml index 0be26571..cfc1f69a 100644 --- a/examples/simple_breakpoint.yaml +++ b/examples/simple_breakpoint.yaml @@ -1,6 +1,6 @@ id: simple_breakpoint type: Adjacency -sequenceTerminals: +adjoinedSequences: - type: SequenceLocation sequenceReference: refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU diff --git a/examples/sv_haplotype.yaml b/examples/sv_haplotype.yaml index 86f67fca..ebfa7c13 100644 --- a/examples/sv_haplotype.yaml +++ b/examples/sv_haplotype.yaml @@ -6,7 +6,7 @@ members: linker: type: LiteralSequenceExpression sequence: GTC - sequenceTerminals: + adjoinedSequences: - type: SequenceLocation sequenceReference: refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU @@ -29,8 +29,9 @@ members: sequence: C - id: ga4gh:AJ.iQ1RGkhW9AyVBLl53silKqgGSBOJaFOr type: Adjacency - sequenceTerminals: - - 
end: 15000 + adjoinedSequences: + - type: SequenceLocation + end: 15000 - type: SequenceLocation sequenceReference: refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU diff --git a/examples/terminal_breakend.yaml b/examples/terminal_breakend.yaml index 6b8da11e..02aceb7c 100644 --- a/examples/terminal_breakend.yaml +++ b/examples/terminal_breakend.yaml @@ -1,6 +1,6 @@ id: terminal_breakend type: Adjacency -sequenceTerminals: +adjoinedSequences: - type: SequenceLocation sequenceReference: refgetAccession: SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU diff --git a/schema/defs/vrs/Adjacency.rst b/schema/defs/vrs/Adjacency.rst index 66ce85eb..8b15d2ad 100644 --- a/schema/defs/vrs/Adjacency.rst +++ b/schema/defs/vrs/Adjacency.rst @@ -1,6 +1,6 @@ **Computational Definition** -The `Adjacency` class represents the termination of a sequence and (when present) the beginning of an adjacent sequence, potentially with an intervening linker sequence. +The `Adjacency` class can represent either the termination of a sequence or the adjoining of the end of a sequence with the beginning of an adjacent sequence, potentially with an intervening linker sequence. **Information Model** @@ -44,11 +44,11 @@ Some Adjacency attributes are inherited from :ref:`Variation`. - :ref:`Expression` - 0..m - - * - sequenceTerminals + * - adjoinedSequences - `IRI `_ | :ref:`Location` - 1..2 - - Sequence terminals involved in the adjacency. + - The terminal sequence or pair of adjoined sequences that defines the adjacency. * - linker - :ref:`SequenceExpression` - 0..1 - - A linker sequence found between the sequence terminals. + - The sequence found between adjoined sequences. diff --git a/schema/defs/vrs/Haplotype.rst b/schema/defs/vrs/Haplotype.rst index be5629ff..78f88c78 100644 --- a/schema/defs/vrs/Haplotype.rst +++ b/schema/defs/vrs/Haplotype.rst @@ -47,4 +47,4 @@ Some Haplotype attributes are inherited from :ref:`Variation`.
* - members - :ref:`Adjacency` | :ref:`Allele` | `IRI `_ - 2..m - - A list of :ref:`Alleles ` and :ref:`Adjacencies ` that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented on a DNA inversion (following an end-defined sequence terminal), in which case they should be ordered in descending coordinates. Sequence references MUST be consistent for all members between and including the end of one Adjacency and the beginning of another. + - A list of :ref:`Alleles ` and :ref:`Adjacencies ` that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented on a DNA inversion (following an Adjacency with end-defined adjoinedSequences), in which case they should be ordered in descending coordinates. Sequence references MUST be consistent for all members between and including the end of one Adjacency and the beginning of another. diff --git a/schema/merged.json b/schema/merged.json index c6c55e2a..4ad333ce 100644 --- a/schema/merged.json +++ b/schema/merged.json @@ -242,7 +242,7 @@ } ] }, - "description": "A list of Alleles that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented on a DNA inversion (following an end-defined sequence terminal), in which case they should be ordered in descending coordinates. Sequence references MUST be consistent for all members between and including the end of one Adjacency and the beginning of another." 
+ "description": "A list of Alleles that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented on a DNA inversion (following an Adjacency with end-defined adjoinedSequences), in which case they should be ordered in descending coordinates. Sequence references MUST be consistent for all members between and including the end of one Adjacency and the beginning of another." } }, "required": [ @@ -916,12 +916,12 @@ "ga4ghDigest": { "prefix": "AJ", "keys": [ + "adjoinedSequences", "linker", - "sequenceTerminals", "type" ] }, - "description": "The `Adjacency` class represents the termination of a sequence and (when present) the beginning of an adjacent sequence, potentially with an intervening linker sequence.", + "description": "The `Adjacency` class can represent either the termination of a sequence or the adjoining of the end of a sequence with the beginning of an adjacent sequence, potentially with an intervening linker sequence.", "type": "object", "properties": { "id": { @@ -961,7 +961,7 @@ "$ref": "#/$defs/Expression" } }, - "sequenceTerminals": { + "adjoinedSequences": { "type": "array", "uniqueItems": false, "ordered": true, @@ -975,12 +975,12 @@ } ] }, - "description": "Sequence terminals involved in the adjacency.", + "description": "The terminal sequence or pair of adjoined sequences that defines the adjacency.", "minItems": 1, "maxItems": 2 }, "linker": { - "description": "A linker sequence found between the sequence terminals.", + "description": "The sequence found between adjoined sequences.", "oneOf": [ { "$ref": "#/$defs/LengthExpression" @@ -995,7 +995,7 @@ } }, "required": [ - "sequenceTerminals" + "adjoinedSequences" ], "additionalProperties": false }, diff --git a/schema/merged.yaml b/schema/merged.yaml index 83afd496..4b9e64b5 100644 ---
a/schema/merged.yaml +++ b/schema/merged.yaml @@ -171,10 +171,10 @@ $defs: the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented - on a DNA inversion (following an end-defined sequence terminal), in which - case they should be ordered in descending coordinates. Sequence references - MUST be consistent for all members between and including the end of one - Adjacency and the beginning of another. + on a DNA inversion (following an Adjacency with end-defined adjoinedSequences), + in which case they should be ordered in descending coordinates. Sequence + references MUST be consistent for all members between and including the + end of one Adjacency and the beginning of another. required: - members additionalProperties: false @@ -699,12 +699,12 @@ $defs: ga4ghDigest: prefix: AJ keys: + - adjoinedSequences - linker - - sequenceTerminals - type - description: The `Adjacency` class represents the termination of a sequence and - (when present) the beginning of an adjacent sequence, potentially with an intervening - linker sequence. + description: The `Adjacency` class can represent either the termination of a sequence + or the adjoining of the end of a sequence with the beginning of an adjacent + sequence, potentially with an intervening linker sequence. type: object properties: id: @@ -739,7 +739,7 @@ $defs: ordered: false items: $ref: '#/$defs/Expression' - sequenceTerminals: + adjoinedSequences: type: array uniqueItems: false ordered: true @@ -747,17 +747,18 @@ $defs: oneOf: - $ref: '#/$defs/IRI' - $ref: '#/$defs/SequenceLocation' - description: Sequence terminals involved in the adjacency. + description: The terminal sequence or pair of adjoined sequences that defines + the adjacency. minItems: 1 maxItems: 2 linker: - description: A linker sequence found between the sequence terminals.
+ description: The sequence found between adjoined sequences. oneOf: - $ref: '#/$defs/LengthExpression' - $ref: '#/$defs/LiteralSequenceExpression' - $ref: '#/$defs/ReferenceLengthExpression' required: - - sequenceTerminals + - adjoinedSequences additionalProperties: false Coding: type: object diff --git a/schema/vrs-source.yaml b/schema/vrs-source.yaml index 1d9b8e82..c5a80642 100644 --- a/schema/vrs-source.yaml +++ b/schema/vrs-source.yaml @@ -178,10 +178,10 @@ $defs: A list of :ref:`Alleles ` and :ref:`Adjacencies ` that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered - by ascending coordinates, unless represented on a DNA inversion (following an end-defined sequence - terminal), in which case they should be ordered in descending coordinates. Sequence references MUST - be consistent for all members between and including the end of one Adjacency and the beginning of - another. + by ascending coordinates, unless represented on a DNA inversion (following an Adjacency with + end-defined adjoinedSequences), in which case they should be ordered in descending coordinates. + Sequence references MUST be consistent for all members between and including the end of one + Adjacency and the beginning of another. required: [ "members" ] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -558,12 +558,13 @@ $defs: prefix: AJ keys: - type - - sequenceTerminals + - adjoinedSequences - linker inherits: MolecularVariation description: - The `Adjacency` class represents the termination of a sequence and (when present) - the beginning of an adjacent sequence, potentially with an intervening linker sequence. 
+ The `Adjacency` class can represent either the termination of a sequence + or the adjoining of the end of a sequence with the beginning of an + adjacent sequence, potentially with an intervening linker sequence. type: object properties: type: @@ -571,7 +572,7 @@ $defs: const: Adjacency default: Adjacency description: MUST be "Adjacency". - sequenceTerminals: + adjoinedSequences: type: array uniqueItems: false ordered: true @@ -579,11 +580,50 @@ $defs: oneOf: - $refCurie: gks.core:IRI - $ref: '#/$defs/Location' - description: Sequence terminals involved in the adjacency. + description: The terminal sequence or pair of adjoined sequences that defines the adjacency. minItems: 1 maxItems: 2 linker: $ref: '#/$defs/SequenceExpression' - description: A linker sequence found between the sequence terminals. + description: The sequence found between adjoined sequences. +# homology: +# # Only valid for breakends=2 +# type: boolean +# default: false +# description: +# A flag indicating whether the location interval of the breakend +# is due to the sequences at the breakends being homologous or +# whether the interval is due to uncertainty regarding the actual +# locations of the breakends. +# terminal: +# # TODO: can the schema encode a constraint that a terminal breakend cannot +# # be part of a breakpoint? +# type: boolean +# default: false +# description: +# # Only valid for breakends=1 +# Indicates the end of the molecule required: - - sequenceTerminals + - adjoinedSequences + +# Event: +# description: +# An event that results in a set of variants. +# type: object +# properties: +# type: +# type: string +# const: Event +# default: Event +# description: MUST be "Event" +# variants: +# $ref: '#/definitions/VariationSet' +# classification: +# # TODO: what event ontology should we use?
+# type: string +# description: Category of event +# required: +# - type +# - variants +# - classification +# additionalProperties: true diff --git a/schema/vrs.json b/schema/vrs.json index 967fcf04..63ffb3b8 100644 --- a/schema/vrs.json +++ b/schema/vrs.json @@ -242,7 +242,7 @@ } ] }, - "description": "A list of Alleles that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented on a DNA inversion (following an end-defined sequence terminal), in which case they should be ordered in descending coordinates. Sequence references MUST be consistent for all members between and including the end of one Adjacency and the beginning of another." + "description": "A list of Alleles that comprise a Haplotype. Members must share the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented on a DNA inversion (following an Adjacency with end-defined adjoinedSequences), in which case they should be ordered in descending coordinates. Sequence references MUST be consistent for all members between and including the end of one Adjacency and the beginning of another." 
} }, "required": [ @@ -916,12 +916,12 @@ "ga4ghDigest": { "prefix": "AJ", "keys": [ + "adjoinedSequences", "linker", - "sequenceTerminals", "type" ] }, - "description": "The `Adjacency` class represents the termination of a sequence and (when present) the beginning of an adjacent sequence, potentially with an intervening linker sequence.", + "description": "The `Adjacency` class can represent either the termination of a sequence or the adjoining of the end of a sequence with the beginning of an adjacent sequence, potentially with an intervening linker sequence.", "type": "object", "properties": { "id": { @@ -961,7 +961,7 @@ "$ref": "#/$defs/Expression" } }, - "sequenceTerminals": { + "adjoinedSequences": { "type": "array", "uniqueItems": false, "ordered": true, @@ -975,12 +975,12 @@ } ] }, - "description": "Sequence terminals involved in the adjacency.", + "description": "The terminal sequence or pair of adjoined sequences that defines the adjacency.", "minItems": 1, "maxItems": 2 }, "linker": { - "description": "A linker sequence found between the sequence terminals.", + "description": "The sequence found between adjoined sequences.", "oneOf": [ { "$ref": "#/$defs/LengthExpression" @@ -995,7 +995,7 @@ } }, "required": [ - "sequenceTerminals" + "adjoinedSequences" ], "additionalProperties": false } diff --git a/schema/vrs.yaml b/schema/vrs.yaml index ced7005c..d445a6b0 100644 --- a/schema/vrs.yaml +++ b/schema/vrs.yaml @@ -171,10 +171,10 @@ $defs: the same reference sequence as adjacent members. Alleles should not have overlapping or adjacent coordinates with neighboring Alleles. Neighboring alleles should be ordered by ascending coordinates, unless represented - on a DNA inversion (following an end-defined sequence terminal), in which - case they should be ordered in descending coordinates. Sequence references - MUST be consistent for all members between and including the end of one - Adjacency and the beginning of another.
+ on a DNA inversion (following an Adjacency with end-defined adjoinedSequences), + in which case they should be ordered in descending coordinates. Sequence + references MUST be consistent for all members between and including the + end of one Adjacency and the beginning of another. required: - members additionalProperties: false @@ -699,12 +699,12 @@ $defs: ga4ghDigest: prefix: AJ keys: + - adjoinedSequences - linker - - sequenceTerminals - type - description: The `Adjacency` class represents the termination of a sequence and - (when present) the beginning of an adjacent sequence, potentially with an intervening - linker sequence. + description: The `Adjacency` class can represent either the termination of a sequence + or the adjoining of the end of a sequence with the beginning of an adjacent + sequence, potentially with an intervening linker sequence. type: object properties: id: @@ -739,7 +739,7 @@ $defs: ordered: false items: $ref: '#/$defs/Expression' - sequenceTerminals: + adjoinedSequences: type: array uniqueItems: false ordered: true @@ -747,15 +747,16 @@ $defs: oneOf: - $ref: '#/$defs/SequenceLocation' - $ref: gks.common.json#/$defs/IRI - description: Sequence terminals involved in the adjacency. + description: The terminal sequence or pair of adjoined sequences that defines + the adjacency. minItems: 1 maxItems: 2 linker: - description: A linker sequence found between the sequence terminals. + description: The sequence found between adjoined sequences.
oneOf: - $ref: '#/$defs/LengthExpression' - $ref: '#/$defs/LiteralSequenceExpression' - $ref: '#/$defs/ReferenceLengthExpression' required: - - sequenceTerminals + - adjoinedSequences additionalProperties: false diff --git a/tests/test_definitions.yaml b/tests/test_definitions.yaml index 10e18e73..e584f96c 100644 --- a/tests/test_definitions.yaml +++ b/tests/test_definitions.yaml @@ -1,30 +1,37 @@ tests: - test_file: simple_breakpoint.yaml - description: https://docs.google.com/presentation/d/11Hm-_IvKGFUpdsqh_LiIrxjpvQazTxg1/edit#slide=id.p2 + description: An adjacency between two sequence locations. + image: ../../docs/images/ex_simple_breakpoint.png schema: vrs definition: Adjacency - test_file: revcomp_breakpoint.yaml - description: https://docs.google.com/presentation/d/11Hm-_IvKGFUpdsqh_LiIrxjpvQazTxg1/edit#slide=id.p8 + description: An adjacency between two sequence locations, the 2nd of which is in reverse orientation. + image: ../../docs/images/ex_revcomp_breakpoint.png schema: vrs definition: Adjacency - test_file: terminal_breakend.yaml - description: https://docs.google.com/presentation/d/11Hm-_IvKGFUpdsqh_LiIrxjpvQazTxg1/edit#slide=id.p9 + description: An adjacency with only the starting sequence location, defining the break at which the adjacency ends or terminates. + image: ../../docs/images/ex_terminal_breakend.png schema: vrs definition: Adjacency - test_file: sequence_homology.yaml - description: https://docs.google.com/presentation/d/11Hm-_IvKGFUpdsqh_LiIrxjpvQazTxg1/edit#slide=id.p11 + description: An adjacency in which the two sequence locations have homologous overlapping adjoined sequences. + image: ../../docs/images/ex_sequence_homology.png schema: vrs definition: Adjacency - test_file: precise_linker.yaml - description: https://docs.google.com/presentation/d/11Hm-_IvKGFUpdsqh_LiIrxjpvQazTxg1/edit#slide=id.p12 + description: An adjacency with a literal sequence linking the two sequence locations that make up the adjoined sequences.
+ image: ../../docs/images/ex_precise_linker.png schema: vrs definition: Adjacency - test_file: ambiguous_linker.yaml - description: https://docs.google.com/presentation/d/11Hm-_IvKGFUpdsqh_LiIrxjpvQazTxg1/edit#slide=id.p13 + description: An adjacency with a length expression linking the two sequence locations that make up the adjoined sequences. + image: ../../docs/images/ex_ambiguous_linker.png schema: vrs definition: Adjacency - test_file: sv_haplotype.yaml - description: https://docs.google.com/presentation/d/11Hm-_IvKGFUpdsqh_LiIrxjpvQazTxg1/edit#slide=id.p15 + description: A haplotype of 3 members. First an adjacency with a literal sequence linker followed by an SNV on the 2nd sequence and ending with a simple breakpoint adjacency that ends with the 1st sequence in the haplotype. + image: ../../docs/images/ex_sv_haplotype.png schema: vrs definition: Haplotype - test_file: simple_haplotype.yaml