Skip to content

Commit

Permalink
Split exome and genome frequencies in VA data
Browse files Browse the repository at this point in the history
Previously, VA data always used exome frequencies when available and fell back to genome frequencies otherwise. However, because this is a general-purpose API, it is hard to predict whether users will want exomes, genomes, or both for their use cases. Rather than make that decision for them, we now allow (and require) them to specify which they want in their queries.
  • Loading branch information
phildarnowsky-broad committed Sep 13, 2024
1 parent fa15c6a commit 4ebe09e
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 78 deletions.
162 changes: 109 additions & 53 deletions graphql-api/src/graphql/resolvers/va.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { describe, expect, test } from '@jest/globals'

import {
resolveVACohortAlleleFrequencies,
resolveVAExome,
resolveVAGenome,
resolveVAAllele,
Allele as VAAllele,
CohortAlleleFrequency,
Expand Down Expand Up @@ -60,19 +61,29 @@ describe('resolveVACohortAlleleFrequency', () => {
ancestry_groups: [],
}

// Genome frequency fixture: ac/an of 18/200 yields the 0.09 alleleFrequency
// asserted by the genome test, and faf95 supplies the expected grpMaxFAF95 values.
const genomeEsDocument = {
ac: 18,
an: 200,
hemizygote_count: 4,
homozygote_count: 5,
faf95: { popmax: 0.234, popmax_population: 'eas' },
ancestry_groups: [],
}

const variantESDocument = {
...alleleEsDocument,
variant_id: '1-123-G-A',
exome: exomeEsDocument,
genome: genomeEsDocument,
joint: { fafmax: { faf95_max: 0.234, faf95_max_gen_anc: 'amr' } },
}

test('parses a single CohortAlleleFrequency correctly', async () => {
const resolved = await resolveVACohortAlleleFrequencies(variantESDocument, null, null)
test('parses a single CohortAlleleFrequency exome correctly', async () => {
const resolved = await resolveVAExome(variantESDocument, null, null)
const expected: CohortAlleleFrequency[] = [
{
id: 'gnomad4:1-123-G-A',
label: 'Overall Cohort Allele Frequency for 1-123-G-A',
label: 'Exome Cohort Allele Frequency for 1-123-G-A',
type: 'CohortAlleleFrequency',
focusAllele: expectedAllele,
derivedFrom: {
Expand All @@ -84,7 +95,7 @@ describe('resolveVACohortAlleleFrequency', () => {
focusAlleleCount: 5,
locusAlleleCount: 100,
alleleFrequency: 0.05,
cohort: { id: 'ALL', label: 'Overall', characteristics: null },
cohort: { id: 'ALL', label: 'Exome', characteristics: null },
ancillaryResults: {
grpMaxFAF95: { frequency: 0.123, confidenceInterval: 0.95, groupId: 'afr' },
jointGrpMaxFAF95: { frequency: 0.234, confidenceInterval: 0.95, groupId: 'amr' },
Expand All @@ -98,6 +109,44 @@ describe('resolveVACohortAlleleFrequency', () => {
expect(resolved).toEqual(expected)
})

// Genome counterpart of the exome parsing test: the counts and grpMaxFAF95 come
// from genomeEsDocument, while jointGrpMaxFAF95 comes from the document's `joint` field.
test('parses a single CohortAlleleFrequency genome correctly', async () => {
const resolved = await resolveVAGenome(variantESDocument, null, null)
const expected: CohortAlleleFrequency[] = [
{
id: 'gnomad4:1-123-G-A',
label: 'Genome Cohort Allele Frequency for 1-123-G-A',
type: 'CohortAlleleFrequency',
focusAllele: expectedAllele,
derivedFrom: {
id: 'gnomad4.1.0',
type: 'DataSet',
label: 'gnomAD v4.1.0',
version: '4.1.0',
},
focusAlleleCount: 18,
locusAlleleCount: 200,
alleleFrequency: 0.09,
cohort: { id: 'ALL', label: 'Genome', characteristics: null },
ancillaryResults: {
grpMaxFAF95: { frequency: 0.234, confidenceInterval: 0.95, groupId: 'eas' },
jointGrpMaxFAF95: { frequency: 0.234, confidenceInterval: 0.95, groupId: 'amr' },
homozygotes: 5,
hemizygotes: 4,
},
subcohortFrequency: [],
},
]

expect(resolved).toEqual(expected)
})

// Each resolver must return null when its own frequency data is absent,
// rather than falling back to the other data type.
test('gracefully handles missing exome or genome', async () => {
const exomeOnlyDocument = { ...variantESDocument, genome: null }
const genomeOnlyDocument = { ...variantESDocument, exome: null }
expect(await resolveVAExome(genomeOnlyDocument, null, null)).toBeNull()
expect(await resolveVAGenome(exomeOnlyDocument, null, null)).toBeNull()
})

test('has the correct subcohortAlleleFrequency when there are multiple CAFs', async () => {
// Shuffled order of IDs is intentional here to better test sorting
const subcohortIds = [
Expand All @@ -121,55 +170,62 @@ describe('resolveVACohortAlleleFrequency', () => {
const fullDocument = {
...variantESDocument,
exome: { ...exomeEsDocument, ancestry_groups: subcohortDocuments },
genome: { ...genomeEsDocument, ancestry_groups: subcohortDocuments },
}

const resolved = await resolveVACohortAlleleFrequencies(fullDocument, null, null)
expect(resolved && resolved.length === subcohortIds.length + 1).toEqual(true)

const subcohortMap: Record<string, string[]> = resolved!.reduce(
(acc, cohort) => ({
...acc,
[cohort.id]: cohort.subcohortFrequency.map((subcohort) => subcohort.id),
}),
{}
)

expect(subcohortMap['gnomad4:1-123-G-A']!.sort()).toEqual(
subcohortIds.map((cohortId) => `gnomad4:1-123-G-A.${cohortId}`).sort()
)

expect(subcohortMap['gnomad4:1-123-G-A.XX'].sort()).toEqual([
'gnomad4:1-123-G-A.ami_XX',
'gnomad4:1-123-G-A.amr_XX',
'gnomad4:1-123-G-A.eur_XX',
])

expect(subcohortMap['gnomad4:1-123-G-A.XY'].sort()).toEqual([
'gnomad4:1-123-G-A.ami_XY',
'gnomad4:1-123-G-A.amr_XY',
'gnomad4:1-123-G-A.eur_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.ami'].sort()).toEqual([
'gnomad4:1-123-G-A.ami_XX',
'gnomad4:1-123-G-A.ami_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.amr'].sort()).toEqual([
'gnomad4:1-123-G-A.amr_XX',
'gnomad4:1-123-G-A.amr_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.eur'].sort()).toEqual([
'gnomad4:1-123-G-A.eur_XX',
'gnomad4:1-123-G-A.eur_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.ami_XX']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.ami_XY']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.amr_XX']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.amr_XY']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.eur_XX']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.eur_XY']).toEqual([])
const exomeResolved = await resolveVAExome(fullDocument, null, null)
const genomeResolved = await resolveVAGenome(fullDocument, null, null)

const results: CohortAlleleFrequency[][] = [exomeResolved!, genomeResolved!]

results.forEach((resolved: CohortAlleleFrequency[]) => {
expect(resolved && resolved.length === subcohortIds.length + 1).toEqual(true)

const subcohortMap: Record<string, string[]> = resolved!.reduce(
(acc, cohort) => ({
...acc,
[cohort.id]: cohort.subcohortFrequency.map((subcohort) => subcohort.id),
}),
{}
)

expect(subcohortMap['gnomad4:1-123-G-A']!.sort()).toEqual(
subcohortIds.map((cohortId) => `gnomad4:1-123-G-A.${cohortId}`).sort()
)

expect(subcohortMap['gnomad4:1-123-G-A.XX'].sort()).toEqual([
'gnomad4:1-123-G-A.ami_XX',
'gnomad4:1-123-G-A.amr_XX',
'gnomad4:1-123-G-A.eur_XX',
])

expect(subcohortMap['gnomad4:1-123-G-A.XY'].sort()).toEqual([
'gnomad4:1-123-G-A.ami_XY',
'gnomad4:1-123-G-A.amr_XY',
'gnomad4:1-123-G-A.eur_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.ami'].sort()).toEqual([
'gnomad4:1-123-G-A.ami_XX',
'gnomad4:1-123-G-A.ami_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.amr'].sort()).toEqual([
'gnomad4:1-123-G-A.amr_XX',
'gnomad4:1-123-G-A.amr_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.eur'].sort()).toEqual([
'gnomad4:1-123-G-A.eur_XX',
'gnomad4:1-123-G-A.eur_XY',
])

expect(subcohortMap['gnomad4:1-123-G-A.ami_XX']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.ami_XY']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.amr_XX']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.amr_XY']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.eur_XX']).toEqual([])
expect(subcohortMap['gnomad4:1-123-G-A.eur_XY']).toEqual([])
})
})
})
49 changes: 35 additions & 14 deletions graphql-api/src/graphql/resolvers/va.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ const generateSequenceId = (sequence: string) => {
return hashWithSha512t24u(sequence)
}

/**
 * Upper-cases the first character of `str` and leaves the remainder untouched.
 * An empty string is returned unchanged.
 */
const capitalize = (str: string): string => {
  const head = str.charAt(0)
  const tail = str.slice(1)
  return head.toUpperCase() + tail
}

export const resolveVAAllele = async (obj: any, _args: any, _ctx: any): Promise<Allele | null> => {
const vrsData = obj.vrs

Expand Down Expand Up @@ -199,26 +201,29 @@ const getAncestryAndSexIds = (subsetId: string): [string | undefined, string | u
return first === 'XX' || first === 'XY' ? [undefined, first] : [first, second]
}

const cohortDescription = (subsetId: string | undefined): string => {
const cohortDescription = (
subsetId: string | undefined,
frequencyField: 'exome' | 'genome'
): string => {
if (subsetId === undefined) {
return 'Overall Cohort'
return `${capitalize(frequencyField)} Cohort`
}

const [ancestryGroupId, sexId] = getAncestryAndSexIds(subsetId)

if (ancestryGroupId) {
const ancestryGroupName = POPULATION_NAMES[ancestryGroupId]
if (sexId) {
return `${ancestryGroupName} ${sexId} Ancestry Group`
return `${capitalize(frequencyField)} ${ancestryGroupName} ${sexId} Ancestry Group`
}
return `${ancestryGroupName} Ancestry Group`
return `${capitalize(frequencyField)} ${ancestryGroupName} Ancestry Group`
}
return sexId!
}

const cohortForSubset = (subset: Subset): Cohort => {
const cohortForSubset = (subset: Subset, frequencyField: 'exome' | 'genome'): Cohort => {
if (!subset.id) {
return { id: 'ALL', label: 'Overall', characteristics: null }
return { id: 'ALL', label: capitalize(frequencyField), characteristics: null }
}

const [ancestryGroupId, sexId] = getAncestryAndSexIds(subset.id)
Expand All @@ -237,19 +242,24 @@ const cohortForSubset = (subset: Subset): Cohort => {
: []
const characteristics = [...sexCharacteristics, ...ancestryCharacteristics]

return { id: subset.id || 'ALL', label: cohortDescription(subset.id), characteristics }
return {
id: subset.id || 'ALL',
label: cohortDescription(subset.id, frequencyField),
characteristics,
}
}

const resolveVACohortAlleleFrequency = (
focusAllele: Allele,
variant_id: string,
subset: Subset
subset: Subset,
frequencyField: 'exome' | 'genome'
): CohortAlleleFrequencyWithoutSubcohorts => {
const idSuffix = subset.id ? `.${subset.id}` : ''
const id = `gnomad4:${variant_id}${idSuffix}`
const label = `${cohortDescription(subset.id)} Allele Frequency for ${variant_id}`
const label = `${cohortDescription(subset.id, frequencyField)} Allele Frequency for ${variant_id}`

const cohort = cohortForSubset(subset)
const cohort = cohortForSubset(subset, frequencyField)

const ancillaryResults = {
grpMaxFAF95: subset.grpMax || null,
Expand Down Expand Up @@ -357,17 +367,22 @@ const addSubcohorts = (
return Object.values(subcohortMap)
}

export const resolveVACohortAlleleFrequencies = async (
const resolveVACohortAlleleFrequencies = async (
obj: any,
args: any,
ctx: any
ctx: any,
frequencyField: 'exome' | 'genome'
): Promise<CohortAlleleFrequency[] | null> => {
const focusAllele = await resolveVAAllele(obj, args, ctx)
if (focusAllele === null) {
return null
}

const frequencies = obj.exome || obj.genome
const frequencies = obj[frequencyField]
if (!frequencies) {
return null
}

const fullSet: Subset = {
ac: frequencies.ac,
an: frequencies.an,
Expand All @@ -389,8 +404,14 @@ export const resolveVACohortAlleleFrequencies = async (
}
const subsets = [fullSet, ...(frequencies.ancestry_groups as Subset[])]
const cohortsWithoutSubcohorts = subsets.map((subset) =>
resolveVACohortAlleleFrequency(focusAllele, obj.variant_id, subset)
resolveVACohortAlleleFrequency(focusAllele, obj.variant_id, subset, frequencyField)
)

return addSubcohorts(cohortsWithoutSubcohorts)
}

/**
 * Resolves the cohort allele frequencies computed from a variant document's
 * `exome` data by delegating to `resolveVACohortAlleleFrequencies`.
 *
 * @returns the resolved frequencies, or null when the allele cannot be resolved
 *   or the document has no exome data.
 */
export const resolveVAExome = async (obj: any, args: any, ctx: any) => {
  return resolveVACohortAlleleFrequencies(obj, args, ctx, 'exome')
}

/**
 * Resolves the cohort allele frequencies computed from a variant document's
 * `genome` data by delegating to `resolveVACohortAlleleFrequencies`.
 *
 * @returns the resolved frequencies, or null when the allele cannot be resolved
 *   or the document has no genome data.
 */
export const resolveVAGenome = async (obj: any, args: any, ctx: any) => {
  return resolveVACohortAlleleFrequencies(obj, args, ctx, 'genome')
}
12 changes: 9 additions & 3 deletions graphql-api/src/graphql/resolvers/variant-fields.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import { resolveVACohortAlleleFrequencies, resolveVAAllele } from './va'
import { resolveVAExome, resolveVAGenome, resolveVAAllele } from './va'

const resolvers = {
Variant: {
rsids: (obj: any) => obj.rsids || [],
va: resolveVACohortAlleleFrequencies,
va: (obj: any, ctx: any, args: any) => ({
exome: resolveVAExome(obj, ctx, args),
genome: resolveVAGenome(obj, ctx, args),
}),
vrs: resolveVAAllele,
},
VariantDetails: {
rsids: (obj: any) => obj.rsids || [],
va: resolveVACohortAlleleFrequencies,
va: (obj: any, ctx: any, args: any) => ({
exome: resolveVAExome(obj, ctx, args),
genome: resolveVAGenome(obj, ctx, args),
}),
vrs: resolveVAAllele,
},
}
Expand Down
12 changes: 6 additions & 6 deletions graphql-api/src/graphql/types/va.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ type VAQualityMeasures {
heterozygousSkewedAlleleCount: Int
}

"A measure of the frequency of an Allele in a cohort."
type VACohortAlleleFrequency {
type VACohortAlleleFrequencyData {
id: String!
type: String!
label: String
Expand All @@ -124,10 +123,11 @@ type VACohortAlleleFrequency {
This creates a recursive relationship and subcohorts can be further subdivided into more subcohorts.
This enables, for example, the description of different ancestry groups and sexes among those ancestry groups.
"""
subcohortFrequency: [VACohortAlleleFrequency]
subcohortFrequency: [VACohortAlleleFrequencyData]
}

type VA {
va: [VACohortAlleleFrequency!]
vrs: [VAAllele!]
"A measure of the frequency of an Allele in a cohort."
type VACohortAlleleFrequency {
exome: [VACohortAlleleFrequencyData!]
genome: [VACohortAlleleFrequencyData!]
}
4 changes: 2 additions & 2 deletions graphql-api/src/graphql/types/variant.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ type Variant {
hgvs: String

# GA4GH-format data
va: [VACohortAlleleFrequency!]
va: VACohortAlleleFrequency!
vrs: VAAllele
}

Expand Down Expand Up @@ -322,7 +322,7 @@ type VariantDetails {
multiNucleotideVariants: [MultiNucleotideVariantSummary!]
sortedTranscriptConsequences: [TranscriptConsequence!]

va: [VACohortAlleleFrequency!]
va: VACohortAlleleFrequency!
vrs: VAAllele
}

Expand Down

0 comments on commit 4ebe09e

Please sign in to comment.