Skip to content

Commit

Permalink
fixup(browser): code review -- refactor mergeExomeGenomeAndJointPopul…
Browse files Browse the repository at this point in the history
…ations
  • Loading branch information
rileyhgrant committed May 28, 2024
1 parent 773e93c commit a27d654
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 53 deletions.
7 changes: 5 additions & 2 deletions browser/src/VariantList/Variants.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,11 @@ const Variants = ({
}, [])

const filteredVariants = useMemo(() => {
return mergeExomeAndGenomeData(filterVariants(variants, filter, renderedTableColumns))
}, [variants, filter, renderedTableColumns])
return mergeExomeAndGenomeData({
datasetId,
variants: filterVariants(variants, filter, renderedTableColumns),
})
}, [datasetId, variants, filter, renderedTableColumns])

const renderedVariants = useMemo(() => {
return sortVariants(filteredVariants, sortState)
Expand Down
118 changes: 106 additions & 12 deletions browser/src/VariantList/mergeExomeAndGenomeData.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,104 @@ describe('mergeExomeGenomeAndJointPopulationData', () => {

expect(result).toStrictEqual(expectedJointGeneticAncestryGroupObjects)
})

it('returns all ancestries from dataset if provided, filling in missing ones and removing not included ones', () => {
const exomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'eur', value: 1 },
{ id: 'afr', value: 2 },
{ id: 'remaining', value: 4 },
],
false
)

const genomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 8 },
{ id: 'remaining', value: 16 },
{ id: 'eur', value: 32 },
],
false
)

const jointGeneticAncestryGroupObjects = [
{ ac: 16, hemizygote_count: 17, homozygote_count: 18, an: 160, id: 'afr' },
{ ac: 16, hemizygote_count: 17, homozygote_count: 18, an: 160, id: 'afr_XX' },
{ ac: 16, hemizygote_count: 17, homozygote_count: 18, an: 160, id: 'afr_YY' },
{ ac: 32, hemizygote_count: 33, homozygote_count: 34, an: 320, id: 'remaining' },
{ ac: 64, hemizygote_count: 65, homozygote_count: 66, an: 640, id: 'eur' },
{ ac: 128, hemizygote_count: 129, homozygote_count: 130, an: 1280, id: 'mid' },
]

const testVariant = variantFactory.build({
variant_id: 'test_variant',
exome: { populations: exomeGeneticAncestryGroupObjects },
genome: { populations: genomeGeneticAncestryGroupObjects },
joint: { populations: jointGeneticAncestryGroupObjects as Population[] },
})

const result = mergeExomeGenomeAndJointPopulationData({
datasetId: 'gnomad_r4',
exomePopulations: testVariant.exome!.populations,
genomePopulations: testVariant.genome!.populations,
jointPopulations: testVariant.joint!.populations,
})

const expectedJointGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 16 },
{ id: 'afr_XX', value: 16 },
{ id: 'afr_YY', value: 16 },
{ id: 'remaining', value: 32 },
{ id: 'eur', value: 64 },
{ id: 'mid', value: 128 },
],
true
)

const missingAncestries: Population[] = [
{ id: 'amr', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'amr_XX', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'amr_XY', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'asj', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'asj_XX', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'asj_XY', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'eas', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'eas_XX', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'eas_XY', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'fin', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'fin_XX', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'fin_XY', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'nfe', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'nfe_XX', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'nfe_XY', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'ami', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'ami_XX', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'ami_XY', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'sas', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'sas_XX', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
{ id: 'sas_XY', ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 },
]

// include missing ancestries from v4 (e.g. fin)
const expectedObjectsIncludingMissingAncestries =
expectedJointGeneticAncestryGroupObjects.concat(missingAncestries)

// removes ancestries not present in v4 (e.g. eur)
const expectedAncestriesIncludingMissingMinusNotIncluded =
expectedObjectsIncludingMissingAncestries.filter((ancestry) => !ancestry.id.includes('eur'))

const sortAncestries = (ancestryA: Population, ancestryB: Population) =>
ancestryA.id.localeCompare(ancestryB.id)

expect(result.sort(sortAncestries)).toEqual(
expectedAncestriesIncludingMissingMinusNotIncluded.sort(sortAncestries)
)
})
})

describe('mergeExomeAndGenomeData', () => {
it('returns expected values with just exome data', () => {
it('returns just exome populations if only exome data is present, but no dataset', () => {
const exomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 1 },
Expand All @@ -272,7 +366,7 @@ describe('mergeExomeAndGenomeData', () => {
},
})

const result = mergeExomeAndGenomeData([testExomeOnlyVariant])
const result = mergeExomeAndGenomeData({ variants: [testExomeOnlyVariant] })

const expected = [
{
Expand All @@ -290,7 +384,7 @@ describe('mergeExomeAndGenomeData', () => {

expect(result).toStrictEqual(expected)
})
it('returns expected values with just genome data', () => {
it('returns just genome populations if only genome data is present, but no dataset', () => {
const genomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 8 },
Expand All @@ -317,7 +411,7 @@ describe('mergeExomeAndGenomeData', () => {
},
})

const result = mergeExomeAndGenomeData([testGenomeOnlyVariant])
const result = mergeExomeAndGenomeData({ variants: [testGenomeOnlyVariant] })

const expected = [
{
Expand All @@ -336,7 +430,7 @@ describe('mergeExomeAndGenomeData', () => {
expect(result).toStrictEqual(expected)
})

it('returns expected values with exome and genome data', () => {
it('merges present populations with exome and genome data, but no dataset', () => {
const exomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 1 },
Expand Down Expand Up @@ -384,7 +478,7 @@ describe('mergeExomeAndGenomeData', () => {
},
})

const result = mergeExomeAndGenomeData([testExomeAndGenomeVariant])
const result = mergeExomeAndGenomeData({ variants: [testExomeAndGenomeVariant] })

const expected = [
{
Expand All @@ -407,7 +501,7 @@ describe('mergeExomeAndGenomeData', () => {

expect(result).toStrictEqual(expected)
})
it('returns expected values with exome and joint data', () => {
it('preferentially uses joint populations if both exome and joint are present, but no dataset', () => {
const exomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 1 },
Expand Down Expand Up @@ -453,7 +547,7 @@ describe('mergeExomeAndGenomeData', () => {
},
})

const result = mergeExomeAndGenomeData([testExomeAndJointVariant])
const result = mergeExomeAndGenomeData({ variants: [testExomeAndJointVariant] })

const expectedJointGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
Expand Down Expand Up @@ -481,7 +575,7 @@ describe('mergeExomeAndGenomeData', () => {

expect(result).toStrictEqual(expected)
})
it('returns expected values with genome and joint data', () => {
it('preferentially uses joint populations if both genome and joint are present, but no dataset', () => {
const genomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 8 },
Expand Down Expand Up @@ -527,7 +621,7 @@ describe('mergeExomeAndGenomeData', () => {
},
})

const result = mergeExomeAndGenomeData([testGenomeAndJointVariant])
const result = mergeExomeAndGenomeData({ variants: [testGenomeAndJointVariant] })

const expectedJointGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
Expand All @@ -554,7 +648,7 @@ describe('mergeExomeAndGenomeData', () => {

expect(result).toStrictEqual(expected)
})
it('returns expected values with exome, genome, and joint data', () => {
it('preferentially uses joint populations if exome, genome and joint are present, but no dataset', () => {
const exomeGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
{ id: 'afr', value: 1 },
Expand Down Expand Up @@ -623,7 +717,7 @@ describe('mergeExomeAndGenomeData', () => {
},
})

const result = mergeExomeAndGenomeData([testExomeGenomeAndJointVariant])
const result = mergeExomeAndGenomeData({ variants: [testExomeGenomeAndJointVariant] })

const expectedJointGeneticAncestryGroupObjects = createAncestryGroupObjects(
[
Expand Down
61 changes: 37 additions & 24 deletions browser/src/VariantList/mergeExomeAndGenomeData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,31 @@ import { PopulationId, getPopulationsInDataset } from '@gnomad/dataset-metadata/
// Null-tolerant addition: any falsy operand (null, undefined, 0, NaN) counts as 0.
const add = (n1: number | null | undefined, n2: number | null | undefined) => {
  const left = n1 || 0
  const right = n2 || 0
  return left + right
}

// Produces zero-count placeholder entries for an ancestry group: the group itself
// followed by its `_XX` and `_XY` variants, in that order.
const emptyAncestries = (ancestry: PopulationId): Population[] => {
  const placeholderIds = [ancestry, `${ancestry}_XX`, `${ancestry}_XY`]
  return placeholderIds.map((id) => ({ id, ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 }))
}

/**
 * Collects the entries of `candidates` that belong to the `target` ancestry group.
 *
 * An entry belongs to the group when its id is exactly `target`, or is `target`
 * followed by an underscore-separated suffix (for example `afr_XX`). Requiring the
 * underscore keeps an id that merely shares leading characters with `target`
 * from being pulled into the wrong group.
 *
 * @param target - ancestry group id to look for
 * @param candidates - population entries to search
 * @returns the matching entries in their original order, or `undefined` when none match
 */
const findAncestries = (
  target: PopulationId,
  candidates: Population[]
): Population[] | undefined => {
  const suffixedPrefix = `${target}_`
  const foundAncestries = candidates.filter(
    (ancestry) => ancestry.id === target || ancestry.id.startsWith(suffixedPrefix)
  )
  // callers rely on `undefined` (not an empty array) to fall back to placeholders
  return foundAncestries.length > 0 ? foundAncestries : undefined
}

// Include placeholders for any ancestries missing from the dataset.
// The flatMap form walks versionAncestries and, for each id, takes the entries
// findAncestries locates in currentAncestries, or the zero-count entries from
// emptyAncestries when none are found.
// NOTE(review): this span is a rendered diff -- the forEach body appears to be the
// removed implementation and the flatMap expression its replacement, so only one
// of the two `fullAncestries` declarations is expected in the real file; confirm
// against the repository before editing.
const addMissingAncestries = (
currentAncestries: Population[],
versionAncestries: PopulationId[]
) => {
const fullAncestries = [...currentAncestries]

versionAncestries.forEach((ancestry) => {
if (
fullAncestries.filter((ancestryObject: Population) => ancestryObject.id === ancestry)
.length === 0
) {
fullAncestries.push({ id: ancestry, ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 })
fullAncestries.push({ id: `${ancestry}_XX`, ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 })
fullAncestries.push({ id: `${ancestry}_XY`, ac: 0, an: 0, ac_hemi: 0, ac_hom: 0 })
}
})
const fullAncestries = versionAncestries.flatMap(
(versionAncestry) =>
findAncestries(versionAncestry, currentAncestries) || emptyAncestries(versionAncestry)
)

return fullAncestries
}
Expand All @@ -38,7 +46,7 @@ export const mergeExomeGenomeAndJointPopulationData = ({
genomePopulations: Population[]
jointPopulations?: Population[] | null
}) => {
const datasetPopulations = datasetId ? getPopulationsInDataset(datasetId) : []
const datasetPopulations = datasetId ? getPopulationsInDataset(datasetId) : undefined

if (jointPopulations) {
const reshapedJointPopulations = jointPopulations
Expand All @@ -50,12 +58,11 @@ export const mergeExomeGenomeAndJointPopulationData = ({
ac_hom: jointPopulation.homozygote_count!,
}))

const reshapedJointPopulaitonsWithAllAncestries = addMissingAncestries(
reshapedJointPopulations,
datasetPopulations
)
const reshapedJointPopulationWithAddedAncestries = datasetPopulations
? addMissingAncestries(reshapedJointPopulations, datasetPopulations)
: reshapedJointPopulations

return reshapedJointPopulaitonsWithAllAncestries
return reshapedJointPopulationWithAddedAncestries
}

const populations: { [key: string]: Population } = {}
Expand Down Expand Up @@ -92,12 +99,11 @@ export const mergeExomeGenomeAndJointPopulationData = ({
})

const reshapedMergedPopulations = Object.values(populations)
const reshapedMergedPopulationsWithAllAncestries = addMissingAncestries(
reshapedMergedPopulations,
datasetPopulations
)
const reshapedMergedPopulationsWithAddedAncestries = datasetPopulations
? addMissingAncestries(reshapedMergedPopulations, datasetPopulations)
: reshapedMergedPopulations

return reshapedMergedPopulationsWithAllAncestries
return reshapedMergedPopulationsWithAddedAncestries
}

type MergedVariant = Variant & {
Expand All @@ -111,7 +117,13 @@ type MergedVariant = Variant & {
populations: Population[]
}

export const mergeExomeAndGenomeData = (variants: Variant[]): MergedVariant[] => {
export const mergeExomeAndGenomeData = ({
datasetId,
variants,
}: {
datasetId?: DatasetId
variants: Variant[]
}): MergedVariant[] => {
const mergedVariants = variants.map((variant: Variant) => {
const { exome, genome, joint } = variant

Expand Down Expand Up @@ -165,6 +177,7 @@ export const mergeExomeAndGenomeData = (variants: Variant[]): MergedVariant[] =>
const combinedFilters = exomeFilters.concat(genomeFilters)

const combinedPopulations = mergeExomeGenomeAndJointPopulationData({
datasetId,
exomePopulations: exomeOrNone.populations,
genomePopulations: genomeOrNone.populations,
})
Expand Down
16 changes: 1 addition & 15 deletions dataset-metadata/gnomadPopulations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,27 +63,13 @@ const v4Populations: PopulationId[] = [
'sas',
'remaining',
]
const allPopulations: PopulationId[] = [
'afr',
'ami',
'amr',
'asj',
'eas',
'mid',
'eur',
'nfe',
'fin',
'oth',
'sas',
'remaining',
]

export const populationsInDataset = {
ExAC: ExACPopulations,
v2: v2Populations,
v3: v3Populations,
v4: v4Populations,
default: allPopulations,
default: [],
}

export const getPopulationsInDataset = (datasetId: DatasetId): PopulationId[] => {
Expand Down

0 comments on commit a27d654

Please sign in to comment.