From f89a77bb3c9588829ef903ff553bd8ab540670ff Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Wed, 30 Oct 2024 15:07:17 -0400 Subject: [PATCH] Allow search of gene symbols that look like CAIDs --- browser/src/search.spec.ts | 72 ++++++++++++++++- browser/src/search.ts | 156 +++++++++++++++++++++++-------------- 2 files changed, 168 insertions(+), 60 deletions(-) diff --git a/browser/src/search.spec.ts b/browser/src/search.spec.ts index 1fc1662ac..72c463923 100644 --- a/browser/src/search.spec.ts +++ b/browser/src/search.spec.ts @@ -1,4 +1,4 @@ -import { beforeEach, describe, expect, jest, it } from '@jest/globals' +import { beforeEach, describe, expect, jest, it, test } from '@jest/globals' import { fetchSearchResults } from './search' @@ -150,4 +150,74 @@ describe('fetchSearchResults', () => { }, ]) }) + + describe('looking up a query formatted like a CAID', () => { + test('returns links to any genes with matching symbols as well as the presumptive CAID, disambiguating if needed', async () => { + // @ts-expect-error TS(2339) FIXME: Property 'mockReturnValue' does not exist on type ... Remove this comment to see the full error message + global.fetch.mockReturnValue( + Promise.resolve({ + json: () => + Promise.resolve({ + data: { + gene_search: [ + { ensembl_id: 'ENSG000004', symbol: 'CA327' }, + { ensembl_id: 'ENSG000001', symbol: 'CA321' }, + { ensembl_id: 'ENSG000005', symbol: 'CA3213' }, + { ensembl_id: 'ENSG000006', symbol: 'CA32' }, + { ensembl_id: 'ENSG000003', symbol: 'CA325' }, + ], + }, + }), + }) + ) + + expect(await fetchSearchResults('gnomad_r4', 'CA3')).toEqual([ + { label: 'CA3', value: `/variant/CA3?dataset=gnomad_r4` }, + { + label: 'CA32', + value: '/gene/ENSG000006?dataset=gnomad_r4', + }, + { + label: 'CA321', + value: '/gene/ENSG000001?dataset=gnomad_r4', + }, + { + label: 'CA3213', + value: '/gene/ENSG000005?dataset=gnomad_r4', + }, + { + label: 'CA325', + value: '/gene/ENSG000003?dataset=gnomad_r4', + }, + { + label: 'CA327', + value: '/gene/ENSG000004?dataset=gnomad_r4', + }, + ]) + + expect(await fetchSearchResults('gnomad_r4', 'CA32')).toEqual([ + { label: 'CA32 (variant)', value: `/variant/CA32?dataset=gnomad_r4` }, + { + label: 'CA32 (ENSG000006)', + value: '/gene/ENSG000006?dataset=gnomad_r4', + }, + { + label: 'CA321', + value: '/gene/ENSG000001?dataset=gnomad_r4', + }, + { + label: 'CA3213', + value: '/gene/ENSG000005?dataset=gnomad_r4', + }, + { + label: 'CA325', + value: '/gene/ENSG000003?dataset=gnomad_r4', + }, + { + label: 'CA327', + value: '/gene/ENSG000004?dataset=gnomad_r4', + }, + ]) + }) + }) }) diff --git a/browser/src/search.ts b/browser/src/search.ts index e9e082d16..6bfde8e5c 100644 --- a/browser/src/search.ts +++ b/browser/src/search.ts @@ -6,10 +6,80 @@ import { isRsId, } from '@gnomad/identifiers' -import { DatasetId, referenceGenome } from '@gnomad/dataset-metadata/metadata' +import { + DatasetId, + ReferenceGenome, + referenceGenome as getReferenceGenome, +} from '@gnomad/dataset-metadata/metadata' import { isStructuralVariantId } from './identifiers' -export const fetchSearchResults = (datasetId: DatasetId, query: string) => { +const fetchGeneSymbolSearchResults = (query: string, referenceGenome: ReferenceGenome) => { + return fetch('/api/', { + body: JSON.stringify({ + query: ` + query GeneSearch($query: String!, $referenceGenome: ReferenceGenomeId!) { + gene_search(query: $query, reference_genome: $referenceGenome) { + ensembl_id + symbol + } + } + `, + variables: { query, referenceGenome }, + }), + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }).then((response) => response.json()) +} + +type GeneSearchResult = { data: { gene_search: { ensembl_id: string; symbol: string }[] } } + +const parseGeneSearchResults = ( + response: GeneSearchResult, + query: string, + datasetId: DatasetId, + startingGeneSymbolCounts: Record = {} +): [SearchResultItem[], Record] => { + const genes = response.data.gene_search + const geneSymbolCounts = { ...startingGeneSymbolCounts } + + genes.forEach((gene) => { + if (geneSymbolCounts[gene.symbol] === undefined) { + geneSymbolCounts[gene.symbol] = 0 + } + geneSymbolCounts[gene.symbol] += 1 + }) + + const formattedGenes = genes + .sort((gene1, gene2) => { + const symbolPrefix = query.toUpperCase() + const symbol1 = gene1.symbol.toUpperCase() + const symbol2 = gene2.symbol.toUpperCase() + + if (symbol1.startsWith(symbolPrefix) && !symbol2.startsWith(symbolPrefix)) { + return -1 + } + + if (!symbol1.startsWith(symbolPrefix) && symbol2.startsWith(symbolPrefix)) { + return 1 + } + return symbol1.localeCompare(symbol2) + }) + .map((gene) => ({ + label: + geneSymbolCounts[gene.symbol] > 1 ? `${gene.symbol} (${gene.ensembl_id})` : gene.symbol, + + value: `/gene/${gene.ensembl_id}?dataset=${datasetId}`, + })) + + return [formattedGenes, geneSymbolCounts] +} + +type SearchResultItem = { label: string; value: string } + +export const fetchSearchResults = ( + datasetId: DatasetId, + query: string +): Promise => { if (datasetId.startsWith('gnomad_sv')) { // ============================================================================================== // Structural Variants @@ -49,14 +119,26 @@ export const fetchSearchResults = (datasetId: DatasetId, query: string) => { ]) } + // Some gene symbols also match the format for variant CAIDs, so we have + // to cover that as a special case if (/^CA[0-9]+$/i.test(query)) { const caid = query.toUpperCase() - return Promise.resolve([ - { - label: caid, - value: `/variant/${caid}?dataset=${datasetId}`, - }, - ]) + return fetchGeneSymbolSearchResults(query, getReferenceGenome(datasetId)) + .then((response) => { + if (!response?.data?.gene_search) { + return [] + } + return response + }) + .then((response) => parseGeneSearchResults(response, query, datasetId, { [caid]: 1 })) + .then(([geneSearchResults, geneSymbolCounts]) => { + const variantItem = { + label: geneSymbolCounts[caid] > 1 ? `${caid} (variant)` : caid, + value: `/variant/${caid}?dataset=${datasetId}`, + } + + return [variantItem, ...geneSearchResults] + }) } if (/^[0-9]+$/.test(query)) { @@ -132,60 +214,16 @@ export const fetchSearchResults = (datasetId: DatasetId, query: string) => { // ============================================================================================== if (/^[A-Z][A-Z0-9-]*$/.test(upperCaseQuery)) { - return fetch('/api/', { - body: JSON.stringify({ - query: ` - query GeneSearch($query: String!, $referenceGenome: ReferenceGenomeId!) { - gene_search(query: $query, reference_genome: $referenceGenome) { - ensembl_id - symbol - } - } - `, - variables: { query, referenceGenome: referenceGenome(datasetId) }, - }), - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - }) - .then((response) => response.json()) + return fetchGeneSymbolSearchResults(query, getReferenceGenome(datasetId)) .then((response) => { - if (!response.data.gene_search) { + if (!response?.data?.gene_search) { throw new Error('Unable to retrieve search results') } - - const genes = response.data.gene_search as { ensembl_id: string; symbol: string }[] - - const geneSymbolCounts: Record = {} - genes.forEach((gene) => { - if (geneSymbolCounts[gene.symbol] === undefined) { - geneSymbolCounts[gene.symbol] = 0 - } - geneSymbolCounts[gene.symbol] += 1 - }) - - return genes - .sort((gene1, gene2) => { - const symbolPrefix = query.toUpperCase() - const symbol1 = gene1.symbol.toUpperCase() - const symbol2 = gene2.symbol.toUpperCase() - - if (symbol1.startsWith(symbolPrefix) && !symbol2.startsWith(symbolPrefix)) { - return -1 - } - - if (!symbol1.startsWith(symbolPrefix) && symbol2.startsWith(symbolPrefix)) { - return 1 - } - return symbol1.localeCompare(symbol2) - }) - .map((gene) => ({ - label: - geneSymbolCounts[gene.symbol] > 1 - ? `${gene.symbol} (${gene.ensembl_id})` - : gene.symbol, - - value: `/gene/${gene.ensembl_id}?dataset=${datasetId}`, - })) + return response + }) + .then((response) => { + const [geneSearchResults] = parseGeneSearchResults(response, query, datasetId) + return geneSearchResults }) }