From 41cda8b99b17225f2b758178fa6e031ab144fb5d Mon Sep 17 00:00:00 2001 From: blesssan Date: Sun, 17 Nov 2024 02:38:48 +0800 Subject: [PATCH 1/2] fix: add tokenizer for the query string --- lunr-repro.tsx | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/lunr-repro.tsx b/lunr-repro.tsx index 41df1a4..7c174c2 100644 --- a/lunr-repro.tsx +++ b/lunr-repro.tsx @@ -93,19 +93,30 @@ const Search: React.FC<{ query: string; docs: Record; }> = function ({ query, docs }) { - const lunrIndex = useMemo((() => lunr(function () { - this.ref('name'); - this.field('body'); - for (const [name, body] of Object.entries(docs)) { - this.use((lunr as any).ja); - this.add({ name, body }); - } - //console.debug(`Indexed ${docs.length} docs`); - })), [docs]); + // example input = 標準製品 + // in Japanese these are two words, 標準 and 製品. Which will be tokenized in the docs as such (separated). + // in order to search for 標準製品. Use the same tokenizer, to separate the query. + // create a tokenized query, which should use the function to tokenize the doc + const queryTokenized = lunr.ja.tokenizer(query); + // turn the array back into proper lunr query search term. such as "標準 製品" + const queryTerm = queryTokenized.join(" "); + const lunrIndex = useMemo( + () => + lunr(function () { + this.use((lunr as any).ja); + this.ref("name"); + this.field("body"); + for (const [name, body] of Object.entries(docs)) { + this.add({ name, body }); + } + //console.debug(`Indexed ${docs.length} docs`); + }), + [docs] + ); const [results, error] = useMemo(() => { try { - return [lunrIndex.search(query) ?? [], null]; + return [lunrIndex.search(queryTerm) ?? 
[], null]; } catch (e) { return [[], `${e.message}`]; } From 83c7253cf72cbd9cbfb9d6e6e30093528b604543 Mon Sep 17 00:00:00 2001 From: blesssan Date: Sun, 17 Nov 2024 09:29:37 +0800 Subject: [PATCH 2/2] refactor: move tokenization logic into the existing memo --- lunr-repro.tsx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lunr-repro.tsx b/lunr-repro.tsx index 7c174c2..305e169 100644 --- a/lunr-repro.tsx +++ b/lunr-repro.tsx @@ -93,13 +93,6 @@ const Search: React.FC<{ query: string; docs: Record; }> = function ({ query, docs }) { - // example input = 標準製品 - // in Japanese these are two words, 標準 and 製品. Which will be tokenized in the docs as such (separated). - // in order to search for 標準製品. Use the same tokenizer, to separate the query. - // create a tokenized query, which should use the function to tokenize the doc - const queryTokenized = lunr.ja.tokenizer(query); - // turn the array back into proper lunr query search term. such as "標準 製品" - const queryTerm = queryTokenized.join(" "); const lunrIndex = useMemo( () => lunr(function () { @@ -115,6 +108,13 @@ const Search: React.FC<{ ); const [results, error] = useMemo(() => { + // Example input: 標準製品. + // In Japanese this is two words, 標準 and 製品, which the indexer tokenizes separately in the docs. + // To match 標準製品, the query must be split with the same tokenizer that was used for the docs. + // Tokenize the query with the doc tokenizer: + const queryTokenized = lunr.ja.tokenizer(query); + // Join the tokens back into a proper lunr search string, e.g. "標準 製品". + const queryTerm = queryTokenized.join(" "); try { return [lunrIndex.search(queryTerm) ?? [], null]; } catch (e) {