-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy path05_search_quality.sql
71 lines (37 loc) · 1.14 KB
/
05_search_quality.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
-- region unaccent() remove national characters
SELECT unaccent('bądź łaskawy');
SELECT unaccent('With wide support');
SELECT unaccent('我想挣脱束缚');
-- endregion
-- region similarity() for misspelling
-- a) the same words, different formatting
SELECT similarity('Something', 'something');
-- b) Multiple words
SELECT similarity('Something different', 'samething diferent');
-- c) Different words, common part
SELECT similarity('Something', 'everything');
-- endregion
-- region similarity in practise
-- List of misspelled words
CREATE MATERIALIZED VIEW misspell_index AS
SELECT word
FROM ts_stat(
'SELECT ' ||
' to_tsvector(''simple'', article.title) || ' ||
' to_tsvector(''simple'', article.content) ' ||
'FROM article'
);
-- REFRESH MATERIALIZED VIEW misspell_index;
-- endregion
-- region Typo hints
-- TODO: Make possible to searching by multiple words
SELECT
word,
similarity
FROM misspell_index,
similarity(word, 'twiter') AS similarity
WHERE similarity > 0.5
ORDER BY similarity DESC;
SELECT similarity('twitter', 'twiter');
SELECT similarity('winter', 'winer');
-- endregion