-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy path01_stemmer.sql
73 lines (34 loc) · 1.1 KB
/
01_stemmer.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/**
* How does search work?
*
* Full-text search is a set of techniques for finding natural-language
* documents that match a query.
*/
-- region Key mechanism: Stemmer
-- to_tsvector() parses text into tokens, normalizes them to lexemes and drops
-- stop words. The 'english' configuration is named explicitly so the output
-- does not depend on the session's default_text_search_config setting.

-- A short sentence: stop words disappear, the remaining words are stemmed.
SELECT to_tsvector('english', 'impossible is nothing');

-- Related and merely similar-looking words: compare the lexemes each yields.
SELECT to_tsvector('english', 'impossible, impossibility, imposter');
-- endregion
-- region Under the hood
-- Every call names the 'english' configuration explicitly, so the results do
-- not depend on the session's default_text_search_config setting.

-- a) Suffix-stripping + Eliminate casing
SELECT to_tsvector('english', 'ran != run or running');

-- b) Parsing special tokens
-- The parser classifies URLs and markup tags as token types of their own
-- instead of splitting them into ordinary words.
SELECT to_tsvector('english', 'Sponsored by: https://goyello.com');
SELECT to_tsvector('english', '<a href="#">HTML</a> content');

-- ts_debug() reports, per token, the token type, the dictionaries consulted,
-- the dictionary that recognized it and the lexemes produced. The columns are
-- listed explicitly rather than relying on SELECT *.
SELECT
    alias,
    description,
    token,
    dictionaries,
    dictionary,
    lexemes
FROM ts_debug('english', '<strong>HTML 5.1</strong>');
-- endregion
-- region Difference between languages
-- The first argument selects the text search configuration, which decides
-- the stop-word list and the stemming rules applied to the text.

-- English configuration.
SELECT to_tsvector(
    'english',
    'It''s not a bug - it''s a feature.'
);

-- Dutch configuration, fed a Dutch sentence.
SELECT to_tsvector(
    'dutch',
    'Het is bug - niet feature.'
);

-- "simple" configuration on the English sentence again, for comparison
-- with the first query.
SELECT to_tsvector(
    'simple',
    'It''s not a bug - it''s a feature.'
);
-- endregion
-- region Supported languages
-- Each row of pg_ts_config is one text search configuration; cfgname is the
-- name accepted as the first argument of to_tsvector().
SELECT cfg.cfgname
FROM pg_ts_config AS cfg;
-- endregion
-- region Building document
-- Builds one search document per article by concatenating the tsvectors of
-- its title, its content and the name of its category (if any).
--
-- Every input is wrapped in coalesce(): to_tsvector(NULL) is NULL and
-- tsvector || NULL is NULL, so a single NULL column would otherwise blank
-- the whole document. c.name is NULL whenever the LEFT JOIN finds no category.
-- NOTE(review): whether title/content are nullable is not visible from this
-- file; the guard is a no-op if they are declared NOT NULL.
-- NOTE(review): no configuration is passed, so default_text_search_config
-- applies; pin one here if the articles' language is known.
SELECT
    to_tsvector(coalesce(a.title, '')) ||
    to_tsvector(coalesce(a.content, '')) ||
    to_tsvector(coalesce(c.name, '')) AS document
FROM article AS a
LEFT JOIN category AS c
    ON c.id = a.categoryid;
-- endregion