-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathio.go
132 lines (116 loc) · 3.72 KB
/
io.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package joise
import (
"database/sql"
"fmt"
"github.com/lib/pq"
)
// rawTokenSet is the record produced for each row read by querySets: the
// row's ID, its integer tokens, and the raw byte values looked up for them.
type rawTokenSet struct {
	// ID is the value of the row's id column.
	ID int64
	// Tokens is the row's tokens column decoded as an integer array.
	Tokens []int64
	// RawTokens holds the raw_token values aggregated for this row's tokens.
	RawTokens [][]byte
}
// ListEntry describes one set referenced by an inverted list: the set's ID,
// its size, and the position at which the list's token matches in that set.
type ListEntry struct {
	// ID identifies the set.
	ID int64
	// Size is the size recorded for the set.
	Size int
	// MatchPosition is the matching position of the token in the set.
	MatchPosition int
}
// bySize provides the Len, Swap and Less methods of sort.Interface over a
// slice of ListEntry, ordering entries by ascending Size.
type bySize []ListEntry

// Len reports how many entries the slice holds.
func (s bySize) Len() int {
	return len(s)
}

// Swap exchanges the entries at indices a and b.
func (s bySize) Swap(a, b int) {
	tmp := s[a]
	s[a] = s[b]
	s[b] = tmp
}

// Less reports whether the entry at a has a strictly smaller Size than the
// entry at b.
func (s bySize) Less(a, b int) bool {
	return s[b].Size > s[a].Size
}
// byPrefixLength provides the Len, Swap and Less methods of sort.Interface
// over a slice of ListEntry. Entries are ordered by descending MatchPosition,
// which this file treats as the length of the matching prefix.
type byPrefixLength []ListEntry

// Len reports how many entries the slice holds.
func (s byPrefixLength) Len() int {
	return len(s)
}

// Swap exchanges the entries at indices a and b.
func (s byPrefixLength) Swap(a, b int) {
	tmp := s[a]
	s[a] = s[b]
	s[b] = tmp
}

// Less reports whether the entry at a has a strictly larger MatchPosition
// than the entry at b, so larger positions sort first.
func (s byPrefixLength) Less(a, b int) bool {
	return s[b].MatchPosition < s[a].MatchPosition
}
// bySuffixLength provides the Len, Swap and Less methods of sort.Interface
// over a slice of ListEntry. Entries are ordered by descending
// Size - MatchPosition, which this file treats as the length of the matching
// suffix.
//
// NOTE(review): the comment previously attached to this type called the
// ordering ascending, but the comparison sorts larger values first; confirm
// which direction callers expect.
type bySuffixLength []ListEntry

// Len reports how many entries the slice holds.
func (s bySuffixLength) Len() int {
	return len(s)
}

// Swap exchanges the entries at indices a and b.
func (s bySuffixLength) Swap(a, b int) {
	tmp := s[a]
	s[a] = s[b]
	s[b] = tmp
}

// Less reports whether Size - MatchPosition of the entry at a is strictly
// larger than that of the entry at b.
func (s bySuffixLength) Less(a, b int) bool {
	suffixA := s[a].Size - s[a].MatchPosition
	suffixB := s[b].Size - s[b].MatchPosition
	return suffixA > suffixB
}
// SetTokens reads the tokens column of the row in table whose id equals
// setID and returns it as a slice of int64.
//
// The table name is quoted with pq.QuoteIdentifier before it is interpolated
// into the query, matching the other readers in this file, so a name
// containing special characters cannot alter the statement.
//
// SetTokens panics if the query or the scan fails, which includes the case
// where no row has the given id.
func SetTokens(db *sql.DB, table string, setID int64) []int64 {
	query := fmt.Sprintf(`
SELECT tokens FROM %s WHERE id = $1;`, pq.QuoteIdentifier(table))
	var tokens []int64
	if err := db.QueryRow(query, setID).Scan(pq.Array(&tokens)); err != nil {
		panic(err)
	}
	return tokens
}
// setTokensPrefix reads the leading slice tokens[1:endPos+1] of the row in
// table whose id equals setID. The bounds are passed to the database as a
// 1-based array slice, so endPos acts as an inclusive zero-start index
// (assuming the array uses the default lower bound of 1).
//
// The table name is quoted with pq.QuoteIdentifier before it is interpolated
// into the query, matching the other readers in this file.
//
// setTokensPrefix panics if the query or the scan fails, which includes the
// case where no row has the given id.
func setTokensPrefix(db *sql.DB, table string, setID int64, endPos int) []int64 {
	query := fmt.Sprintf(`
SELECT tokens[1:$1] FROM %s WHERE id = $2;`, pq.QuoteIdentifier(table))
	var tokens []int64
	if err := db.QueryRow(query, endPos+1, setID).Scan(pq.Array(&tokens)); err != nil {
		panic(err)
	}
	return tokens
}
// setTokensSuffix reads the trailing slice tokens[startPos+1:size] of the row
// in table whose id equals setID. The lower bound is passed as a 1-based
// array index, so startPos acts as an inclusive zero-start index (assuming
// the array uses the default lower bound of 1).
//
// NOTE(review): the upper bound `size` is resolved by the database,
// presumably as a column of table holding the set's size; confirm against
// the schema.
//
// The table name is quoted with pq.QuoteIdentifier before it is interpolated
// into the query, matching the other readers in this file.
//
// setTokensSuffix panics if the query or the scan fails, which includes the
// case where no row has the given id.
func setTokensSuffix(db *sql.DB, table string, setID int64, startPos int) []int64 {
	query := fmt.Sprintf(`
SELECT tokens[$1:size] FROM %s WHERE id = $2;`, pq.QuoteIdentifier(table))
	var tokens []int64
	if err := db.QueryRow(query, startPos+1, setID).Scan(pq.Array(&tokens)); err != nil {
		panic(err)
	}
	return tokens
}
// setTokensSubset reads a slice of the tokens column of the row in table
// whose id equals setID.
//
// startPos is an inclusive zero-start index and endPos is a non-inclusive
// zero-start index; they are sent to the database as the array slice bounds
// startPos+1 and endPos.
//
// The table name is quoted with pq.QuoteIdentifier. The function panics if
// the query or the scan fails.
func setTokensSubset(db *sql.DB, table string, setID int64, startPos, endPos int) []int64 {
	var tokens []int64
	quoted := pq.QuoteIdentifier(table)
	query := fmt.Sprintf(`
SELECT tokens[$1:$2] FROM %s WHERE id = $3;`, quoted)
	lower := startPos + 1
	row := db.QueryRow(query, lower, endPos, setID)
	err := row.Scan(pq.Array(&tokens))
	if err != nil {
		panic(err)
	}
	return tokens
}
// InvertedList reads the inverted list stored for token in table and returns
// it as a slice of ListEntry.
//
// The row's set_ids, set_sizes and match_positions arrays are read and
// combined element by element: entry i takes its ID, Size and MatchPosition
// from index i of the respective array. The returned slice has one entry per
// element of set_ids.
//
// The table name is quoted with pq.QuoteIdentifier. The function panics if
// the query or the scan fails, and also if set_sizes or match_positions is
// shorter than set_ids.
func InvertedList(db *sql.DB, table string, token int64) (entries []ListEntry) {
	query := fmt.Sprintf(`
SELECT set_ids, set_sizes, match_positions FROM %s WHERE token = $1`, pq.QuoteIdentifier(table))
	var (
		ids       []int64
		sizes     []int64
		positions []int64
	)
	row := db.QueryRow(query, token)
	if err := row.Scan(pq.Array(&ids), pq.Array(&sizes), pq.Array(&positions)); err != nil {
		panic(err)
	}
	entries = make([]ListEntry, 0, len(ids))
	for i, id := range ids {
		entries = append(entries, ListEntry{
			ID:            id,
			Size:          int(sizes[i]),
			MatchPosition: int(positions[i]),
		})
	}
	return entries
}
// querySets reads every row of queryTable and returns one rawTokenSet per
// row, in the order the database returns them.
//
// For each row it selects the id, the tokens array, and an array_agg of the
// raw_token values from listTable whose token is any of the row's tokens.
// Both table names are quoted with pq.QuoteIdentifier.
//
// The returned slice is non-nil even when queryTable has no rows. The
// function panics if the query, a row scan, or the row iteration fails.
func querySets(db *sql.DB, listTable, queryTable string) []rawTokenSet {
	rows, err := db.Query(fmt.Sprintf(`
SELECT id, (
SELECT array_agg(raw_token)
FROM %s
WHERE token = any(tokens)
), tokens FROM %s`, pq.QuoteIdentifier(listTable), pq.QuoteIdentifier(queryTable)))
	if err != nil {
		panic(err)
	}
	// Release the result set and its connection on every exit path,
	// including a panic raised by a failed Scan below. Close is safe to call
	// again after Next has already exhausted the rows.
	defer rows.Close()

	queries := make([]rawTokenSet, 0)
	for rows.Next() {
		var query rawTokenSet
		// Scan the aggregated bytea array into the pq helper type first; it
		// is assigned to the [][]byte field afterwards.
		var ba pq.ByteaArray
		if err := rows.Scan(&query.ID, &ba, pq.Array(&query.Tokens)); err != nil {
			panic(err)
		}
		query.RawTokens = ba
		queries = append(queries, query)
	}
	if err := rows.Err(); err != nil {
		panic(err)
	}
	return queries
}