-
Notifications
You must be signed in to change notification settings - Fork 0
/
lsh_test.go
100 lines (78 loc) · 3.22 KB
/
lsh_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package lsh
import (
"testing"
"github.com/stretchr/testify/assert"
)
// Package-level fixtures for the LSH tests.
var (
	// aShingles is what Shingle returns for the reference Sudzo sentence.
	aShingles = Shingle([]string{
		"A spokesperson for the Sudzo Corporation revealed today that studies have shown it is good for people to buy Sudzo products.",
	})

	// bShingles is what Shingle returns for a reworded version of the
	// sentence behind aShingles; the "similar" tests expect the two to be
	// reported as candidates of each other.
	bShingles = Shingle([]string{
		"The Sudzo Corporation has revealed today that buying Sudzo products is good for people.",
	})

	// cShingles is what Shingle returns for a third Sudzo-related sentence.
	// NOTE(review): not referenced in this part of the file; presumably used
	// by other tests in the package.
	cShingles = Shingle([]string{
		"A spokesperson from the Sudzo Corporation has made an announcement about products of corporation.",
	})

	// simpleShingles holds four small hand-written sets over the letters a-e.
	// NOTE(review): not referenced in this part of the file; presumably used
	// by other tests in the package.
	simpleShingles = [][]string{
		{"a", "d"},
		{"c"},
		{"b", "d", "e"},
		{"a", "c", "d"},
	}
)
// Test_LSH_equalCandidates feeds three documents through Minhash and LSH, of
// which documents 0 and 2 are byte-identical and document 1 is unrelated. It
// expects documents 0 and 2 to list exactly one candidate each, namely one
// another, with an Elections count of 1.
//
// NOTE(review): the arguments 5 (Minhash) and 1 (LSH) are passed through
// unchanged; presumably they are the signature length and the band setting.
// Confirm against the Minhash and LSH definitions.
func Test_LSH_equalCandidates(t *testing.T) {
	equalShingles := [][]string{
		0: {"A spokesperson for the Sudzo Corporation revealed today that studies have shown it is good for people to buy Sudzo products."},
		1: {"There was a boy whos name was Jim. And all the friends were very good to him."},
		2: {"A spokesperson for the Sudzo Corporation revealed today that studies have shown it is good for people to buy Sudzo products."},
	}

	buckets := LSH(Minhash(equalShingles, 5), 1)
	candidates := buckets.FindCandidates()

	candidatesOf0 := candidates.GetByKey(0)
	candidatesOf2 := candidates.GetByKey(2)

	// Evaluate both length checks so each list is reported, then stop if
	// either failed: the element assertions below index position 0 and would
	// panic on an empty list, burying the assertion output in a stack trace.
	okOf0 := assert.Len(t, candidatesOf0, 1)
	okOf2 := assert.Len(t, candidatesOf2, 1)
	if !okOf0 || !okOf2 {
		return
	}

	assert.Equal(t, 2, candidatesOf0[0].Index)
	assert.Equal(t, 1, candidatesOf0[0].Elections)

	assert.Equal(t, 0, candidatesOf2[0].Index)
	assert.Equal(t, 1, candidatesOf2[0].Elections)
}
// Test_LSH_equalCandidatePairs runs the same three-document input as the
// equal-candidates test (documents 0 and 2 identical, document 1 unrelated)
// and expects FindCandidatePairs to produce a single pair, stored under the
// key "0_2" with A == 0 and B == 2.
func Test_LSH_equalCandidatePairs(t *testing.T) {
	// Documents 0 and 2 share this exact sentence.
	const repeated = "A spokesperson for the Sudzo Corporation revealed today that studies have shown it is good for people to buy Sudzo products."

	docs := [][]string{
		{repeated},
		{"There was a boy whos name was Jim. And all the friends were very good to him."},
		{repeated},
	}

	index := LSH(Minhash(docs, 5), 1)
	pairs := index.FindCandidatePairs()

	assert.Equal(t, 1, len(pairs.Index))

	got, found := pairs.Index["0_2"]
	assert.True(t, found)
	assert.Equal(t, 0, got.A)
	assert.Equal(t, 2, got.B)
}
// Test_LSH_similarCandidates feeds aShingles, an unrelated sentence and
// bShingles through Minhash and LSH. It expects documents 0 and 2 to list
// exactly one candidate each, namely one another, with an Elections count
// of 1.
//
// NOTE(review): the arguments 5 (Minhash) and 3 (LSH) are passed through
// unchanged; presumably they are the signature length and the band setting.
// Confirm against the Minhash and LSH definitions.
func Test_LSH_similarCandidates(t *testing.T) {
	similarShingles := [][]string{
		0: aShingles,
		1: {"There was a boy whos name was Jim. And all the friends were very good to him."},
		2: bShingles,
	}

	buckets := LSH(Minhash(similarShingles, 5), 3)
	candidates := buckets.FindCandidates()

	candidatesOf0 := candidates.GetByKey(0)
	candidatesOf2 := candidates.GetByKey(2)

	// Evaluate both length checks so each list is reported, then stop if
	// either failed: the element assertions below index position 0 and would
	// panic on an empty list, burying the assertion output in a stack trace.
	okOf0 := assert.Len(t, candidatesOf0, 1)
	okOf2 := assert.Len(t, candidatesOf2, 1)
	if !okOf0 || !okOf2 {
		return
	}

	assert.Equal(t, 2, candidatesOf0[0].Index)
	assert.Equal(t, 1, candidatesOf0[0].Elections)

	assert.Equal(t, 0, candidatesOf2[0].Index)
	assert.Equal(t, 1, candidatesOf2[0].Elections)
}
// Test_LSH_similarCandidatePairs runs aShingles, an unrelated sentence and
// bShingles through Minhash and LSH, and expects FindCandidatePairs to
// produce a single pair, stored under the key "0_2" with A == 0 and B == 2.
func Test_LSH_similarCandidatePairs(t *testing.T) {
	docs := [][]string{
		aShingles,
		{"There was a boy whos name was Jim. And all the friends were very good to him."},
		bShingles,
	}

	index := LSH(Minhash(docs, 5), 3)
	pairs := index.FindCandidatePairs()

	assert.Equal(t, 1, len(pairs.Index))

	got, found := pairs.Index["0_2"]
	assert.True(t, found)
	assert.Equal(t, 0, got.A)
	assert.Equal(t, 2, got.B)
}