Skip to content

Commit

Permalink
update benchmarks & examples
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreben committed Oct 11, 2024
1 parent 50c3b44 commit 24ec0f6
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 152 deletions.
103 changes: 69 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func main() {
// or, just return the nearest neighbors' distances and indices:
// distances, indices := model.Find(k, 0b101011)

fmt.Println("Votes:", bitknn.VoteSlice(votes))
fmt.Println("Votes:", votes)

// you can also use a map for the votes.
// this is good if you have a very large number of different labels:
Expand Down Expand Up @@ -120,7 +120,7 @@ func main() {
// or, just return the nearest neighbors' distances and indices:
// distances, indices := model.Find(k, 0b101011)

fmt.Println("Votes:", bitknn.VoteSlice(votes))
fmt.Println("Votes:", votes)

// you can also use a map for the votes
votesMap := make(map[int]float64)
Expand Down Expand Up @@ -187,7 +187,7 @@ func main() {
query := pack.String("fob")
model.Predict(k, query, bitknn.VoteSlice(votes))

fmt.Println("Votes:", bitknn.VoteSlice(votes))
fmt.Println("Votes:", votes)
}
```

Expand All @@ -210,37 +210,72 @@ pkg: github.com/keilerkonzept/bitknn
cpu: Apple M1 Pro
```

| Op | N | k | Distance weighting | Vote values | sec / op | B/op | allocs/op |
|-----------|---------|-----|--------------------|-------------|--------------|------|-----------|
| `Predict` | 100 | 3 | | | 138.7n ± 22% | 0 | 0 |
| `Predict` | 100 | 3 | | ☑️ | 127.8n ± 11% | 0 | 0 |
| `Predict` | 100 | 3 | linear | | 137.0n ± 11% | 0 | 0 |
| `Predict` | 100 | 3 | linear | ☑️ | 136.7n ± 10% | 0 | 0 |
| `Predict` | 100 | 3 | quadratic | | 137.2n ± 7% | 0 | 0 |
| `Predict` | 100 | 3 | quadratic | ☑️ | 130.4n ± 4% | 0 | 0 |
| `Predict` | 100 | 3 | custom | | 140.6n ± 7% | 0 | 0 |
| `Predict` | 100 | 3 | custom | ☑️ | 134.9n ± 13% | 0 | 0 |
| `Predict` | 100 | 10 | | | 307.4n ± 11% | 0 | 0 |
| `Predict` | 100 | 10 | | ☑️ | 297.8n ± 15% | 0 | 0 |
| `Predict` | 100 | 10 | linear | | 288.2n ± 18% | 0 | 0 |
| `Predict` | 100 | 10 | linear | ☑️ | 302.9n ± 14% | 0 | 0 |
| `Predict` | 100 | 10 | quadratic | | 283.7n ± 15% | 0 | 0 |
| `Predict` | 100 | 10 | quadratic | ☑️ | 290.0n ± 13% | 0 | 0 |
| `Predict` | 100 | 10 | custom | | 313.1n ± 17% | 0 | 0 |
| `Predict` | 100 | 10 | custom | ☑️ | 316.2n ± 11% | 0 | 0 |
| `Predict` | 100 | 100 | | ☑️ | 545.4n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | linear | | 542.4n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | linear | ☑️ | 577.5n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | quadratic | | 553.1n ± 3% | 0 | 0 |
| `Predict` | 100 | 100 | quadratic | ☑️ | 582.4n ± 6% | 0 | 0 |
| `Predict` | 100 | 100 | custom | | 683.8n ± 4% | 0 | 0 |
| `Predict` | 100 | 100 | custom | ☑️ | 748.5n ± 2% | 0 | 0 |
| `Predict` | 1000 | 3 | | | 669.5n ± 6% | 0 | 0 |
| `Predict` | 1000 | 10 | | | 930.3n ± 7% | 0 | 0 |
| `Predict` | 1000 | 100 | | | 3.762µ ± 5% | 0 | 0 |
| `Predict` | 1000000 | 3 | | | 532.1µ ± 1% | 0 | 0 |
| `Predict` | 1000000 | 10 | | | 534.5µ ± 1% | 0 | 0 |
| `Predict` | 1000000 | 100 | | | 551.7µ ± 1% | 0 | 0 |
| Model | Bits | N | k | Op | s/op | B/op | allocs/op |
|-----------|------|---------|-----|-----------|-------------|------|-----------|
| Model | 64 | 100 | 3 | `Predict` | 99.06n ± 2% | 0 | 0 |
| WideModel | 64 | 100 | 3 | `Predict` | 191.6n ± 1% | 0 | 0 |
| Model | 64 | 100 | 3 | `Find` | 88.09n ± 0% | 0 | 0 |
| WideModel | 64 | 100 | 3 | `Find` | 182.8n ± 1% | 0 | 0 |
| Model | 64 | 100 | 10 | `Predict` | 225.1n ± 1% | 0 | 0 |
| WideModel | 64 | 100 | 10 | `Predict` | 372.0n ± 1% | 0 | 0 |
| Model | 64 | 100 | 10 | `Find` | 202.9n ± 1% | 0 | 0 |
| WideModel | 64 | 100 | 10 | `Find` | 345.2n ± 0% | 0 | 0 |
| Model | 64 | 1000 | 3 | `Predict` | 538.2n ± 1% | 0 | 0 |
| WideModel | 64 | 1000 | 3 | `Predict` | 1.469µ ± 1% | 0 | 0 |
| Model | 64 | 1000 | 3 | `Find` | 525.8n ± 1% | 0 | 0 |
| WideModel | 64 | 1000 | 3 | `Find` | 1.465µ ± 1% | 0 | 0 |
| Model | 64 | 1000 | 10 | `Predict` | 835.4n ± 1% | 0 | 0 |
| WideModel | 64 | 1000 | 10 | `Predict` | 1.880µ ± 1% | 0 | 0 |
| Model | 64 | 1000 | 10 | `Find` | 807.4n ± 0% | 0 | 0 |
| WideModel | 64 | 1000 | 10 | `Find` | 1.867µ ± 2% | 0 | 0 |
| Model | 64 | 1000 | 100 | `Predict` | 3.718µ ± 0% | 0 | 0 |
| WideModel | 64 | 1000 | 100 | `Predict` | 4.935µ ± 0% | 0 | 0 |
| Model | 64 | 1000 | 100 | `Find` | 3.494µ ± 0% | 0 | 0 |
| WideModel | 64 | 1000 | 100 | `Find` | 4.701µ ± 0% | 0 | 0 |
| Model | 64 | 1000000 | 3 | `Predict` | 458.8µ ± 0% | 0 | 0 |
| WideModel | 64 | 1000000 | 3 | `Predict` | 1.301m ± 1% | 0 | 0 |
| Model | 64 | 1000000 | 3 | `Find` | 457.9µ ± 1% | 0 | 0 |
| WideModel | 64 | 1000000 | 3 | `Find` | 1.302m ± 1% | 0 | 0 |
| Model | 64 | 1000000 | 10 | `Predict` | 456.9µ ± 0% | 0 | 0 |
| WideModel | 64 | 1000000 | 10 | `Predict` | 1.295m ± 2% | 0 | 0 |
| Model | 64 | 1000000 | 10 | `Find` | 457.6µ ± 1% | 0 | 0 |
| WideModel | 64 | 1000000 | 10 | `Find` | 1.298m ± 1% | 0 | 0 |
| Model | 64 | 1000000 | 100 | `Predict` | 474.5µ ± 1% | 0 | 0 |
| WideModel | 64 | 1000000 | 100 | `Predict` | 1.316m ± 1% | 0 | 0 |
| Model | 64 | 1000000 | 100 | `Find` | 466.9µ ± 0% | 0 | 0 |
| WideModel | 64 | 1000000 | 100 | `Find` | 1.306m ± 0% | 0 | 0 |
| WideModel | 128 | 100 | 3 | `Predict` | 296.7n ± 0% | 0 | 0 |
| WideModel | 128 | 100 | 3 | `Find` | 285.8n ± 0% | 0 | 0 |
| WideModel | 128 | 100 | 10 | `Predict` | 467.4n ± 1% | 0 | 0 |
| WideModel | 128 | 100 | 10 | `Find` | 441.1n ± 1% | 0 | 0 |
| WideModel | 640 | 100 | 3 | `Predict` | 654.6n ± 1% | 0 | 0 |
| WideModel | 640 | 100 | 3 | `Find` | 640.3n ± 1% | 0 | 0 |
| WideModel | 640 | 100 | 10 | `Predict` | 850.0n ± 1% | 0 | 0 |
| WideModel | 640 | 100 | 10 | `Find` | 825.0n ± 0% | 0 | 0 |
| WideModel | 128 | 1000 | 3 | `Predict` | 2.384µ ± 0% | 0 | 0 |
| WideModel | 128 | 1000 | 3 | `Find` | 2.374µ ± 0% | 0 | 0 |
| WideModel | 128 | 1000 | 10 | `Predict` | 2.900µ ± 0% | 0 | 0 |
| WideModel | 128 | 1000 | 10 | `Find` | 2.901µ ± 1% | 0 | 0 |
| WideModel | 128 | 1000 | 100 | `Predict` | 5.630µ ± 1% | 0 | 0 |
| WideModel | 128 | 1000 | 100 | `Find` | 5.472µ ± 3% | 0 | 0 |
| WideModel | 128 | 1000000 | 3 | `Predict` | 2.266m ± 0% | 0 | 0 |
| WideModel | 128 | 1000000 | 3 | `Find` | 2.273m ± 3% | 0 | 0 |
| WideModel | 128 | 1000000 | 10 | `Predict` | 2.269m ± 0% | 0 | 0 |
| WideModel | 128 | 1000000 | 10 | `Find` | 2.261m ± 1% | 0 | 0 |
| WideModel | 128 | 1000000 | 100 | `Predict` | 2.295m ± 1% | 0 | 0 |
| WideModel | 128 | 1000000 | 100 | `Find` | 2.289m ± 0% | 0 | 0 |
| WideModel | 640 | 1000 | 3 | `Predict` | 6.214µ ± 2% | 0 | 0 |
| WideModel | 640 | 1000 | 3 | `Find` | 6.201µ ± 1% | 0 | 0 |
| WideModel | 640 | 1000 | 10 | `Predict` | 6.777µ ± 1% | 0 | 0 |
| WideModel | 640 | 1000 | 10 | `Find` | 6.728µ ± 1% | 0 | 0 |
| WideModel | 640 | 1000 | 100 | `Predict` | 11.16µ ± 2% | 0 | 0 |
| WideModel | 640 | 1000 | 100 | `Find` | 10.85µ ± 2% | 0 | 0 |
| WideModel | 640 | 1000000 | 3 | `Predict` | 5.756m ± 4% | 0 | 0 |
| WideModel | 640 | 1000000 | 3 | `Find` | 5.832m ± 2% | 0 | 0 |
| WideModel | 640 | 1000000 | 10 | `Predict` | 5.842m ± 1% | 0 | 0 |
| WideModel | 640 | 1000000 | 10 | `Find` | 5.830m ± 5% | 0 | 0 |
| WideModel | 640 | 1000000 | 100 | `Predict` | 5.914m ± 6% | 0 | 0 |
| WideModel | 640 | 1000000 | 100 | `Find` | 5.872m ± 1% | 0 | 0 |

## License

Expand Down
4 changes: 2 additions & 2 deletions example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func Example() {
// or, just return the nearest neighbors' distances and indices:
// distances, indices := model.Find(k, 0b101011)

fmt.Println("Votes:", bitknn.VoteSlice(votes))
fmt.Println("Votes:", votes)

// you can also use a map for the votes.
// this is good if you have a very large number of different labels:
Expand Down Expand Up @@ -56,7 +56,7 @@ func ExampleFitWide() {
query := pack.String("fob")
model.Predict(k, query, bitknn.VoteSlice(votes))

fmt.Println("Votes:", bitknn.VoteSlice(votes))
fmt.Println("Votes:", votes)

// Output:
// Votes: [0.25 0.16666666666666666]
Expand Down
12 changes: 10 additions & 2 deletions lsh/model_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/keilerkonzept/bitknn/lsh"
)

func Benchmark_Model_Predict(b *testing.B) {
func BenchmarkModel(b *testing.B) {
type bench struct {
hashes []lsh.Hash
dataSize []int
Expand All @@ -29,14 +29,22 @@ func Benchmark_Model_Predict(b *testing.B) {
query := testrandom.Query()
for _, k := range bench.k {
for _, hash := range bench.hashes {
b.Run(fmt.Sprintf("hash=%T_N=%d_k=%d", hash, dataSize, k), func(b *testing.B) {
b.Run(fmt.Sprintf("Op=Predict_hash=%T_N=%d_k=%d", hash, dataSize, k), func(b *testing.B) {
model := lsh.Fit(data, labels, hash)
model.PreallocateHeap(k)
b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Predict(k, query, bitknn.DiscardVotes)
}
})
b.Run(fmt.Sprintf("Op=Find_hash=%T_N=%d_k=%d", hash, dataSize, k), func(b *testing.B) {
model := lsh.Fit(data, labels, hash)
model.PreallocateHeap(k)
b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Find(k, query)
}
})
}
}
}
Expand Down
108 changes: 8 additions & 100 deletions model_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ package bitknn_test

import (
"fmt"
"math/rand/v2"
"testing"

"github.com/keilerkonzept/bitknn"
"github.com/keilerkonzept/bitknn/internal/testrandom"
)

func Benchmark_Model_Predict(b *testing.B) {
func BenchmarkModel(b *testing.B) {
type bench struct {
dataSize []int
k []int
Expand All @@ -21,117 +20,26 @@ func Benchmark_Model_Predict(b *testing.B) {
for _, bench := range benches {
for _, dataSize := range bench.dataSize {
for _, k := range bench.k {
b.Run(fmt.Sprintf("N=%d_k=%d", dataSize, k), func(b *testing.B) {
data := testrandom.Data(dataSize)
labels := testrandom.Labels(dataSize)
model := bitknn.Fit(data, labels)
query := testrandom.Query()
data := testrandom.Data(dataSize)
labels := testrandom.Labels(dataSize)
model := bitknn.Fit(data, labels)
query := testrandom.Query()

b.Run(fmt.Sprintf("Op=Predict_bits=64_N=%d_k=%d", dataSize, k), func(b *testing.B) {
model.PreallocateHeap(k)
b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Predict(k, query, bitknn.DiscardVotes)
}
})
}
}
}
}

func Benchmark_Model_PredictV(b *testing.B) {
votes := make([]float64, 256)
type bench struct {
dataSize []int
k []int
}
benches := []bench{
{dataSize: []int{100}, k: []int{3, 10, 100}},
}
for _, bench := range benches {
for _, dataSize := range bench.dataSize {
for _, k := range bench.k {
b.Run(fmt.Sprintf("N=%d_k=%d", dataSize, k), func(b *testing.B) {
data := testrandom.Data(dataSize)
labels := testrandom.Labels(dataSize)
values := testrandom.Values(dataSize)
model := bitknn.Fit(data, labels, bitknn.WithValues(values))
query := rand.Uint64()

b.Run(fmt.Sprintf("Op=Find_bits=64_N=%d_k=%d", dataSize, k), func(b *testing.B) {
model.PreallocateHeap(k)
voteSlice := bitknn.VoteSlice(votes)
b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Predict(k, query, &voteSlice)
model.Find(k, query)
}
})
}
}
}
}

func Benchmark_Model_PredictD(b *testing.B) {
votes := make([]float64, 256)
type bench struct {
dataSize []int
k []int
}
benches := []bench{
{dataSize: []int{100}, k: []int{3, 10, 100}},
}
for _, d := range []bitknn.DistanceWeighting{bitknn.DistanceWeightingLinear, bitknn.DistanceWeightingQuadratic, bitknn.DistanceWeightingCustom} {
for _, bench := range benches {
for _, dataSize := range bench.dataSize {
for _, k := range bench.k {
b.Run(fmt.Sprintf("DistFunc=%v_N=%d_k=%d", d, dataSize, k), func(b *testing.B) {
data := testrandom.Data(dataSize)
labels := testrandom.Labels(dataSize)
model := bitknn.Fit(data, labels)
model.DistanceWeighting = d
model.DistanceWeightingFunc = func(d int) float64 { return 1 / float64(1+d) }
query := rand.Uint64()
voteSlice := bitknn.VoteSlice(votes)

b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Predict(k, query, &voteSlice)
}
})
}
}
}
}
}

func Benchmark_Model_PredictDV(b *testing.B) {
votes := make([]float64, 256)
type bench struct {
dataSize []int
k []int
}
benches := []bench{
{dataSize: []int{100}, k: []int{3, 10, 100}},
}
for _, d := range []bitknn.DistanceWeighting{bitknn.DistanceWeightingLinear, bitknn.DistanceWeightingQuadratic, bitknn.DistanceWeightingCustom} {
for _, bench := range benches {
for _, dataSize := range bench.dataSize {
for _, k := range bench.k {
b.Run(fmt.Sprintf("DistFunc=%v_N=%d_k=%d", d, dataSize, k), func(b *testing.B) {
data := testrandom.Data(dataSize)
labels := testrandom.Labels(dataSize)
values := testrandom.Values(dataSize)
model := bitknn.Fit(data, labels, bitknn.WithValues(values))
model.DistanceWeighting = d
model.DistanceWeightingFunc = func(d int) float64 { return 1 / float64(1+d) }
query := rand.Uint64()
voteSlice := bitknn.VoteSlice(votes)

b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Predict(k, query, &voteSlice)
}
})
}
}
}
}
}
22 changes: 15 additions & 7 deletions model_wide_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import (

"github.com/keilerkonzept/bitknn"
"github.com/keilerkonzept/bitknn/internal/testrandom"
"github.com/keilerkonzept/bitknn/pack"
)

func Benchmark_WideModel_Predict(b *testing.B) {
func BenchmarkWideModel(b *testing.B) {
type bench struct {
dim []int
dataSize []int
Expand All @@ -22,18 +23,25 @@ func Benchmark_WideModel_Predict(b *testing.B) {
for _, dim := range bench.dim {
for _, dataSize := range bench.dataSize {
for _, k := range bench.k {
b.Run(fmt.Sprintf("dim=%d_N=%d_k=%d", dim*64, dataSize, k), func(b *testing.B) {
data := testrandom.WideData(dim, dataSize)
labels := testrandom.Labels(dataSize)
model := bitknn.FitWide(data, labels)
query := testrandom.WideQuery(dim)

data := testrandom.WideData(dim, dataSize)
pack.ReallocateFlat(data)
labels := testrandom.Labels(dataSize)
model := bitknn.FitWide(data, labels)
query := testrandom.WideQuery(dim)
b.Run(fmt.Sprintf("Op=Predict_bits=%d_N=%d_k=%d", dim*64, dataSize, k), func(b *testing.B) {
model.PreallocateHeap(k)
b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Predict(k, query, bitknn.DiscardVotes)
}
})
b.Run(fmt.Sprintf("Op=Find_bits=%d_N=%d_k=%d", dim*64, dataSize, k), func(b *testing.B) {
model.PreallocateHeap(k)
b.ResetTimer()
for n := 0; n < b.N; n++ {
model.Find(k, query)
}
})
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion nearest.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@ func Nearest(data []uint64, k int, x uint64, distances, indices []int) int {
heap.Push(dist, i)
}

if k0 < k {
if len(data) <= k {
return k0
}

maxDist := *distance0
_ = data[k]
for i := k; i < len(data); i++ {
dist := bits.OnesCount64(x ^ data[i])
if dist >= maxDist {
Expand Down
Loading

0 comments on commit 24ec0f6

Please sign in to comment.