From 24ec0f60bec6b7dd0981c6881d6d9e4e28e28e48 Mon Sep 17 00:00:00 2001 From: Sergey Grebenshchikov Date: Fri, 11 Oct 2024 17:21:30 +0200 Subject: [PATCH] update benchmarks & examples --- README.md | 103 +++++++++++++++++++++++++------------ example_test.go | 4 +- lsh/model_bench_test.go | 12 ++++- model_bench_test.go | 108 +++------------------------------------ model_wide_bench_test.go | 22 +++++--- nearest.go | 3 +- nearest_wide.go | 12 ++--- 7 files changed, 112 insertions(+), 152 deletions(-) diff --git a/README.md b/README.md index ac2ae30..5b3a7a4 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ func main() { // or, just return the nearest neighbor's distances and indices: // distances,indices := model.Find(k, 0b101011) - fmt.Println("Votes:", bitknn.VoteSlice(votes)) + fmt.Println("Votes:", votes) // you can also use a map for the votes. // this is good if you have a very large number of different labels: @@ -120,7 +120,7 @@ func main() { // or, just return the nearest neighbor's distances and indices: // distances,indices := model.Find(k, 0b101011) - fmt.Println("Votes:", bitknn.VoteSlice(votes)) + fmt.Println("Votes:", votes) // you can also use a map for the votes votesMap := make(map[int]float64) @@ -187,7 +187,7 @@ func main() { query := pack.String("fob") model.Predict(k, query, bitknn.VoteSlice(votes)) - fmt.Println("Votes:", bitknn.VoteSlice(votes)) + fmt.Println("Votes:", votes) } ``` @@ -210,37 +210,72 @@ pkg: github.com/keilerkonzept/bitknn cpu: Apple M1 Pro ``` -| Op | N | k | Distance weighting | Vote values | sec / op | B/op | allocs/op | -|-----------|---------|-----|--------------------|-------------|--------------|------|-----------| -| `Predict` | 100 | 3 | | | 138.7n ± 22% | 0 | 0 | -| `Predict` | 100 | 3 | | ☑️ | 127.8n ± 11% | 0 | 0 | -| `Predict` | 100 | 3 | linear | | 137.0n ± 11% | 0 | 0 | -| `Predict` | 100 | 3 | linear | ☑️ | 136.7n ± 10% | 0 | 0 | -| `Predict` | 100 | 3 | quadratic | | 137.2n ± 7% | 0 | 0 | -| 
`Predict` | 100 | 3 | quadratic | ☑️ | 130.4n ± 4% | 0 | 0 | -| `Predict` | 100 | 3 | custom | | 140.6n ± 7% | 0 | 0 | -| `Predict` | 100 | 3 | custom | ☑️ | 134.9n ± 13% | 0 | 0 | -| `Predict` | 100 | 10 | | | 307.4n ± 11% | 0 | 0 | -| `Predict` | 100 | 10 | | ☑️ | 297.8n ± 15% | 0 | 0 | -| `Predict` | 100 | 10 | linear | | 288.2n ± 18% | 0 | 0 | -| `Predict` | 100 | 10 | linear | ☑️ | 302.9n ± 14% | 0 | 0 | -| `Predict` | 100 | 10 | quadratic | | 283.7n ± 15% | 0 | 0 | -| `Predict` | 100 | 10 | quadratic | ☑️ | 290.0n ± 13% | 0 | 0 | -| `Predict` | 100 | 10 | custom | | 313.1n ± 17% | 0 | 0 | -| `Predict` | 100 | 10 | custom | ☑️ | 316.2n ± 11% | 0 | 0 | -| `Predict` | 100 | 100 | | ☑️ | 545.4n ± 4% | 0 | 0 | -| `Predict` | 100 | 100 | linear | | 542.4n ± 4% | 0 | 0 | -| `Predict` | 100 | 100 | linear | ☑️ | 577.5n ± 4% | 0 | 0 | -| `Predict` | 100 | 100 | quadratic | | 553.1n ± 3% | 0 | 0 | -| `Predict` | 100 | 100 | quadratic | ☑️ | 582.4n ± 6% | 0 | 0 | -| `Predict` | 100 | 100 | custom | | 683.8n ± 4% | 0 | 0 | -| `Predict` | 100 | 100 | custom | ☑️ | 748.5n ± 2% | 0 | 0 | -| `Predict` | 1000 | 3 | | | 669.5n ± 6% | 0 | 0 | -| `Predict` | 1000 | 10 | | | 930.3n ± 7% | 0 | 0 | -| `Predict` | 1000 | 100 | | | 3.762µ ± 5% | 0 | 0 | -| `Predict` | 1000000 | 3 | | | 532.1µ ± 1% | 0 | 0 | -| `Predict` | 1000000 | 10 | | | 534.5µ ± 1% | 0 | 0 | -| `Predict` | 1000000 | 100 | | | 551.7µ ± 1% | 0 | 0 | +| Model | Bits | N | k | Op | s/op | B/op | allocs/op | +|-----------|------|---------|-----|-----------|-------------|------|-----------| +| Model | 64 | 100 | 3 | `Predict` | 99.06n ± 2% | 0 | 0 | +| WideModel | 64 | 100 | 3 | `Predict` | 191.6n ± 1% | 0 | 0 | +| Model | 64 | 100 | 3 | `Find` | 88.09n ± 0% | 0 | 0 | +| WideModel | 64 | 100 | 3 | `Find` | 182.8n ± 1% | 0 | 0 | +| Model | 64 | 100 | 10 | `Predict` | 225.1n ± 1% | 0 | 0 | +| WideModel | 64 | 100 | 10 | `Predict` | 372.0n ± 1% | 0 | 0 | +| Model | 64 | 100 | 10 | `Find` | 202.9n ± 1% | 0 | 0 | +| 
WideModel | 64 | 100 | 10 | `Find` | 345.2n ± 0% | 0 | 0 | +| Model | 64 | 1000 | 3 | `Predict` | 538.2n ± 1% | 0 | 0 | +| WideModel | 64 | 1000 | 3 | `Predict` | 1.469µ ± 1% | 0 | 0 | +| Model | 64 | 1000 | 3 | `Find` | 525.8n ± 1% | 0 | 0 | +| WideModel | 64 | 1000 | 3 | `Find` | 1.465µ ± 1% | 0 | 0 | +| Model | 64 | 1000 | 10 | `Predict` | 835.4n ± 1% | 0 | 0 | +| WideModel | 64 | 1000 | 10 | `Predict` | 1.880µ ± 1% | 0 | 0 | +| Model | 64 | 1000 | 10 | `Find` | 807.4n ± 0% | 0 | 0 | +| WideModel | 64 | 1000 | 10 | `Find` | 1.867µ ± 2% | 0 | 0 | +| Model | 64 | 1000 | 100 | `Predict` | 3.718µ ± 0% | 0 | 0 | +| WideModel | 64 | 1000 | 100 | `Predict` | 4.935µ ± 0% | 0 | 0 | +| Model | 64 | 1000 | 100 | `Find` | 3.494µ ± 0% | 0 | 0 | +| WideModel | 64 | 1000 | 100 | `Find` | 4.701µ ± 0% | 0 | 0 | +| Model | 64 | 1000000 | 3 | `Predict` | 458.8µ ± 0% | 0 | 0 | +| WideModel | 64 | 1000000 | 3 | `Predict` | 1.301m ± 1% | 0 | 0 | +| Model | 64 | 1000000 | 3 | `Find` | 457.9µ ± 1% | 0 | 0 | +| WideModel | 64 | 1000000 | 3 | `Find` | 1.302m ± 1% | 0 | 0 | +| Model | 64 | 1000000 | 10 | `Predict` | 456.9µ ± 0% | 0 | 0 | +| WideModel | 64 | 1000000 | 10 | `Predict` | 1.295m ± 2% | 0 | 0 | +| Model | 64 | 1000000 | 10 | `Find` | 457.6µ ± 1% | 0 | 0 | +| WideModel | 64 | 1000000 | 10 | `Find` | 1.298m ± 1% | 0 | 0 | +| Model | 64 | 1000000 | 100 | `Predict` | 474.5µ ± 1% | 0 | 0 | +| WideModel | 64 | 1000000 | 100 | `Predict` | 1.316m ± 1% | 0 | 0 | +| Model | 64 | 1000000 | 100 | `Find` | 466.9µ ± 0% | 0 | 0 | +| WideModel | 64 | 1000000 | 100 | `Find` | 1.306m ± 0% | 0 | 0 | +| WideModel | 128 | 100 | 3 | `Predict` | 296.7n ± 0% | 0 | 0 | +| WideModel | 128 | 100 | 3 | `Find` | 285.8n ± 0% | 0 | 0 | +| WideModel | 128 | 100 | 10 | `Predict` | 467.4n ± 1% | 0 | 0 | +| WideModel | 128 | 100 | 10 | `Find` | 441.1n ± 1% | 0 | 0 | +| WideModel | 640 | 100 | 3 | `Predict` | 654.6n ± 1% | 0 | 0 | +| WideModel | 640 | 100 | 3 | `Find` | 640.3n ± 1% | 0 | 0 | +| WideModel | 640 | 
100 | 10 | `Predict` | 850.0n ± 1% | 0 | 0 | +| WideModel | 640 | 100 | 10 | `Find` | 825.0n ± 0% | 0 | 0 | +| WideModel | 128 | 1000 | 3 | `Predict` | 2.384µ ± 0% | 0 | 0 | +| WideModel | 128 | 1000 | 3 | `Find` | 2.374µ ± 0% | 0 | 0 | +| WideModel | 128 | 1000 | 10 | `Predict` | 2.900µ ± 0% | 0 | 0 | +| WideModel | 128 | 1000 | 10 | `Find` | 2.901µ ± 1% | 0 | 0 | +| WideModel | 128 | 1000 | 100 | `Predict` | 5.630µ ± 1% | 0 | 0 | +| WideModel | 128 | 1000 | 100 | `Find` | 5.472µ ± 3% | 0 | 0 | +| WideModel | 128 | 1000000 | 3 | `Predict` | 2.266m ± 0% | 0 | 0 | +| WideModel | 128 | 1000000 | 3 | `Find` | 2.273m ± 3% | 0 | 0 | +| WideModel | 128 | 1000000 | 10 | `Predict` | 2.269m ± 0% | 0 | 0 | +| WideModel | 128 | 1000000 | 10 | `Find` | 2.261m ± 1% | 0 | 0 | +| WideModel | 128 | 1000000 | 100 | `Predict` | 2.295m ± 1% | 0 | 0 | +| WideModel | 128 | 1000000 | 100 | `Find` | 2.289m ± 0% | 0 | 0 | +| WideModel | 640 | 1000 | 3 | `Predict` | 6.214µ ± 2% | 0 | 0 | +| WideModel | 640 | 1000 | 3 | `Find` | 6.201µ ± 1% | 0 | 0 | +| WideModel | 640 | 1000 | 10 | `Predict` | 6.777µ ± 1% | 0 | 0 | +| WideModel | 640 | 1000 | 10 | `Find` | 6.728µ ± 1% | 0 | 0 | +| WideModel | 640 | 1000 | 100 | `Predict` | 11.16µ ± 2% | 0 | 0 | +| WideModel | 640 | 1000 | 100 | `Find` | 10.85µ ± 2% | 0 | 0 | +| WideModel | 640 | 1000000 | 3 | `Predict` | 5.756m ± 4% | 0 | 0 | +| WideModel | 640 | 1000000 | 3 | `Find` | 5.832m ± 2% | 0 | 0 | +| WideModel | 640 | 1000000 | 10 | `Predict` | 5.842m ± 1% | 0 | 0 | +| WideModel | 640 | 1000000 | 10 | `Find` | 5.830m ± 5% | 0 | 0 | +| WideModel | 640 | 1000000 | 100 | `Predict` | 5.914m ± 6% | 0 | 0 | +| WideModel | 640 | 1000000 | 100 | `Find` | 5.872m ± 1% | 0 | 0 | ## License diff --git a/example_test.go b/example_test.go index d728b78..44809fe 100644 --- a/example_test.go +++ b/example_test.go @@ -23,7 +23,7 @@ func Example() { // or, just return the nearest neighbor's distances and indices: // distances,indices := model.Find(k, 0b101011) - 
fmt.Println("Votes:", bitknn.VoteSlice(votes)) + fmt.Println("Votes:", votes) // you can also use a map for the votes. // this is good if you have a very large number of different labels: @@ -56,7 +56,7 @@ func ExampleFitWide() { query := pack.String("fob") model.Predict(k, query, bitknn.VoteSlice(votes)) - fmt.Println("Votes:", bitknn.VoteSlice(votes)) + fmt.Println("Votes:", votes) // Output: // Votes: [0.25 0.16666666666666666] diff --git a/lsh/model_bench_test.go b/lsh/model_bench_test.go index 1573c40..0b9cef8 100644 --- a/lsh/model_bench_test.go +++ b/lsh/model_bench_test.go @@ -9,7 +9,7 @@ import ( "github.com/keilerkonzept/bitknn/lsh" ) -func Benchmark_Model_Predict(b *testing.B) { +func BenchmarkModel(b *testing.B) { type bench struct { hashes []lsh.Hash dataSize []int @@ -29,7 +29,7 @@ func Benchmark_Model_Predict(b *testing.B) { query := testrandom.Query() for _, k := range bench.k { for _, hash := range bench.hashes { - b.Run(fmt.Sprintf("hash=%T_N=%d_k=%d", hash, dataSize, k), func(b *testing.B) { + b.Run(fmt.Sprintf("Op=Predict_hash=%T_N=%d_k=%d", hash, dataSize, k), func(b *testing.B) { model := lsh.Fit(data, labels, hash) model.PreallocateHeap(k) b.ResetTimer() @@ -37,6 +37,14 @@ func Benchmark_Model_Predict(b *testing.B) { model.Predict(k, query, bitknn.DiscardVotes) } }) + b.Run(fmt.Sprintf("Op=Find_hash=%T_N=%d_k=%d", hash, dataSize, k), func(b *testing.B) { + model := lsh.Fit(data, labels, hash) + model.PreallocateHeap(k) + b.ResetTimer() + for n := 0; n < b.N; n++ { + model.Find(k, query) + } + }) } } } diff --git a/model_bench_test.go b/model_bench_test.go index 03cf59b..e33aa1a 100644 --- a/model_bench_test.go +++ b/model_bench_test.go @@ -2,14 +2,13 @@ package bitknn_test import ( "fmt" - "math/rand/v2" "testing" "github.com/keilerkonzept/bitknn" "github.com/keilerkonzept/bitknn/internal/testrandom" ) -func Benchmark_Model_Predict(b *testing.B) { +func BenchmarkModel(b *testing.B) { type bench struct { dataSize []int k []int @@ -21,117 
+20,26 @@ func Benchmark_Model_Predict(b *testing.B) { for _, bench := range benches { for _, dataSize := range bench.dataSize { for _, k := range bench.k { - b.Run(fmt.Sprintf("N=%d_k=%d", dataSize, k), func(b *testing.B) { - data := testrandom.Data(dataSize) - labels := testrandom.Labels(dataSize) - model := bitknn.Fit(data, labels) - query := testrandom.Query() + data := testrandom.Data(dataSize) + labels := testrandom.Labels(dataSize) + model := bitknn.Fit(data, labels) + query := testrandom.Query() + b.Run(fmt.Sprintf("Op=Predict_bits=64_N=%d_k=%d", dataSize, k), func(b *testing.B) { model.PreallocateHeap(k) b.ResetTimer() for n := 0; n < b.N; n++ { model.Predict(k, query, bitknn.DiscardVotes) } }) - } - } - } -} - -func Benchmark_Model_PredictV(b *testing.B) { - votes := make([]float64, 256) - type bench struct { - dataSize []int - k []int - } - benches := []bench{ - {dataSize: []int{100}, k: []int{3, 10, 100}}, - } - for _, bench := range benches { - for _, dataSize := range bench.dataSize { - for _, k := range bench.k { - b.Run(fmt.Sprintf("N=%d_k=%d", dataSize, k), func(b *testing.B) { - data := testrandom.Data(dataSize) - labels := testrandom.Labels(dataSize) - values := testrandom.Values(dataSize) - model := bitknn.Fit(data, labels, bitknn.WithValues(values)) - query := rand.Uint64() - + b.Run(fmt.Sprintf("Op=Find_bits=64_N=%d_k=%d", dataSize, k), func(b *testing.B) { model.PreallocateHeap(k) - voteSlice := bitknn.VoteSlice(votes) b.ResetTimer() for n := 0; n < b.N; n++ { - model.Predict(k, query, &voteSlice) + model.Find(k, query) } }) } } } } - -func Benchmark_Model_PredictD(b *testing.B) { - votes := make([]float64, 256) - type bench struct { - dataSize []int - k []int - } - benches := []bench{ - {dataSize: []int{100}, k: []int{3, 10, 100}}, - } - for _, d := range []bitknn.DistanceWeighting{bitknn.DistanceWeightingLinear, bitknn.DistanceWeightingQuadratic, bitknn.DistanceWeightingCustom} { - for _, bench := range benches { - for _, dataSize := range 
bench.dataSize { - for _, k := range bench.k { - b.Run(fmt.Sprintf("DistFunc=%v_N=%d_k=%d", d, dataSize, k), func(b *testing.B) { - data := testrandom.Data(dataSize) - labels := testrandom.Labels(dataSize) - model := bitknn.Fit(data, labels) - model.DistanceWeighting = d - model.DistanceWeightingFunc = func(d int) float64 { return 1 / float64(1+d) } - query := rand.Uint64() - voteSlice := bitknn.VoteSlice(votes) - - b.ResetTimer() - for n := 0; n < b.N; n++ { - model.Predict(k, query, &voteSlice) - } - }) - } - } - } - } -} - -func Benchmark_Model_PredictDV(b *testing.B) { - votes := make([]float64, 256) - type bench struct { - dataSize []int - k []int - } - benches := []bench{ - {dataSize: []int{100}, k: []int{3, 10, 100}}, - } - for _, d := range []bitknn.DistanceWeighting{bitknn.DistanceWeightingLinear, bitknn.DistanceWeightingQuadratic, bitknn.DistanceWeightingCustom} { - for _, bench := range benches { - for _, dataSize := range bench.dataSize { - for _, k := range bench.k { - b.Run(fmt.Sprintf("DistFunc=%v_N=%d_k=%d", d, dataSize, k), func(b *testing.B) { - data := testrandom.Data(dataSize) - labels := testrandom.Labels(dataSize) - values := testrandom.Values(dataSize) - model := bitknn.Fit(data, labels, bitknn.WithValues(values)) - model.DistanceWeighting = d - model.DistanceWeightingFunc = func(d int) float64 { return 1 / float64(1+d) } - query := rand.Uint64() - voteSlice := bitknn.VoteSlice(votes) - - b.ResetTimer() - for n := 0; n < b.N; n++ { - model.Predict(k, query, &voteSlice) - } - }) - } - } - } - } -} diff --git a/model_wide_bench_test.go b/model_wide_bench_test.go index 9f0d9d0..e729878 100644 --- a/model_wide_bench_test.go +++ b/model_wide_bench_test.go @@ -6,9 +6,10 @@ import ( "github.com/keilerkonzept/bitknn" "github.com/keilerkonzept/bitknn/internal/testrandom" + "github.com/keilerkonzept/bitknn/pack" ) -func Benchmark_WideModel_Predict(b *testing.B) { +func BenchmarkWideModel(b *testing.B) { type bench struct { dim []int dataSize []int @@ 
-22,18 +23,25 @@ func Benchmark_WideModel_Predict(b *testing.B) { for _, dim := range bench.dim { for _, dataSize := range bench.dataSize { for _, k := range bench.k { - b.Run(fmt.Sprintf("dim=%d_N=%d_k=%d", dim*64, dataSize, k), func(b *testing.B) { - data := testrandom.WideData(dim, dataSize) - labels := testrandom.Labels(dataSize) - model := bitknn.FitWide(data, labels) - query := testrandom.WideQuery(dim) - + data := testrandom.WideData(dim, dataSize) + pack.ReallocateFlat(data) + labels := testrandom.Labels(dataSize) + model := bitknn.FitWide(data, labels) + query := testrandom.WideQuery(dim) + b.Run(fmt.Sprintf("Op=Predict_bits=%d_N=%d_k=%d", dim*64, dataSize, k), func(b *testing.B) { model.PreallocateHeap(k) b.ResetTimer() for n := 0; n < b.N; n++ { model.Predict(k, query, bitknn.DiscardVotes) } }) + b.Run(fmt.Sprintf("Op=Find_bits=%d_N=%d_k=%d", dim*64, dataSize, k), func(b *testing.B) { + model.PreallocateHeap(k) + b.ResetTimer() + for n := 0; n < b.N; n++ { + model.Find(k, query) + } + }) } } } diff --git a/nearest.go b/nearest.go index ead3c94..f600007 100644 --- a/nearest.go +++ b/nearest.go @@ -23,11 +23,12 @@ func Nearest(data []uint64, k int, x uint64, distances, indices []int) int { heap.Push(dist, i) } - if k0 < k { + if len(data) <= k { return k0 } maxDist := *distance0 + _ = data[k] for i := k; i < len(data); i++ { dist := bits.OnesCount64(x ^ data[i]) if dist >= maxDist { diff --git a/nearest_wide.go b/nearest_wide.go index 1f34e05..6b97904 100644 --- a/nearest_wide.go +++ b/nearest_wide.go @@ -12,25 +12,25 @@ func NearestWide(data [][]uint64, k int, x []uint64, distances, indices []int) i distance0 := &distances[0] k0 := min(k, len(data)) - for i, d := range data[:k0] { dist := 0 - for j, d := range d { - dist += bits.OnesCount64(x[j] ^ d) + for j, x := range x { + dist += bits.OnesCount64(d[j] ^ x) } heap.Push(dist, i) } - if k0 < k { + if len(data) <= k { return k0 } maxDist := *distance0 + _ = data[k] for i := k; i < len(data); i++ { dist := 
0 d := data[i] - for j, d := range d { - dist += bits.OnesCount64(x[j] ^ d) + for j, x := range x { + dist += bits.OnesCount64(d[j] ^ x) } if dist >= maxDist { continue