From af759de02009538e7a2d070991d175d60a251e46 Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Fri, 13 Dec 2024 22:32:23 +0100 Subject: [PATCH 1/4] add tests before refactoring --- bitset_test.go | 225 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 213 insertions(+), 12 deletions(-) diff --git a/bitset_test.go b/bitset_test.go index bee2999..9f91df6 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -19,6 +19,7 @@ import ( "math" "math/bits" "math/rand" + "reflect" "strconv" "testing" "time" @@ -152,7 +153,6 @@ func TestExceedCap(t *testing.T) { bmp.ClearAll() d := Cap() bmp.Set(d) - } func TestExpand(t *testing.T) { @@ -255,7 +255,6 @@ func TestNextClear(t *testing.T) { next, found = v.NextClear(0) if found || next != 0 { t.Errorf("Found next clear bit as %d, it should have return (0, false)", next) - } } @@ -302,7 +301,210 @@ func TestIterate(t *testing.T) { if data[4] != 2000 { t.Errorf("bug 4") } +} + +func TestNextSet(t *testing.T) { + testCases := []struct { + name string + // + set []uint + del []uint + // + startIdx uint + wantIdx uint + wantOk bool + }{ + { + name: "null", + set: []uint{}, + startIdx: 0, + wantIdx: 0, + wantOk: false, + }, + { + name: "zero", + set: []uint{0}, + startIdx: 0, + wantIdx: 0, + wantOk: true, + }, + { + name: "1,5", + set: []uint{1, 5}, + startIdx: 0, + wantIdx: 1, + wantOk: true, + }, + { + name: "many", + set: []uint{1, 65, 130, 190, 250, 300, 380, 420, 480, 511}, + startIdx: 100, + wantIdx: 130, + wantOk: true, + }, + { + name: "many-2", + set: []uint{1, 65, 130, 190, 250, 300, 380, 420, 480, 511}, + del: []uint{130, 190, 300, 420}, + startIdx: 100, + wantIdx: 250, + wantOk: true, + }, + { + name: "last", + set: []uint{1, 65, 130, 190, 250, 300, 380, 420, 480, 511}, + startIdx: 511, + wantIdx: 511, + wantOk: true, + }, + { + name: "last-2", + set: []uint{1, 65, 130, 190, 250, 300, 380, 420, 480, 511}, + del: []uint{511}, + startIdx: 511, + wantIdx: 0, + wantOk: false, + }, + } + + for _, tc := range testCases { + var b BitSet + for _, u := range tc.set { + b.Set(u) + } + + for _, u := range tc.del { + b.Clear(u) // without compact + } + idx, ok := b.NextSet(tc.startIdx) + + if ok != tc.wantOk { + t.Errorf("NextSet, %s: got ok: %v, want: %v", tc.name, ok, tc.wantOk) + } + if idx != tc.wantIdx { + t.Errorf("NextSet, %s: got next idx: %d, want: %d", tc.name, idx, tc.wantIdx) + } + } +} + +func TestNextSetMany(t *testing.T) { + testCases := []struct { + name string + // + set []uint + del []uint + // + buf []uint + wantData []uint + // + startIdx uint + wantIdx uint + }{ + { + name: "null", + set: []uint{}, + del: []uint{}, + buf: make([]uint, 0, 512), + wantData: []uint{}, + startIdx: 0, + wantIdx: 0, + }, + { + name: "zero", + set: []uint{0}, + del: []uint{}, + buf: make([]uint, 0, 512), + wantData: []uint{0}, // bit #0 is set + startIdx: 0, + wantIdx: 0, + }, + { + name: "1,5", + set: []uint{1, 5}, + del: []uint{}, + buf: make([]uint, 0, 512), + wantData: []uint{1, 5}, + startIdx: 0, + wantIdx: 5, + }, + { + name: "many", + set: []uint{1, 65, 130, 190, 250, 300, 380, 420, 480, 511}, + del: []uint{}, + buf: make([]uint, 0, 512), + wantData: []uint{1, 65, 130, 190, 250, 300, 380, 420, 480, 511}, + startIdx: 0, + wantIdx: 511, + }, + { + name: "start idx", + set: []uint{1, 65, 130, 190, 250, 300, 380, 420, 480, 511}, + del: []uint{}, + buf: make([]uint, 0, 512), + wantData: []uint{250, 300, 380, 420, 480, 511}, + startIdx: 195, + wantIdx: 511, + }, + { + name: "zero buffer", + set: []uint{1, 2, 3, 4, 511}, + del: []uint{}, + buf: make([]uint, 0), // buffer + wantData: []uint{}, + startIdx: 0, + wantIdx: 0, + }, + { + name: "buffer too short, first word", + set: []uint{1, 2, 3, 4, 5, 6, 7, 8, 9}, + del: []uint{}, + buf: make([]uint, 0, 5), // buffer + wantData: []uint{1, 2, 3, 4, 5}, + startIdx: 0, + wantIdx: 5, + }, + { + name: "buffer too short", + set: []uint{65, 66, 67, 68, 69, 70}, + del: []uint{}, + buf: make([]uint, 0, 5), // buffer + wantData: []uint{65, 66, 67, 68, 69}, + startIdx: 0, + wantIdx: 69, + }, + { + name: "special, last return", + set: []uint{1}, + del: []uint{1}, // delete without compact + buf: make([]uint, 0, 5), // buffer + wantData: []uint{}, + startIdx: 0, + wantIdx: 0, + }, + } + + for _, tc := range testCases { + var b BitSet + for _, u := range tc.set { + b.Set(u) + } + + for _, u := range tc.del { + b.Clear(u) // without compact + } + + idx, buf := b.NextSetMany(tc.startIdx, tc.buf) + + if idx != tc.wantIdx { + t.Errorf("NextSetMany, %s: got next idx: %d, want: %d", tc.name, idx, tc.wantIdx) + } + + if !reflect.DeepEqual(buf, tc.wantData) { + t.Errorf("NextSetMany, %s: returned buf is not equal as expected:\ngot: %v\nwant: %v", + tc.name, buf, tc.wantData) + } + } } func TestSetTo(t *testing.T) { @@ -472,7 +674,7 @@ func TestPanicMustNew(t *testing.T) { func TestPanicDifferenceBNil(t *testing.T) { var b *BitSet - var compare = New(10) + compare := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil First should should have caused a panic") @@ -483,7 +685,7 @@ func TestPanicDifferenceBNil(t *testing.T) { func TestPanicDifferenceCompareNil(t *testing.T) { var compare *BitSet - var b = New(10) + b := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil Second should should have caused a panic") @@ -494,7 +696,7 @@ func TestPanicDifferenceCompareNil(t *testing.T) { func TestPanicUnionBNil(t *testing.T) { var b *BitSet - var compare = New(10) + compare := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil First should should have caused a panic") @@ -505,7 +707,7 @@ func TestPanicUnionBNil(t *testing.T) { func TestPanicUnionCompareNil(t *testing.T) { var compare *BitSet - var b = New(10) + b := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil Second should should have caused a panic") @@ -516,7 +718,7 @@ func TestPanicUnionCompareNil(t *testing.T) { func TestPanicIntersectionBNil(t *testing.T) { var b *BitSet - var compare = New(10) + compare := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil First should should have caused a panic") @@ -527,7 +729,7 @@ func TestPanicIntersectionBNil(t *testing.T) { func TestPanicIntersectionCompareNil(t *testing.T) { var compare *BitSet - var b = New(10) + b := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil Second should should have caused a panic") @@ -538,7 +740,7 @@ func TestPanicIntersectionCompareNil(t *testing.T) { func TestPanicSymmetricDifferenceBNil(t *testing.T) { var b *BitSet - var compare = New(10) + compare := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil First should should have caused a panic") @@ -549,7 +751,7 @@ func TestPanicSymmetricDifferenceBNil(t *testing.T) { func TestPanicSymmetricDifferenceCompareNil(t *testing.T) { var compare *BitSet - var b = New(10) + b := New(10) defer func() { if r := recover(); r == nil { t.Error("Nil Second should should have caused a panic") @@ -802,7 +1004,6 @@ func TestInsertAtWithSet(t *testing.T) { t.Error("66 should be set") return } - } func TestInsertAt(t *testing.T) { @@ -1600,6 +1801,7 @@ func TestRankSelect(t *testing.T) { return } } + func TestFlip(t *testing.T) { b := new(BitSet) c := b.Flip(11) @@ -1848,7 +2050,6 @@ func TestDeleteWithBitSetInstance(t *testing.T) { t.Errorf("Expected index %d to not be set, but was", i) } } - } } From e8b9a384791b4207d3cc63558abd6d37af1a03be Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Fri, 13 Dec 2024 23:19:38 +0100 Subject: [PATCH 2/4] simplify the code in NextSet --- bitset.go | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/bitset.go b/bitset.go index 8a9844e..243cf2e 100644 --- a/bitset.go +++ b/bitset.go @@ -501,23 +501,25 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { if x >= len(b.set) { return 0, false } - w := b.set[x] - w = w >> wordsIndex(i) - if w != 0 { - return i + uint(bits.TrailingZeros64(w)), true + + // process first (partial) word + word := b.set[x] >> wordsIndex(i) + if word != 0 { + return i + uint(bits.TrailingZeros64(word)), true } + + // process the following full words until next bit is set x++ - // bounds check elimination in the loop - if x < 0 { + if x >= len(b.set) { return 0, false } - for x < len(b.set) { - if b.set[x] != 0 { - return uint(x)*wordSize + uint(bits.TrailingZeros64(b.set[x])), true - } - x++ + for idx, word := range b.set[x:] { + if word != 0 { + return uint((x+idx)< Date: Fri, 13 Dec 2024 23:20:21 +0100 Subject: [PATCH 3/4] simplify the code in NextSetMany --- bitset.go | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/bitset.go b/bitset.go index 243cf2e..3c9df34 100644 --- a/bitset.go +++ b/bitset.go @@ -545,44 +545,50 @@ func (b *BitSet) NextSet(i uint) (uint, bool) { // However if bitmap.Count() is large, it might be preferable to // use several calls to NextSetMany, for performance reasons. func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { - myanswer := buffer capacity := cap(buffer) + result := buffer[:capacity] + x := int(i >> log2WordSize) if x >= len(b.set) || capacity == 0 { - return 0, myanswer[:0] + return 0, result[:0] } - skip := wordsIndex(i) - word := b.set[x] >> skip - myanswer = myanswer[:capacity] - size := int(0) + + // process first (partial) word + word := b.set[x] >> wordsIndex(i) + + size := 0 for word != 0 { - r := uint(bits.TrailingZeros64(word)) - t := word & ((^word) + 1) - myanswer[size] = r + i + result[size] = i + uint(bits.TrailingZeros64(word)) + size++ if size == capacity { - goto End + return result[size-1], result[:size] } - word = word ^ t + + // clear the rightmost set bit + word &= word - 1 } + + // process the following full words x++ for idx, word := range b.set[x:] { for word != 0 { - r := uint(bits.TrailingZeros64(word)) - t := word & ((^word) + 1) - myanswer[size] = r + (uint(x+idx) << 6) + result[size] = uint((x+idx)< 0 { - return myanswer[size-1], myanswer[:size] + return result[size-1], result[:size] } - return 0, myanswer[:0] + return 0, result[:0] } // NextClear returns the next clear bit from the specified index, From 56199074247ae79d2bb26646bbe46ecd4afcc5fe Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Fri, 13 Dec 2024 23:22:40 +0100 Subject: [PATCH 4/4] benchmark more readable bit twiddler --- bitset_benchmark_test.go | 111 +++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 44 deletions(-) diff --git a/bitset_benchmark_test.go b/bitset_benchmark_test.go index 13e669e..7a783ac 100644 --- a/bitset_benchmark_test.go +++ b/bitset_benchmark_test.go @@ -283,7 +283,7 @@ func BenchmarkLemireIterateManyb(b *testing.B) { } func setRnd(bits []uint64, halfings int) { - var rnd = rand.NewSource(0).(rand.Source64) + rnd := rand.NewSource(0).(rand.Source64) for i := range bits { bits[i] = 0xFFFFFFFFFFFFFFFF for j := 0; j < halfings; j++ { @@ -294,14 +294,14 @@ func setRnd(bits []uint64, halfings int) { // go test -bench=BenchmarkFlorianUekermannIterateMany func BenchmarkFlorianUekermannIterateMany(b *testing.B) { - var input = make([]uint64, 68) + input := make([]uint64, 68) setRnd(input, 4) - var bitmap = From(input) + bitmap := From(input) buffer := make([]uint, 256) b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { - var last, batch = bitmap.NextSetMany(0, buffer) + last, batch := bitmap.NextSetMany(0, buffer) for len(batch) > 0 { for _, idx := range batch { checksum += idx @@ -315,11 +315,11 @@ func BenchmarkFlorianUekermannIterateMany(b *testing.B) { } func BenchmarkFlorianUekermannIterateManyReg(b *testing.B) { - var input = make([]uint64, 68) + input := make([]uint64, 68) setRnd(input, 4) - var bitmap = From(input) + bitmap := From(input) b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { for j, e := bitmap.NextSet(0); e; j, e = bitmap.NextSet(j + 1) { checksum += j @@ -333,11 +333,11 @@ func BenchmarkFlorianUekermannIterateManyReg(b *testing.B) { // function provided by FlorianUekermann func good(set []uint64) (checksum uint) { for wordIdx, word := range set { - var wordIdx = uint(wordIdx * 64) + wordIdx := uint(wordIdx * 64) for word != 0 { - var bitIdx = uint(bits.TrailingZeros64(word)) + bitIdx := uint(bits.TrailingZeros64(word)) word ^= 1 << bitIdx - var index = wordIdx + bitIdx + index := wordIdx + bitIdx checksum += index } } @@ -345,10 +345,10 @@ func good(set []uint64) (checksum uint) { } func BenchmarkFlorianUekermannIterateManyComp(b *testing.B) { - var input = make([]uint64, 68) + input := make([]uint64, 68) setRnd(input, 4) b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { checksum += good(input) } @@ -361,15 +361,15 @@ func BenchmarkFlorianUekermannIterateManyComp(b *testing.B) { // go test -bench=BenchmarkFlorianUekermannLowDensityIterateMany func BenchmarkFlorianUekermannLowDensityIterateMany(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 50000; i++ { input[rnd.Uint64()%1000000] = 1 } - var bitmap = From(input) + bitmap := From(input) buffer := make([]uint, 256) b.ResetTimer() - var sum = uint(0) + sum := uint(0) for i := 0; i < b.N; i++ { j := uint(0) j, buffer = bitmap.NextSetMany(j, buffer) @@ -386,14 +386,14 @@ func BenchmarkFlorianUekermannLowDensityIterateMany(b *testing.B) { } func BenchmarkFlorianUekermannLowDensityIterateManyReg(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 50000; i++ { input[rnd.Uint64()%1000000] = 1 } - var bitmap = From(input) + bitmap := From(input) b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { for j, e := bitmap.NextSet(0); e; j, e = bitmap.NextSet(j + 1) { checksum += j @@ -405,13 +405,13 @@ func BenchmarkFlorianUekermannLowDensityIterateManyReg(b *testing.B) { } func BenchmarkFlorianUekermannLowDensityIterateManyComp(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 50000; i++ { input[rnd.Uint64()%1000000] = 1 } b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { checksum += good(input) } @@ -424,12 +424,12 @@ func BenchmarkFlorianUekermannLowDensityIterateManyComp(b *testing.B) { // go test -bench=BenchmarkFlorianUekermannMidDensityIterateMany func BenchmarkFlorianUekermannMidDensityIterateMany(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 3000000; i++ { input[rnd.Uint64()%1000000] |= uint64(1) << (rnd.Uint64() % 64) } - var bitmap = From(input) + bitmap := From(input) buffer := make([]uint, 256) b.ResetTimer() sum := uint(0) @@ -450,14 +450,14 @@ func BenchmarkFlorianUekermannMidDensityIterateMany(b *testing.B) { } func BenchmarkFlorianUekermannMidDensityIterateManyReg(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 3000000; i++ { input[rnd.Uint64()%1000000] |= uint64(1) << (rnd.Uint64() % 64) } - var bitmap = From(input) + bitmap := From(input) b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { for j, e := bitmap.NextSet(0); e; j, e = bitmap.NextSet(j + 1) { checksum += j @@ -469,13 +469,13 @@ func BenchmarkFlorianUekermannMidDensityIterateManyReg(b *testing.B) { } func BenchmarkFlorianUekermannMidDensityIterateManyComp(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 3000000; i++ { input[rnd.Uint64()%1000000] |= uint64(1) << (rnd.Uint64() % 64) } b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { checksum += good(input) } @@ -487,12 +487,12 @@ func BenchmarkFlorianUekermannMidDensityIterateManyComp(b *testing.B) { ////////// High density func BenchmarkFlorianUekermannMidStrongDensityIterateMany(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 20000000; i++ { input[rnd.Uint64()%1000000] |= uint64(1) << (rnd.Uint64() % 64) } - var bitmap = From(input) + bitmap := From(input) buffer := make([]uint, 256) b.ResetTimer() sum := uint(0) @@ -513,14 +513,14 @@ func BenchmarkFlorianUekermannMidStrongDensityIterateMany(b *testing.B) { } func BenchmarkFlorianUekermannMidStrongDensityIterateManyReg(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 20000000; i++ { input[rnd.Uint64()%1000000] |= uint64(1) << (rnd.Uint64() % 64) } - var bitmap = From(input) + bitmap := From(input) b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { for j, e := bitmap.NextSet(0); e; j, e = bitmap.NextSet(j + 1) { checksum += j @@ -532,13 +532,13 @@ func BenchmarkFlorianUekermannMidStrongDensityIterateManyReg(b *testing.B) { } func BenchmarkFlorianUekermannMidStrongDensityIterateManyComp(b *testing.B) { - var input = make([]uint64, 1000000) - var rnd = rand.NewSource(0).(rand.Source64) + input := make([]uint64, 1000000) + rnd := rand.NewSource(0).(rand.Source64) for i := 0; i < 20000000; i++ { input[rnd.Uint64()%1000000] |= uint64(1) << (rnd.Uint64() % 64) } b.ResetTimer() - var checksum = uint(0) + checksum := uint(0) for i := 0; i < b.N; i++ { checksum += good(input) } @@ -635,3 +635,26 @@ func BenchmarkIsSuperSet(b *testing.B) { len, len, density, overrideS, overrideSS, fStrict) } } + +// clear the right most bit (C-RMS) +// test two different algorithms +func BenchmarkClearRMS(b *testing.B) { + var word uint64 + + // cryptic + b.Run("cryptic", func(b *testing.B) { + word = 0xaaaa_aaaa_aaaa_aaaa + for i := 0; i < b.N; i++ { + t := word & ((^word) + 1) + word = word ^ t + } + }) + + // less cryptic + b.Run("simple", func(b *testing.B) { + word = 0xaaaa_aaaa_aaaa_aaaa + for i := 0; i < b.N; i++ { + word &= word - 1 + } + }) +}