From e9efcb57f94d7910f9d565220c03c2b693656410 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Thu, 3 Oct 2024 09:43:37 -0400 Subject: [PATCH] chore(storage/bloom/v1): bind bloom page size to max bloom size (#14372) Previously, the bloom page size was hard-coded to 256KB. However, 256KB is much smaller than the normal bloom size, leading to nearly every bloom page to be overfilled with a single, massive bloom. Binding the bloom page size to the maximum bloom size allows bloom pages to contain multiple smaller blooms (which are still larger than 256KB) or one maxed out bloom. Signed-off-by: Robert Fratto --- pkg/storage/bloom/v1/builder.go | 15 +++++++++++---- pkg/storage/bloom/v1/builder_test.go | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/pkg/storage/bloom/v1/builder.go b/pkg/storage/bloom/v1/builder.go index f4bec3b2eaadb..0d291b5a9ea28 100644 --- a/pkg/storage/bloom/v1/builder.go +++ b/pkg/storage/bloom/v1/builder.go @@ -70,18 +70,25 @@ func NewBlockOptions(enc compression.Codec, maxBlockSizeBytes, maxBloomSizeBytes opts := NewBlockOptionsFromSchema(Schema{ version: CurrentSchemaVersion, encoding: enc, - }) + }, maxBloomSizeBytes) opts.BlockSize = maxBlockSizeBytes opts.UnencodedBlockOptions.MaxBloomSizeBytes = maxBloomSizeBytes return opts } -func NewBlockOptionsFromSchema(s Schema) BlockOptions { +func NewBlockOptionsFromSchema(s Schema, maxBloomSizeBytes uint64) BlockOptions { return BlockOptions{ Schema: s, // TODO(owen-d): benchmark and find good defaults - SeriesPageSize: 4 << 10, // 4KB, typical page size - BloomPageSize: 256 << 10, // 256KB, no idea what to make this + SeriesPageSize: 4 << 10, // 4KB, typical page size + + // Allow one bloom page to fit either several small blooms or one large + // bloom at max size. + // + // Previously this value was fixed at 256KB, which is smaller than most + // blooms. Setting this value less than maxBloomSizeBytes means that most + // pages will consist of a single oversized bloom. + BloomPageSize: maxBloomSizeBytes, } } diff --git a/pkg/storage/bloom/v1/builder_test.go b/pkg/storage/bloom/v1/builder_test.go index 81c367df9c811..662c1375809be 100644 --- a/pkg/storage/bloom/v1/builder_test.go +++ b/pkg/storage/bloom/v1/builder_test.go @@ -23,6 +23,22 @@ var blockEncodings = []compression.Codec{ compression.Zstd, } +func TestBlockOptions_BloomPageSize(t *testing.T) { + t.Parallel() + + var ( + maxBlockSizeBytes = uint64(50 << 10) + maxBloomSizeBytes = uint64(10 << 10) + ) + + opts := NewBlockOptions(compression.None, maxBlockSizeBytes, maxBloomSizeBytes) + + require.GreaterOrEqual( + t, opts.BloomPageSize, maxBloomSizeBytes, + "opts.BloomPageSize should be greater or equal to the maximum bloom size to avoid having too many overfilled pages", + ) +} + func TestBlockOptions_RoundTrip(t *testing.T) { t.Parallel() opts := BlockOptions{