From 599dc047cdb41c0f0602b35ea208970c24c83d6e Mon Sep 17 00:00:00 2001 From: Michal Siedlaczek Date: Sat, 30 Mar 2024 21:06:07 -0400 Subject: [PATCH] Implement all dynamic-dispatch block codecs --- include/pisa/codec/block_codec.hpp | 22 +++++++ include/pisa/codec/interpolative.hpp | 18 ++++++ include/pisa/codec/maskedvbyte.hpp | 13 +++- include/pisa/codec/optpfor.hpp | 54 +++++++++++++++++ include/pisa/codec/qmx.hpp | 12 ++++ include/pisa/codec/simple16.hpp | 29 +-------- include/pisa/codec/simple8b.hpp | 12 ++++ include/pisa/codec/streamvbyte.hpp | 13 ++++ include/pisa/codec/varint_g8iu.hpp | 18 ++++++ include/pisa/codec/varintgb.hpp | 13 +++- src/codec/interpolative.cpp | 54 +++++++++++++++++ src/codec/maskedvbyte.cpp | 29 +++++++++ src/codec/optpfor.cpp | 47 +++++++++++++++ src/codec/qmx.cpp | 40 +++++++++++++ src/codec/simple16.cpp | 31 ++++++++++ src/codec/simple8b.cpp | 26 ++++++++ src/codec/streamvbyte.cpp | 22 +++++++ src/codec/varint_g8iu.cpp | 94 +++++++++++++++++++++++++++++ src/codec/varintgb.cpp | 30 ++++++++++ tools/queries_dynamic.cpp | 37 +++++++++++- 20 files changed, 584 insertions(+), 30 deletions(-) create mode 100644 include/pisa/codec/interpolative.hpp create mode 100644 include/pisa/codec/optpfor.hpp create mode 100644 include/pisa/codec/varint_g8iu.hpp create mode 100644 src/codec/interpolative.cpp create mode 100644 src/codec/maskedvbyte.cpp create mode 100644 src/codec/optpfor.cpp create mode 100644 src/codec/qmx.cpp create mode 100644 src/codec/simple16.cpp create mode 100644 src/codec/simple8b.cpp create mode 100644 src/codec/streamvbyte.cpp create mode 100644 src/codec/varint_g8iu.cpp create mode 100644 src/codec/varintgb.cpp diff --git a/include/pisa/codec/block_codec.hpp b/include/pisa/codec/block_codec.hpp index 28c5495e..2b4480eb 100644 --- a/include/pisa/codec/block_codec.hpp +++ b/include/pisa/codec/block_codec.hpp @@ -5,16 +5,38 @@ namespace pisa { +/** + * Block codecs encode and decode an entire list. 
This is in contrast to a streaming codec, + * which can encode and decode values one by one. + */ class BlockCodec { public: + /** + * Virtual destructor, so that a codec can be safely destroyed through a pointer to `BlockCodec`. + */ + virtual ~BlockCodec() = default; + + /** + * Encodes a list of `n` unsigned integers and appends them to the output buffer. + */ virtual void encode( std::uint32_t const* in, std::uint32_t sum_of_values, std::size_t n, std::vector& out ) const = 0; + /** + * Decodes a list of `n` unsigned integers from a binary buffer and writes them to pre-allocated + * memory. + */ virtual std::uint8_t const* decode( std::uint8_t const* in, std::uint32_t* out, std::uint32_t sum_of_values, std::size_t n ) const = 0; + /** + * Returns the block size of the encoding. + * + * Block codecs write blocks of fixed size, e.g., 128 integers. Thus, it is only possible to + * encode at most `block_size()` elements. + */ [[nodiscard]] virtual auto block_size() const noexcept -> std::size_t = 0; }; diff --git a/include/pisa/codec/interpolative.hpp b/include/pisa/codec/interpolative.hpp new file mode 100644 index 00000000..b4f6ff10 --- /dev/null +++ b/include/pisa/codec/interpolative.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include "codec/block_codec.hpp" + +namespace pisa { + +class InterpolativeBlockCodec: public BlockCodec { + static constexpr std::uint64_t m_block_size = 128; + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } +}; + +} // namespace pisa diff --git a/include/pisa/codec/maskedvbyte.hpp b/include/pisa/codec/maskedvbyte.hpp index b31d2228..13adf3a8 100644 --- a/include/pisa/codec/maskedvbyte.hpp +++ b/include/pisa/codec/maskedvbyte.hpp @@ -4,8 +4,8 @@ #include "MaskedVByte/include/varintdecode.h" #include "MaskedVByte/include/varintencode.h" +#include "codec/block_codec.hpp" #include "codec/block_codecs.hpp" -#include "util/util.hpp" namespace pisa { 
struct maskedvbyte_block { @@ -31,4 +31,15 @@ struct maskedvbyte_block { return in + read; } }; + +class MaskedVByteBlockCodec: public BlockCodec { + static constexpr std::uint64_t m_block_size = 128; + static constexpr std::uint64_t m_overflow = 512; + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } +}; + } // namespace pisa diff --git a/include/pisa/codec/optpfor.hpp b/include/pisa/codec/optpfor.hpp new file mode 100644 index 00000000..7d209539 --- /dev/null +++ b/include/pisa/codec/optpfor.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include + +#include "FastPFor/headers/optpfor.h" + +#include "codec/block_codec.hpp" + +namespace pisa { + +class OptPForBlockCodec: public BlockCodec { + struct Codec: FastPForLib::OPTPFor<4, FastPForLib::Simple16> { + uint8_t const* force_b{nullptr}; + + uint32_t findBestB(const uint32_t* in, uint32_t len) { + // trick to force the choice of b from a parameter + if (force_b != nullptr) { + return *force_b; + } + + // this is mostly a cut&paste from FastPFor, but we stop the + // optimization early as the b to test becomes larger than maxb + uint32_t b = 0; + uint32_t bsize = std::numeric_limits::max(); + const uint32_t mb = FastPForLib::maxbits(in, in + len); + uint32_t i = 0; + while (mb > 28 + possLogs[i]) { + ++i; // some schemes such as Simple16 don't code numbers greater than 28 + } + + for (; i < possLogs.size(); i++) { + if (possLogs[i] > mb && possLogs[i] >= mb) { + break; + } + const uint32_t csize = tryB(possLogs[i], in, len); + + if (csize <= bsize) { + b = possLogs[i]; + bsize = csize; + } + } + return b; + } + }; + + static const uint64_t m_block_size = Codec::BlockSize; + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* 
decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } +}; + +} // namespace pisa diff --git a/include/pisa/codec/qmx.hpp b/include/pisa/codec/qmx.hpp index 5b6d800f..393a7ff9 100644 --- a/include/pisa/codec/qmx.hpp +++ b/include/pisa/codec/qmx.hpp @@ -1,6 +1,7 @@ #pragma once #include "QMX/qmx.hpp" +#include "codec/block_codec.hpp" #include "codec/block_codecs.hpp" namespace pisa { @@ -47,4 +48,15 @@ struct qmx_block { return in + enc_len; } }; + +class QmxBlockCodec: public BlockCodec { + static constexpr std::uint64_t m_block_size = 128; + static constexpr std::uint64_t m_overflow = 512; + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } +}; + } // namespace pisa diff --git a/include/pisa/codec/simple16.hpp b/include/pisa/codec/simple16.hpp index 303305ee..3d73537a 100644 --- a/include/pisa/codec/simple16.hpp +++ b/include/pisa/codec/simple16.hpp @@ -39,30 +39,9 @@ struct simple16_block { class Simple16BlockCodec: public BlockCodec { static constexpr std::uint64_t m_block_size = 128; - void - encode(uint32_t const* in, uint32_t /* sum_of_values */, size_t n, std::vector& out) const { - assert(n <= m_block_size); - thread_local FastPForLib::Simple16 codec; - thread_local std::array buf{}; - size_t out_len = buf.size(); - codec.encodeArray(in, n, reinterpret_cast(buf.data()), out_len); - out_len *= 4; - out.insert(out.end(), buf.data(), buf.data() + out_len); - } - - uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { - assert(n <= m_block_size); - FastPForLib::Simple16 codec; - std::array buf{}; - - auto const* ret = reinterpret_cast( - codec.decodeArray(reinterpret_cast(in), 8 * n, buf.data(), 
n) - ); - - std::copy(buf.begin(), std::next(buf.begin(), n), out); - return ret; - } - - auto block_size() const noexcept -> std::size_t { return m_block_size; } + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } }; diff --git a/include/pisa/codec/simple8b.hpp b/include/pisa/codec/simple8b.hpp index 5928d5da..859a1da4 100644 --- a/include/pisa/codec/simple8b.hpp +++ b/include/pisa/codec/simple8b.hpp @@ -3,6 +3,8 @@ #include +#include "codec/block_codec.hpp" + namespace pisa { struct simple8b_block { @@ -28,4 +30,14 @@ struct simple8b_block { ); } }; + +class Simple8bBlockCodec: public BlockCodec { + static constexpr std::uint64_t m_block_size = 128; + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } +}; + } // namespace pisa diff --git a/include/pisa/codec/streamvbyte.hpp b/include/pisa/codec/streamvbyte.hpp index 8cd41ac8..7fd1b0b5 100644 --- a/include/pisa/codec/streamvbyte.hpp +++ b/include/pisa/codec/streamvbyte.hpp @@ -5,6 +5,7 @@ #include #include +#include "codec/block_codec.hpp" #include "streamvbyte/include/streamvbyte.h" namespace pisa { @@ -35,4 +36,16 @@ struct streamvbyte_block { return in + read; } }; + +class StreamVByteBlockCodec: public BlockCodec { + static constexpr std::uint64_t m_block_size = 128; + static constexpr std::size_t m_max_compressed_bytes = + pisa::streamvbyte_max_compressedbytes(m_block_size); + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return 
m_block_size; } +}; + } // namespace pisa diff --git a/include/pisa/codec/varint_g8iu.hpp b/include/pisa/codec/varint_g8iu.hpp new file mode 100644 index 00000000..acc62732 --- /dev/null +++ b/include/pisa/codec/varint_g8iu.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include "codec/block_codec.hpp" + +namespace pisa { + +class VarintG8IUBlockCodec: public BlockCodec { + static const uint64_t m_block_size = 128; + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } +}; + +} // namespace pisa diff --git a/include/pisa/codec/varintgb.hpp b/include/pisa/codec/varintgb.hpp index 84b55a80..84b370ad 100644 --- a/include/pisa/codec/varintgb.hpp +++ b/include/pisa/codec/varintgb.hpp @@ -4,8 +4,7 @@ #include #include -#include "FastPFor/headers/common.h" - +#include "codec/block_codec.hpp" #include "codec/block_codecs.hpp" #include "memory.hpp" @@ -257,4 +256,14 @@ struct varintgb_block { return read + in; } }; + +class VarintGbBlockCodec: public BlockCodec { + static constexpr std::uint64_t m_block_size = 128; + + public: + void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out) const override; + uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const override; + auto block_size() const noexcept -> std::size_t override { return m_block_size; } +}; + } // namespace pisa diff --git a/src/codec/interpolative.cpp b/src/codec/interpolative.cpp new file mode 100644 index 00000000..d18baa54 --- /dev/null +++ b/src/codec/interpolative.cpp @@ -0,0 +1,54 @@ +#include +#include + +#include "codec/block_codecs.hpp" +#include "codec/interpolative.hpp" + +namespace pisa { + +void InterpolativeBlockCodec::encode( + uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out +) const { + assert(n <= 
m_block_size); + thread_local std::array inbuf{}; + thread_local std::vector outbuf; // TODO: Can we use array? How long does it need + // to be? + inbuf[0] = *in; + for (size_t i = 1; i < n; ++i) { + inbuf[i] = inbuf[i - 1] + in[i]; + } + + if (sum_of_values == uint32_t(-1)) { + sum_of_values = inbuf[n - 1]; + TightVariableByte::encode_single(sum_of_values, out); + } + + bit_writer bw(outbuf); + bw.write_interpolative(inbuf.data(), n - 1, 0, sum_of_values); + auto const* bufptr = reinterpret_cast(outbuf.data()); + out.insert(out.end(), bufptr, bufptr + ceil_div(bw.size(), 8)); +} + +uint8_t const* InterpolativeBlockCodec::decode( + uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n +) const { + assert(n <= m_block_size); + if (sum_of_values == std::numeric_limits::max()) { + in = TightVariableByte::decode(in, &sum_of_values, 1); + } + + out[n - 1] = sum_of_values; + size_t read_interpolative = 0; + if (n > 1) { + bit_reader br(in); + br.read_interpolative(out, n - 1, 0, sum_of_values); + for (size_t i = n - 1; i > 0; --i) { + out[i] -= out[i - 1]; + } + read_interpolative = ceil_div(br.position(), 8); + } + + return in + read_interpolative; +} + +} // namespace pisa diff --git a/src/codec/maskedvbyte.cpp b/src/codec/maskedvbyte.cpp new file mode 100644 index 00000000..1d09348a --- /dev/null +++ b/src/codec/maskedvbyte.cpp @@ -0,0 +1,29 @@ +#include "codec/maskedvbyte.hpp" + +namespace pisa { + +void MaskedVByteBlockCodec::encode( + uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out +) const { + assert(n <= m_block_size); + auto* src = const_cast(in); + if (n < m_block_size) { + interpolative_block::encode(src, sum_of_values, n, out); + return; + } + thread_local std::array buf{}; + size_t out_len = vbyte_encode(src, n, buf.data()); + out.insert(out.end(), buf.data(), buf.data() + out_len); +} + +uint8_t const* +MaskedVByteBlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { + assert(n <= 
m_block_size); + if PISA_UNLIKELY (n < m_block_size) { + return interpolative_block::decode(in, out, sum_of_values, n); + } + auto read = masked_vbyte_decode(in, out, n); + return in + read; +} + +} // namespace pisa diff --git a/src/codec/optpfor.cpp b/src/codec/optpfor.cpp new file mode 100644 index 00000000..445fa633 --- /dev/null +++ b/src/codec/optpfor.cpp @@ -0,0 +1,47 @@ +#include + +#include "codec/block_codecs.hpp" +#include "codec/optpfor.hpp" + +namespace pisa { + +void OptPForBlockCodec::encode( + uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out +) const { + thread_local Codec optpfor_codec; + thread_local std::array buf{}; + assert(n <= m_block_size); + + if (n < m_block_size) { + interpolative_block::encode(in, sum_of_values, n, out); + return; + } + + size_t out_len = buf.size(); + + optpfor_codec.force_b = nullptr; + optpfor_codec.encodeBlock(in, reinterpret_cast(buf.data()), out_len); + out_len *= 4; + out.insert(out.end(), buf.data(), buf.data() + out_len); +} + +uint8_t const* +OptPForBlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { + thread_local Codec optpfor_codec; // pfor decoding is *not* thread-safe + assert(n <= m_block_size); + + if PISA_UNLIKELY (n < m_block_size) { + return interpolative_block::decode(in, out, sum_of_values, n); + } + + size_t out_len = m_block_size; + uint8_t const* ret; + + ret = reinterpret_cast( + optpfor_codec.decodeBlock(reinterpret_cast(in), out, out_len) + ); + assert(out_len == n); + return ret; +} + +} // namespace pisa diff --git a/src/codec/qmx.cpp b/src/codec/qmx.cpp new file mode 100644 index 00000000..a35041ee --- /dev/null +++ b/src/codec/qmx.cpp @@ -0,0 +1,40 @@ +#include "codec/qmx.hpp" + +namespace pisa { + +void QmxBlockCodec::encode( + uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out +) const { + assert(n <= m_block_size); + auto* src = const_cast(in); + if (n < m_block_size) { + 
interpolative_block::encode(src, sum_of_values, n, out); + return; + } + thread_local QMX::compress_integer_qmx_improved qmx_codec; + thread_local std::vector buf(2 * n * sizeof(uint32_t) + m_overflow); + + size_t out_len = qmx_codec.encode(buf.data(), buf.size(), in, n); + TightVariableByte::encode_single(out_len, out); + out.insert(out.end(), buf.data(), buf.data() + out_len); +} + +uint8_t const* +QmxBlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { + static QMX::compress_integer_qmx_improved qmx_codec; // decodeBlock is thread-safe + assert(n <= m_block_size); + if PISA_UNLIKELY (n < m_block_size) { + return interpolative_block::decode(in, out, sum_of_values, n); + } + uint32_t enc_len = 0; + in = TightVariableByte::decode(in, &enc_len, 1); + std::vector buf(2 * n + m_overflow); + qmx_codec.decode(buf.data(), n, in, enc_len); + for (size_t i = 0; i < n; ++i) { + *out = buf[i]; + ++out; + } + return in + enc_len; +} + +} // namespace pisa diff --git a/src/codec/simple16.cpp b/src/codec/simple16.cpp new file mode 100644 index 00000000..c7edaec4 --- /dev/null +++ b/src/codec/simple16.cpp @@ -0,0 +1,31 @@ +#include "codec/simple16.hpp" + +namespace pisa { + +void Simple16BlockCodec::encode( + uint32_t const* in, [[maybe_unused]] uint32_t sum_of_values, size_t n, std::vector& out +) const { + assert(n <= m_block_size); + thread_local FastPForLib::Simple16 codec; + thread_local std::array buf{}; + size_t out_len = buf.size(); + codec.encodeArray(in, n, reinterpret_cast(buf.data()), out_len); + out_len *= 4; + out.insert(out.end(), buf.data(), buf.data() + out_len); +} + +uint8_t const* +Simple16BlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { + assert(n <= m_block_size); + FastPForLib::Simple16 codec; + std::array buf{}; + + auto const* ret = reinterpret_cast( + codec.decodeArray(reinterpret_cast(in), 8 * n, buf.data(), n) + ); + + std::copy(buf.begin(), std::next(buf.begin(), 
n), out); + return ret; +} + +} // namespace pisa diff --git a/src/codec/simple8b.cpp b/src/codec/simple8b.cpp new file mode 100644 index 00000000..38768823 --- /dev/null +++ b/src/codec/simple8b.cpp @@ -0,0 +1,26 @@ +#include "codec/simple8b.hpp" + +namespace pisa { + +void Simple8bBlockCodec::encode( + uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out +) const { + assert(n <= m_block_size); + thread_local FastPForLib::Simple8b codec; + thread_local std::array buf{}; + size_t out_len = buf.size(); + codec.encodeArray(in, n, reinterpret_cast(buf.data()), out_len); + out_len *= 4; + out.insert(out.end(), buf.data(), buf.data() + out_len); +} + +uint8_t const* +Simple8bBlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { + assert(n <= m_block_size); + FastPForLib::Simple8b codec; + return reinterpret_cast( + codec.decodeArray(reinterpret_cast(in), 8 * n, out, n) + ); +} + +} // namespace pisa diff --git a/src/codec/streamvbyte.cpp b/src/codec/streamvbyte.cpp new file mode 100644 index 00000000..270763e3 --- /dev/null +++ b/src/codec/streamvbyte.cpp @@ -0,0 +1,22 @@ +#include "codec/streamvbyte.hpp" + +namespace pisa { + +void StreamVByteBlockCodec::encode( + uint32_t const* in, uint32_t /* sum_of_values */, size_t n, std::vector& out +) const { + assert(n <= m_block_size); + auto* src = const_cast(in); + thread_local std::array buf{}; + size_t out_len = streamvbyte_encode(src, n, buf.data()); + out.insert(out.end(), buf.data(), buf.data() + out_len); +} +uint8_t const* StreamVByteBlockCodec::decode( + uint8_t const* in, uint32_t* out, uint32_t /* sum_of_values */, size_t n +) const { + assert(n <= m_block_size); + auto read = streamvbyte_decode(in, out, n); + return in + read; +} + +} // namespace pisa diff --git a/src/codec/varint_g8iu.cpp b/src/codec/varint_g8iu.cpp new file mode 100644 index 00000000..49fb2595 --- /dev/null +++ b/src/codec/varint_g8iu.cpp @@ -0,0 +1,94 @@ +#include + +#include 
"codec/block_codecs.hpp" +#include "codec/varint_g8iu.hpp" +#include "util/likely.hpp" + +namespace pisa { + +namespace { + +struct Codec: VarIntG8IU { + // rewritten version of decodeBlock optimized for when the output + // size is known rather than the input + // the buffers pointed by src and dst must be respectively at least + // 9 and 8 elements large + uint32_t decodeBlock(uint8_t const*& src, uint32_t* dst) const { + uint8_t desc = *src; + src += 1; + const __m128i data = _mm_lddqu_si128(reinterpret_cast<__m128i const*>(src)); + src += 8; + const __m128i result = _mm_shuffle_epi8(data, vecmask[desc][0]); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), result); + int readSize = maskOutputSize[desc]; + + if (readSize > 4) { + const __m128i result2 = + _mm_shuffle_epi8(data, vecmask[desc][1]); //__builtin_ia32_pshufb128(data, + // shf2); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dst + 4), result2); + //__builtin_ia32_storedqu(dst + //+ (16), result2); + } + + return readSize; + } +}; + +} // namespace + +void VarintG8IUBlockCodec::encode( + uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out +) const { + thread_local Codec varint_codec; + thread_local std::array buf{}; + assert(n <= m_block_size); + + if (n < m_block_size) { + interpolative_block::encode(in, sum_of_values, n, out); + return; + } + + size_t out_len = buf.size(); + + const uint32_t* src = in; + unsigned char* dst = buf.data(); + size_t srclen = n * 4; + size_t dstlen = out_len; + out_len = 0; + while (srclen > 0 && dstlen >= 9) { + out_len += varint_codec.encodeBlock(src, srclen, dst, dstlen); + } + assert(srclen == 0); + out.insert(out.end(), buf.data(), buf.data() + out_len); +} + +uint8_t const* +VarintG8IUBlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { + static Codec varint_codec; // decodeBlock is thread-safe + assert(n <= m_block_size); + + if PISA_UNLIKELY (n < m_block_size) { + return interpolative_block::decode(in, out, sum_of_values, n); + } + + 
size_t out_len = 0; + uint8_t const* src = in; + uint32_t* dst = out; + while (out_len <= (n - 8)) { + out_len += varint_codec.decodeBlock(src, dst + out_len); + } + + // decodeBlock can overshoot, so we decode the last blocks in a local buffer + while (out_len < n) { + uint32_t buf[8]; + size_t read = varint_codec.decodeBlock(src, buf); + size_t needed = std::min(read, n - out_len); + memcpy(dst + out_len, buf, needed * 4); + out_len += needed; + } + assert(out_len == n); + return src; +} + +} // namespace pisa diff --git a/src/codec/varintgb.cpp b/src/codec/varintgb.cpp new file mode 100644 index 00000000..dd866047 --- /dev/null +++ b/src/codec/varintgb.cpp @@ -0,0 +1,30 @@ +#include "codec/varintgb.hpp" + +namespace pisa { + +void VarintGbBlockCodec::encode( + uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector& out +) const { + thread_local VarIntGB varintgb_codec; + assert(n <= m_block_size); + if (n < m_block_size) { + interpolative_block::encode(in, sum_of_values, n, out); + return; + } + thread_local std::array buf{}; + size_t out_len = varintgb_codec.encodeArray(in, n, buf.data()); + out.insert(out.end(), buf.data(), buf.data() + out_len); +} + +uint8_t const* +VarintGbBlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { + thread_local VarIntGB varintgb_codec; + assert(n <= m_block_size); + if PISA_UNLIKELY (n < m_block_size) { + return interpolative_block::decode(in, out, sum_of_values, n); + } + auto read = varintgb_codec.decodeArray(in, n, out); + return read + in; +} + +} // namespace pisa diff --git a/tools/queries_dynamic.cpp b/tools/queries_dynamic.cpp index 7289723e..9f335033 100644 --- a/tools/queries_dynamic.cpp +++ b/tools/queries_dynamic.cpp @@ -17,11 +17,20 @@ #include "app.hpp" #include "block_inverted_index.hpp" #include "codec/block_codec.hpp" +#include "codec/interpolative.hpp" +#include "codec/maskedvbyte.hpp" +#include "codec/optpfor.hpp" +#include "codec/qmx.hpp" +#include 
"codec/simdbp.hpp" +#include "codec/simple16.hpp" +#include "codec/simple8b.hpp" +#include "codec/streamvbyte.hpp" +#include "codec/varint_g8iu.hpp" +#include "codec/varintgb.hpp" #include "cursor/block_max_scored_cursor.hpp" #include "cursor/cursor.hpp" #include "cursor/max_scored_cursor.hpp" #include "cursor/scored_cursor.hpp" -#include "index_types.hpp" #include "memory_source.hpp" #include "query/algorithm/and_query.hpp" #include "query/algorithm/block_max_maxscore_query.hpp" @@ -311,12 +320,36 @@ using wand_uniform_index = wand_data>; using wand_uniform_index_quantized = wand_data>; auto resolve_codec(std::string_view encoding) -> std::unique_ptr { + if (encoding == "block_interpolative") { + return std::make_unique(); + } + if (encoding == "block_maskedvbyte") { + return std::make_unique(); + } + if (encoding == "block_optpfor") { + return std::make_unique(); + } + if (encoding == "block_qmx") { + return std::make_unique(); + } if (encoding == "block_simdbp") { return std::make_unique(); } - if (encoding == "simple16_simdbp") { + if (encoding == "block_simple16") { return std::make_unique(); } + if (encoding == "block_simple8b") { + return std::make_unique(); + } + if (encoding == "block_streamvbyte") { + return std::make_unique(); + } + if (encoding == "block_varintg8iu") { + return std::make_unique(); + } + if (encoding == "block_varintgb") { + return std::make_unique(); + } throw std::domain_error("invalid encoding type"); }