-
-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement all dynamic-dispatch block codecs
- Loading branch information
Showing
20 changed files
with
574 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#pragma once | ||
|
||
#include <vector> | ||
|
||
#include "codec/block_codec.hpp" | ||
|
||
namespace pisa { | ||
|
||
/**
 * `BlockCodec` implementation whose encoder writes prefix sums with
 * `bit_writer::write_interpolative` and whose decoder reads them back with
 * `bit_reader::read_interpolative`.
 */
class InterpolativeBlockCodec: public BlockCodec {
    /// Upper bound on the number of values accepted per block.
    static constexpr std::uint64_t m_block_size{128};

  public:
    /// Appends the encoded form of the `n` values starting at `in` to `out`.
    void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;

    /// Decodes `n` values from `in` into `out` and returns the position just
    /// past the consumed input bytes.
    uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;

    /// Returns the per-block value limit (128).
    std::size_t block_size() const noexcept { return m_block_size; }
};
|
||
} // namespace pisa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#pragma once | ||
|
||
#include <vector> | ||
|
||
#include "FastPFor/headers/optpfor.h" | ||
|
||
#include "codec/block_codec.hpp" | ||
|
||
namespace pisa { | ||
|
||
class OptPForBlockCodec: public BlockCodec { | ||
struct Codec: FastPForLib::OPTPFor<4, FastPForLib::Simple16<false>> { | ||
uint8_t const* force_b{nullptr}; | ||
|
||
uint32_t findBestB(const uint32_t* in, uint32_t len) { | ||
// trick to force the choice of b from a parameter | ||
if (force_b != nullptr) { | ||
return *force_b; | ||
} | ||
|
||
// this is mostly a cut&paste from FastPFor, but we stop the | ||
// optimization early as the b to test becomes larger than maxb | ||
uint32_t b = 0; | ||
uint32_t bsize = std::numeric_limits<uint32_t>::max(); | ||
const uint32_t mb = FastPForLib::maxbits(in, in + len); | ||
uint32_t i = 0; | ||
while (mb > 28 + possLogs[i]) { | ||
++i; // some schemes such as Simple16 don't code numbers greater than 28 | ||
} | ||
|
||
for (; i < possLogs.size(); i++) { | ||
if (possLogs[i] > mb && possLogs[i] >= mb) { | ||
break; | ||
} | ||
const uint32_t csize = tryB(possLogs[i], in, len); | ||
|
||
if (csize <= bsize) { | ||
b = possLogs[i]; | ||
bsize = csize; | ||
} | ||
} | ||
return b; | ||
} | ||
}; | ||
|
||
static const uint64_t m_block_size = Codec::BlockSize; | ||
|
||
public: | ||
void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const; | ||
uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const; | ||
auto block_size() const noexcept -> std::size_t { return m_block_size; } | ||
}; | ||
|
||
} // namespace pisa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#pragma once | ||
|
||
#include <vector> | ||
|
||
#include "codec/block_codec.hpp" | ||
|
||
namespace pisa { | ||
|
||
/**
 * `BlockCodec` implementation for the Varint-G8IU scheme (per the class name).
 *
 * Only the declarations of `encode` and `decode` live here; their definitions
 * are in a separate translation unit.
 */
class VarintG8IUBlockCodec: public BlockCodec {
    /// Number of values per block. `constexpr` matches the other codecs and,
    /// in C++17, makes the member implicitly inline, so ODR-use needs no
    /// out-of-class definition.
    static constexpr uint64_t m_block_size = 128;

  public:
    /// Declared here; defined out of line.
    void encode(uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out) const;
    /// Declared here; defined out of line.
    uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const;
    /// Returns the number of values per block (128).
    auto block_size() const noexcept -> std::size_t { return m_block_size; }
};
|
||
} // namespace pisa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#include <cassert> | ||
#include <limits> | ||
|
||
#include "codec/block_codecs.hpp" | ||
#include "codec/interpolative.hpp" | ||
|
||
namespace pisa { | ||
|
||
void InterpolativeBlockCodec::encode( | ||
uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out | ||
) const { | ||
assert(n <= m_block_size); | ||
thread_local std::array<std::uint32_t, m_block_size> inbuf{}; | ||
thread_local std::vector<uint32_t> outbuf; // TODO: Can we use array? How long does it need | ||
// to be? | ||
inbuf[0] = *in; | ||
for (size_t i = 1; i < n; ++i) { | ||
inbuf[i] = inbuf[i - 1] + in[i]; | ||
} | ||
|
||
if (sum_of_values == uint32_t(-1)) { | ||
sum_of_values = inbuf[n - 1]; | ||
TightVariableByte::encode_single(sum_of_values, out); | ||
} | ||
|
||
bit_writer bw(outbuf); | ||
bw.write_interpolative(inbuf.data(), n - 1, 0, sum_of_values); | ||
auto const* bufptr = reinterpret_cast<uint8_t const*>(outbuf.data()); | ||
out.insert(out.end(), bufptr, bufptr + ceil_div(bw.size(), 8)); | ||
} | ||
|
||
uint8_t const* InterpolativeBlockCodec::decode( | ||
uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n | ||
) const { | ||
assert(n <= m_block_size); | ||
if (sum_of_values == std::numeric_limits<std::uint32_t>::max()) { | ||
in = TightVariableByte::decode(in, &sum_of_values, 1); | ||
} | ||
|
||
out[n - 1] = sum_of_values; | ||
size_t read_interpolative = 0; | ||
if (n > 1) { | ||
bit_reader br(in); | ||
br.read_interpolative(out, n - 1, 0, sum_of_values); | ||
for (size_t i = n - 1; i > 0; --i) { | ||
out[i] -= out[i - 1]; | ||
} | ||
read_interpolative = ceil_div(br.position(), 8); | ||
} | ||
|
||
return in + read_interpolative; | ||
} | ||
|
||
} // namespace pisa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#include "codec/maskedvbyte.hpp" | ||
|
||
namespace pisa { | ||
|
||
void MaskedVByteBlockCodec::encode( | ||
uint32_t const* in, uint32_t sum_of_values, size_t n, std::vector<uint8_t>& out | ||
) const { | ||
assert(n <= m_block_size); | ||
auto* src = const_cast<uint32_t*>(in); | ||
if (n < m_block_size) { | ||
interpolative_block::encode(src, sum_of_values, n, out); | ||
return; | ||
} | ||
thread_local std::array<std::uint8_t, 2 * m_block_size * sizeof(std::uint32_t)> buf{}; | ||
size_t out_len = vbyte_encode(src, n, buf.data()); | ||
out.insert(out.end(), buf.data(), buf.data() + out_len); | ||
} | ||
|
||
uint8_t const* | ||
MaskedVByteBlockCodec::decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) const { | ||
assert(n <= m_block_size); | ||
if PISA_UNLIKELY (n < m_block_size) { | ||
return interpolative_block::decode(in, out, sum_of_values, n); | ||
} | ||
auto read = masked_vbyte_decode(in, out, n); | ||
return in + read; | ||
} | ||
|
||
} // namespace pisa |
Oops, something went wrong.