Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Concepts #571

Merged
merged 3 commits into from
Feb 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
# Enable the concept-based checks for every C++ standard other than C++17.
if(NOT CMAKE_CXX_STANDARD EQUAL 17)
  add_compile_definitions(PISA_ENABLE_CONCEPTS=1)
  add_compile_definitions(PISA_CXX20=1)
  # -fconcepts-diagnostics-depth is a GCC-specific option. Other compilers
  # (e.g. Clang) reject it as an unknown argument, so pass it to GCC only.
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fconcepts-diagnostics-depth=2")
  endif()
endif()
add_compile_definitions(BOOST_NO_CXX98_FUNCTION_BASE=1)

Expand Down
5 changes: 5 additions & 0 deletions include/pisa/block_freq_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "bit_vector.hpp"
#include "block_posting_list.hpp"
#include "codec/compact_elias_fano.hpp"
#include "concepts.hpp"
#include "concepts/inverted_index.hpp"
#include "mappable/mappable_vector.hpp"
#include "mappable/mapper.hpp"
#include "memory_source.hpp"
Expand Down Expand Up @@ -44,6 +46,9 @@ class block_freq_index {
* any index operations may result in undefined behavior.
*/
explicit block_freq_index(MemorySource source) : m_source(std::move(source)) {
// Check that this index type, paired with the document enumerator of its
// posting list type, models concepts::SortedInvertedIndex.
// NOTE(review): PISA_ASSERT_CONCEPT is defined outside this view (presumably
// in concepts.hpp); confirm it is a no-op when concepts are disabled.
PISA_ASSERT_CONCEPT((concepts::SortedInvertedIndex<
block_freq_index,
typename block_posting_list<BlockCodec, Profile>::document_enumerator>));
// Hand this object and the bytes owned by m_source to the mapper, with the
// warmup flag set.
mapper::map(*this, m_source.data(), mapper::map_flags::warmup);
}

Expand Down
11 changes: 10 additions & 1 deletion include/pisa/block_posting_list.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include "codec/block_codecs.hpp"
#include "concepts.hpp"
#include "concepts/posting_cursor.hpp"
#include "util/block_profiler.hpp"
#include "util/util.hpp"

Expand Down Expand Up @@ -86,6 +88,11 @@ struct block_posting_list {
m_block_endpoints(m_block_maxs + 4 * m_blocks),
m_blocks_data(m_block_endpoints + 4 * (m_blocks - 1)),
m_universe(universe) {
PISA_ASSERT_CONCEPT(
(concepts::FrequencyPostingCursor<document_enumerator>
&& concepts::SortedPostingCursor<document_enumerator>)
);

if (Profile) {
// std::cout << "OPEN\t" << m_term_id << "\t" << m_blocks << "\n";
m_block_profile = block_profiler::open_list(term_id, m_blocks);
Expand Down Expand Up @@ -159,9 +166,11 @@ struct block_posting_list {
return m_freqs_buf[m_pos_in_block] + 1;
}

/** Returns the same value as freq(); provided under a second name. */
uint64_t PISA_ALWAYSINLINE value() { return freq(); }

/**
* Returns the current position, computed as the current block index times
* BlockCodec::block_size plus the offset within the current block.
*/
uint64_t position() const { return m_cur_block * BlockCodec::block_size + m_pos_in_block; }

uint64_t size() const { return m_n; }
/**
* Returns the stored element count (m_n).
*
* Declared const and noexcept, which is what concepts::SizedContainer
* requires of size().
*/
uint64_t size() const noexcept { return m_n; }

/** Returns the stored block count (m_blocks). */
uint64_t num_blocks() const { return m_blocks; }

Expand Down
39 changes: 39 additions & 0 deletions include/pisa/concepts/container.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

// Copyright 2024 PISA developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// clang-format off

#pragma once

#ifdef PISA_ENABLE_CONCEPTS

#include <concepts>

namespace pisa::concepts {

/**
* Any container that reports its size.
*
* Satisfied when `size()` can be called on a const object, is `noexcept`,
* and returns a value convertible to `std::size_t`.
*/
template <typename T>
concept SizedContainer = requires(T const container) {
/** Returns the number of elements in the container. */
{ container.size() } noexcept -> std::convertible_to<std::size_t>;
};

}; // namespace pisa::concepts

// clang-format on

#endif
52 changes: 52 additions & 0 deletions include/pisa/concepts/inverted_index.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright 2024 PISA developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// clang-format off

#ifdef PISA_ENABLE_CONCEPTS

#include <concepts>
#include <cstdint>

#include "container.hpp"
#include "posting_cursor.hpp"

namespace pisa::concepts {

/**
* Inverted index is a collection of posting lists.
*
* `T` is the index type and `Cursor` is the cursor type through which its
* posting lists are accessed. `Cursor` must model `PostingCursor` and `T` must
* model `SizedContainer`. Both required operations must be callable on a
* const index.
*/
template <typename T, typename Cursor>
concept InvertedIndex = PostingCursor<Cursor> && SizedContainer<T>
&& requires(T const i, std::uint32_t termid) {
/** Accesses the posting list of term `termid`; must return exactly `Cursor`. */
{ i.operator[](termid) } -> std::same_as<Cursor>;

/** Returns the number of indexed documents; must be `noexcept`. */
{ i.num_docs() } noexcept -> std::convertible_to<std::size_t>;
};

/**
* Inverted index that stores postings sorted by document IDs.
*
* Same as `InvertedIndex`, with the additional requirement that `Cursor`
* models `SortedPostingCursor`.
*/
template <typename T, typename Cursor>
concept SortedInvertedIndex = InvertedIndex<T, Cursor> && SortedPostingCursor<Cursor>;

}; // namespace pisa::concepts

// clang-format on

#endif
63 changes: 63 additions & 0 deletions include/pisa/concepts/mapping.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright 2024 PISA developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// clang-format off

#pragma once

#ifdef PISA_ENABLE_CONCEPTS

#include <concepts>
#include <cstdint>
#include <optional>

namespace pisa::concepts {

/**
* Mapping from an integer to a payload value.
*
* One of the examples is a mapping from document ID to document title or URL.
*
* Both operations must be callable on a const mapping.
*/
template <typename T, typename Payload>
concept Mapping = requires(T const map, std::uint32_t pos) {
/** Returns the payload at position `pos`, as a value convertible to `Payload`. */
{ map[pos] } -> std::convertible_to<Payload>;

/** Returns the number of elements in the mapping; must be `noexcept`. */
{ map.size() } noexcept -> std::convertible_to<std::size_t>;
};

/**
* Mapping from a payload value to ordinal ID.
*/
template <typename T, typename Payload>
concept ReverseMapping = requires(T const map, Payload payload) {
/**
* Looks up the position of the given payload. The result must be convertible
* to `std::optional<std::uint32_t>`.
*/
{ map.find(payload) } -> std::convertible_to<std::optional<std::uint32_t>>;
};

/**
* Mapping from an integer to a payload value and back.
*
* One of the examples is a term lexicon, which maps from term IDs to terms and back.
* The backwards mapping can be used to look up term IDs after parsing a query into term tokens.
*/
template <typename T, typename Payload>
concept BidirectionalMapping = Mapping<T, Payload> && ReverseMapping<T, Payload>;

}; // namespace pisa::concepts

// clang-format on

#endif
99 changes: 99 additions & 0 deletions include/pisa/concepts/posting_cursor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2024 PISA developers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// clang-format off

#pragma once

#ifdef PISA_ENABLE_CONCEPTS

#include <concepts>
#include <cstdint>

#include "container.hpp"
#include "type_alias.hpp"

namespace pisa::concepts {

/**
* A posting cursor iterates over a posting list.
*
* The cursor must model `SizedContainer`, expose `docid()` on a const object,
* and expose `next()` on a non-const object.
*/
template <typename C>
concept PostingCursor = SizedContainer<C> && requires(C const &cursor)
{
/** Returns the document ID at the current position. */
{ cursor.docid() } -> std::convertible_to<std::uint32_t>;
} && requires(C cursor) {
/** Moves the cursor to the next position. */
cursor.next();
};

/**
* A posting cursor returning a frequency.
*
* `freq()` is only required on a non-const cursor.
*/
template <typename C>
concept FrequencyPostingCursor = PostingCursor<C> && requires(C cursor) {
/** Returns the frequency stored at the current position. */
{ cursor.freq() } -> std::convertible_to<std::uint32_t>;
};

/**
* A posting cursor returning a score.
*
* `score()` is only required on a non-const cursor.
* NOTE(review): `Score` is not declared in this header; presumably it comes
* from type_alias.hpp -- confirm.
*/
template <typename C>
concept ScoredPostingCursor = PostingCursor<C> && requires(C cursor) {
/** Returns the score at the current position. */
{ cursor.score() } -> std::convertible_to<Score>;
};

/**
* A cursor over a posting list that stores postings in increasing order of document IDs.
*/
template <typename C>
concept SortedPostingCursor = PostingCursor<C>
&& requires(C cursor, std::uint32_t docid) {
/**
* Moves the cursor to the next position at which the document ID is at least `docid`.
* If the current ID already satisfies this condition, the cursor will not move. It will
* never move backwards.
*/
cursor.next_geq(docid);
};

/**
* A posting cursor with max score.
*
* `max_score()` must be callable on a const cursor and be `noexcept`.
*/
template <typename C>
concept MaxScorePostingCursor = ScoredPostingCursor<C> && requires(C const& cursor) {
/** Returns the max score of the entire list. */
{ cursor.max_score() } noexcept -> std::convertible_to<Score>;
};

/**
* A posting cursor with block-max scores.
*
* Requires both `MaxScorePostingCursor` and `SortedPostingCursor`.
* NOTE(review): `DocId` is not declared in this header; presumably it comes
* from type_alias.hpp -- confirm.
*/
template <typename C>
concept BlockMaxPostingCursor = MaxScorePostingCursor<C> && SortedPostingCursor<C>
&& requires(C cursor) {
/** Returns the highest document ID of the current block. */
{ cursor.block_max_docid() } -> std::convertible_to<DocId>;
/** Returns the max score of the current block. */
{ cursor.block_max_score() } -> std::convertible_to<Score>;
};

}; // namespace pisa::concepts

// clang-format on

#endif
5 changes: 4 additions & 1 deletion include/pisa/cursor/block_max_scored_cursor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
namespace pisa {

template <typename Cursor, typename Wand>
PISA_REQUIRES((concepts::FrequencyPostingCursor<Cursor> && concepts::SortedPostingCursor<Cursor>))
class BlockMaxScoredCursor: public MaxScoredCursor<Cursor> {
public:
using base_cursor_type = Cursor;
Expand All @@ -22,7 +23,9 @@ class BlockMaxScoredCursor: public MaxScoredCursor<Cursor> {
typename Wand::wand_data_enumerator wdata
)
: MaxScoredCursor<Cursor>(std::move(cursor), std::move(term_scorer), weight, max_score),
m_wdata(std::move(wdata)) {}
m_wdata(std::move(wdata)) {
PISA_ASSERT_CONCEPT((concepts::BlockMaxPostingCursor<BlockMaxScoredCursor>));
}
BlockMaxScoredCursor(BlockMaxScoredCursor const&) = delete;
BlockMaxScoredCursor(BlockMaxScoredCursor&&) = default;
BlockMaxScoredCursor& operator=(BlockMaxScoredCursor const&) = delete;
Expand Down
8 changes: 7 additions & 1 deletion include/pisa/cursor/max_scored_cursor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@
namespace pisa {

template <typename Cursor>
PISA_REQUIRES((concepts::FrequencyPostingCursor<Cursor> && concepts::SortedPostingCursor<Cursor>))
class MaxScoredCursor: public ScoredCursor<Cursor> {
public:
using base_cursor_type = Cursor;

MaxScoredCursor(Cursor cursor, TermScorer term_scorer, float weight, float max_score)
: ScoredCursor<Cursor>(std::move(cursor), std::move(term_scorer), weight),
m_max_score(max_score) {}
m_max_score(max_score) {
PISA_ASSERT_CONCEPT(
(concepts::MaxScorePostingCursor<MaxScoredCursor>
&& concepts::SortedPostingCursor<MaxScoredCursor>)
);
}
MaxScoredCursor(MaxScoredCursor const&) = delete;
MaxScoredCursor(MaxScoredCursor&&) = default;
MaxScoredCursor& operator=(MaxScoredCursor const&) = delete;
Expand Down
14 changes: 12 additions & 2 deletions include/pisa/cursor/scored_cursor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include <vector>

#include "concepts.hpp"
#include "concepts/posting_cursor.hpp"
#include "query.hpp"
#include "scorer/index_scorer.hpp"
#include "util/compiler_attribute.hpp"
Expand All @@ -18,14 +20,20 @@ auto resolve_term_scorer(Scorer scorer, float weight) -> TermScorer {
}

template <typename Cursor>
PISA_REQUIRES((concepts::FrequencyPostingCursor<Cursor> && concepts::SortedPostingCursor<Cursor>))
class ScoredCursor {
public:
using base_cursor_type = Cursor;

ScoredCursor(Cursor cursor, TermScorer term_scorer, float weight)
: m_base_cursor(std::move(cursor)),
m_weight(weight),
m_term_scorer(resolve_term_scorer(term_scorer, weight)) {}
m_term_scorer(resolve_term_scorer(term_scorer, weight)) {
PISA_ASSERT_CONCEPT(
(concepts::ScoredPostingCursor<ScoredCursor>
&& concepts::SortedPostingCursor<ScoredCursor>)
);
}
ScoredCursor(ScoredCursor const&) = delete;
ScoredCursor(ScoredCursor&&) = default;
ScoredCursor& operator=(ScoredCursor const&) = delete;
Expand All @@ -40,7 +48,9 @@ class ScoredCursor {
/** Scores the current posting by applying the term scorer to docid() and freq(). */
[[nodiscard]] PISA_ALWAYSINLINE auto score() -> float { return m_term_scorer(docid(), freq()); }
/** Forwards to next() on the wrapped base cursor. */
void PISA_ALWAYSINLINE next() { m_base_cursor.next(); }
/** Forwards next_geq(docid) to the wrapped base cursor. */
void PISA_ALWAYSINLINE next_geq(std::uint32_t docid) { m_base_cursor.next_geq(docid); }
[[nodiscard]] PISA_ALWAYSINLINE auto size() -> std::size_t { return m_base_cursor.size(); }
/**
* Returns the size reported by the wrapped base cursor.
*
* Declared const and noexcept, which is what concepts::SizedContainer
* requires of size().
*/
[[nodiscard]] PISA_ALWAYSINLINE auto size() const noexcept -> std::size_t {
return m_base_cursor.size();
}

private:
Cursor m_base_cursor;
Expand Down
Loading
Loading