diff --git a/CMakeLists.txt b/CMakeLists.txt index 07f82b48..53ad3338 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) if(NOT CMAKE_CXX_STANDARD EQUAL 17) add_compile_definitions(PISA_ENABLE_CONCEPTS=1) add_compile_definitions(PISA_CXX20=1) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fconcepts-diagnostics-depth=2") endif() add_compile_definitions(BOOST_NO_CXX98_FUNCTION_BASE=1) diff --git a/include/pisa/block_freq_index.hpp b/include/pisa/block_freq_index.hpp index 6bf1a552..eedfe4e6 100644 --- a/include/pisa/block_freq_index.hpp +++ b/include/pisa/block_freq_index.hpp @@ -8,6 +8,8 @@ #include "bit_vector.hpp" #include "block_posting_list.hpp" #include "codec/compact_elias_fano.hpp" +#include "concepts.hpp" +#include "concepts/inverted_index.hpp" #include "mappable/mappable_vector.hpp" #include "mappable/mapper.hpp" #include "memory_source.hpp" @@ -44,6 +46,9 @@ class block_freq_index { * any index operations may result in undefined behavior. 
*/ explicit block_freq_index(MemorySource source) : m_source(std::move(source)) { + PISA_ASSERT_CONCEPT((concepts::SortedInvertedIndex< + block_freq_index, + typename block_posting_list::document_enumerator>)); mapper::map(*this, m_source.data(), mapper::map_flags::warmup); } diff --git a/include/pisa/block_posting_list.hpp b/include/pisa/block_posting_list.hpp index 25e8462a..7ff21809 100644 --- a/include/pisa/block_posting_list.hpp +++ b/include/pisa/block_posting_list.hpp @@ -1,6 +1,8 @@ #pragma once #include "codec/block_codecs.hpp" +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "util/block_profiler.hpp" #include "util/util.hpp" @@ -86,6 +88,11 @@ struct block_posting_list { m_block_endpoints(m_block_maxs + 4 * m_blocks), m_blocks_data(m_block_endpoints + 4 * (m_blocks - 1)), m_universe(universe) { + PISA_ASSERT_CONCEPT( + (concepts::FrequencyPostingCursor + && concepts::SortedPostingCursor) + ); + if (Profile) { // std::cout << "OPEN\t" << m_term_id << "\t" << m_blocks << "\n"; m_block_profile = block_profiler::open_list(term_id, m_blocks); @@ -159,9 +166,11 @@ struct block_posting_list { return m_freqs_buf[m_pos_in_block] + 1; } + uint64_t PISA_ALWAYSINLINE value() { return freq(); } + uint64_t position() const { return m_cur_block * BlockCodec::block_size + m_pos_in_block; } - uint64_t size() const { return m_n; } + uint64_t size() const noexcept { return m_n; } uint64_t num_blocks() const { return m_blocks; } diff --git a/include/pisa/concepts/container.hpp b/include/pisa/concepts/container.hpp new file mode 100644 index 00000000..e55907df --- /dev/null +++ b/include/pisa/concepts/container.hpp @@ -0,0 +1,39 @@ + +// Copyright 2024 PISA developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// clang-format off + +#pragma once + +#ifdef PISA_ENABLE_CONCEPTS + +#include + +namespace pisa::concepts { + +/** + * Any container with a size. + */ +template +concept SizedContainer = requires(T const container) { + /** Returns the number of elements in the container. */ + { container.size() } noexcept -> std::convertible_to; +}; + +}; // namespace pisa::concepts + +// clang-format on + +#endif diff --git a/include/pisa/concepts/inverted_index.hpp b/include/pisa/concepts/inverted_index.hpp new file mode 100644 index 00000000..bb5d3bcb --- /dev/null +++ b/include/pisa/concepts/inverted_index.hpp @@ -0,0 +1,52 @@ +// Copyright 2024 PISA developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// clang-format off + +#ifdef PISA_ENABLE_CONCEPTS + +#include +#include + +#include "container.hpp" +#include "posting_cursor.hpp" + +namespace pisa::concepts { + +/** + * Inverted index is a collection of posting lists. 
+ */ +template +concept InvertedIndex = PostingCursor && SizedContainer +&& requires(T const i, std::uint32_t termid) { + /** Accesses a posting list via a cursor. */ + { i.operator[](termid) } -> std::same_as; + + /** Returns the number of indexed documents. */ + { i.num_docs() } noexcept -> std::convertible_to; +}; + +/** + * Inverted index that stores postings sorted by document IDs. + */ +template +concept SortedInvertedIndex = InvertedIndex && SortedPostingCursor; + +}; // namespace pisa::concepts + +// clang-format on + +#endif diff --git a/include/pisa/concepts/mapping.hpp b/include/pisa/concepts/mapping.hpp new file mode 100644 index 00000000..23d373ff --- /dev/null +++ b/include/pisa/concepts/mapping.hpp @@ -0,0 +1,63 @@ +// Copyright 2024 PISA developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// clang-format off + +#pragma once + +#ifdef PISA_ENABLE_CONCEPTS + +#include +#include +#include + +namespace pisa::concepts { + +/** + * Mapping from an integer to a payload value. + * + * One example is a mapping from document ID to document title or URL. + */ +template +concept Mapping = requires(T const map, std::uint32_t pos) { + /** Get payload at position `pos`. */ + { map[pos] } -> std::convertible_to; + + /** Returns the number of elements in the mapping. */ + { map.size() } noexcept -> std::convertible_to; +}; + +/** + * Mapping from a payload value to ordinal ID. 
+ */ +template +concept ReverseMapping = requires(T const map, Payload payload) { + /** Get the position of the given payload. */ + { map.find(payload) } -> std::convertible_to>; +}; + +/** + * Mapping from an integer to a payload value and back. + * + * One example is a term lexicon, which maps from term IDs to terms and back. + * The backwards mapping can be used to look up term IDs after parsing a query into term tokens. + */ +template +concept BidirectionalMapping = Mapping && ReverseMapping; + +}; // namespace pisa::concepts + +// clang-format on + +#endif diff --git a/include/pisa/concepts/posting_cursor.hpp b/include/pisa/concepts/posting_cursor.hpp new file mode 100644 index 00000000..26dbb1e0 --- /dev/null +++ b/include/pisa/concepts/posting_cursor.hpp @@ -0,0 +1,99 @@ +// Copyright 2024 PISA developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// clang-format off + +#pragma once + +#ifdef PISA_ENABLE_CONCEPTS + +#include +#include + +#include "container.hpp" +#include "type_alias.hpp" + +namespace pisa::concepts { + +/** + * A posting cursor iterates over a posting list. + */ +template +concept PostingCursor = SizedContainer && requires(C const &cursor) +{ + /** Returns the document ID at the current position. */ + { cursor.docid() } -> std::convertible_to; +} && requires(C cursor) { + /** Moves the cursor to the next position. */ + cursor.next(); +}; + +/** + * A posting cursor returning a frequency. 
+ */ +template +concept FrequencyPostingCursor = PostingCursor && requires(C cursor) { + /** Returns the frequency payload of the current posting. */ + { cursor.freq() } -> std::convertible_to; +}; + +/** + * A posting cursor returning a score. + */ +template +concept ScoredPostingCursor = PostingCursor && requires(C cursor) { + /** Returns the score of the current posting. */ + { cursor.score() } -> std::convertible_to; +}; + +/** + * A cursor over a posting list that stores postings in increasing order of document IDs. + */ +template +concept SortedPostingCursor = PostingCursor +&& requires(C cursor, std::uint32_t docid) { + /** + * Moves the cursor to the next position at which the document ID is at least `docid`. + * If the current ID already satisfies this condition, the cursor will not move. It will + * never move backwards. + */ + cursor.next_geq(docid); +}; + +/** + * A posting cursor with max score. + */ +template +concept MaxScorePostingCursor = ScoredPostingCursor && requires(C const& cursor) { + /** Returns the max score of the entire list. */ + { cursor.max_score() } noexcept -> std::convertible_to; +}; + +/** + * A posting cursor with block-max scores. + */ +template +concept BlockMaxPostingCursor = MaxScorePostingCursor && SortedPostingCursor +&& requires(C cursor) { + /** Returns the highest docid of the current block. */ + { cursor.block_max_docid() } -> std::convertible_to; + /** Returns the max score of the current block. 
*/ + { cursor.block_max_score() } -> std::convertible_to; +}; + +}; // namespace pisa + +// clang-format on + +#endif diff --git a/include/pisa/cursor/block_max_scored_cursor.hpp b/include/pisa/cursor/block_max_scored_cursor.hpp index 30996815..f2e82312 100644 --- a/include/pisa/cursor/block_max_scored_cursor.hpp +++ b/include/pisa/cursor/block_max_scored_cursor.hpp @@ -10,6 +10,7 @@ namespace pisa { template +PISA_REQUIRES((concepts::FrequencyPostingCursor && concepts::SortedPostingCursor)) class BlockMaxScoredCursor: public MaxScoredCursor { public: using base_cursor_type = Cursor; @@ -22,7 +23,9 @@ class BlockMaxScoredCursor: public MaxScoredCursor { typename Wand::wand_data_enumerator wdata ) : MaxScoredCursor(std::move(cursor), std::move(term_scorer), weight, max_score), - m_wdata(std::move(wdata)) {} + m_wdata(std::move(wdata)) { + PISA_ASSERT_CONCEPT((concepts::BlockMaxPostingCursor)); + } BlockMaxScoredCursor(BlockMaxScoredCursor const&) = delete; BlockMaxScoredCursor(BlockMaxScoredCursor&&) = default; BlockMaxScoredCursor& operator=(BlockMaxScoredCursor const&) = delete; diff --git a/include/pisa/cursor/max_scored_cursor.hpp b/include/pisa/cursor/max_scored_cursor.hpp index af05ef2c..70b02dc9 100644 --- a/include/pisa/cursor/max_scored_cursor.hpp +++ b/include/pisa/cursor/max_scored_cursor.hpp @@ -9,13 +9,19 @@ namespace pisa { template +PISA_REQUIRES((concepts::FrequencyPostingCursor && concepts::SortedPostingCursor)) class MaxScoredCursor: public ScoredCursor { public: using base_cursor_type = Cursor; MaxScoredCursor(Cursor cursor, TermScorer term_scorer, float weight, float max_score) : ScoredCursor(std::move(cursor), std::move(term_scorer), weight), - m_max_score(max_score) {} + m_max_score(max_score) { + PISA_ASSERT_CONCEPT( + (concepts::MaxScorePostingCursor + && concepts::SortedPostingCursor) + ); + } MaxScoredCursor(MaxScoredCursor const&) = delete; MaxScoredCursor(MaxScoredCursor&&) = default; MaxScoredCursor& operator=(MaxScoredCursor const&) = 
delete; diff --git a/include/pisa/cursor/scored_cursor.hpp b/include/pisa/cursor/scored_cursor.hpp index 8e3c797a..c7250109 100644 --- a/include/pisa/cursor/scored_cursor.hpp +++ b/include/pisa/cursor/scored_cursor.hpp @@ -2,6 +2,8 @@ #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "query.hpp" #include "scorer/index_scorer.hpp" #include "util/compiler_attribute.hpp" @@ -18,6 +20,7 @@ auto resolve_term_scorer(Scorer scorer, float weight) -> TermScorer { } template +PISA_REQUIRES((concepts::FrequencyPostingCursor && concepts::SortedPostingCursor)) class ScoredCursor { public: using base_cursor_type = Cursor; @@ -25,7 +28,12 @@ class ScoredCursor { ScoredCursor(Cursor cursor, TermScorer term_scorer, float weight) : m_base_cursor(std::move(cursor)), m_weight(weight), - m_term_scorer(resolve_term_scorer(term_scorer, weight)) {} + m_term_scorer(resolve_term_scorer(term_scorer, weight)) { + PISA_ASSERT_CONCEPT( + (concepts::ScoredPostingCursor + && concepts::SortedPostingCursor) + ); + } ScoredCursor(ScoredCursor const&) = delete; ScoredCursor(ScoredCursor&&) = default; ScoredCursor& operator=(ScoredCursor const&) = delete; @@ -40,7 +48,9 @@ class ScoredCursor { [[nodiscard]] PISA_ALWAYSINLINE auto score() -> float { return m_term_scorer(docid(), freq()); } void PISA_ALWAYSINLINE next() { m_base_cursor.next(); } void PISA_ALWAYSINLINE next_geq(std::uint32_t docid) { m_base_cursor.next_geq(docid); } - [[nodiscard]] PISA_ALWAYSINLINE auto size() -> std::size_t { return m_base_cursor.size(); } + [[nodiscard]] PISA_ALWAYSINLINE auto size() const noexcept -> std::size_t { + return m_base_cursor.size(); + } private: Cursor m_base_cursor; diff --git a/include/pisa/freq_index.hpp b/include/pisa/freq_index.hpp index 65877244..7763196f 100644 --- a/include/pisa/freq_index.hpp +++ b/include/pisa/freq_index.hpp @@ -4,8 +4,10 @@ #include #include "bitvector_collection.hpp" -#include "codec/compact_elias_fano.hpp" #include "codec/integer_codes.hpp" 
+#include "concepts.hpp" +#include "concepts/inverted_index.hpp" +#include "concepts/posting_cursor.hpp" #include "global_parameters.hpp" #include "mappable/mapper.hpp" #include "memory_source.hpp" @@ -41,6 +43,9 @@ class freq_index { * any index operations may result in undefined behavior. */ explicit freq_index(MemorySource source) : m_source(std::move(source)) { + PISA_ASSERT_CONCEPT( + (concepts::SortedInvertedIndex) + ); mapper::map(*this, m_source.data(), mapper::map_flags::warmup); } @@ -157,9 +162,11 @@ class freq_index { uint64_t PISA_FLATTEN_FUNC freq() { return m_freqs_enum.move(m_cur_pos).second; } + uint64_t PISA_FLATTEN_FUNC value() { return freq(); } + uint64_t position() const { return m_cur_pos; } - uint64_t size() const { return m_docs_enum.size(); } + uint64_t size() const noexcept { return m_docs_enum.size(); } typename DocsSequence::enumerator const& docs_enum() const { return m_docs_enum; } @@ -172,6 +179,10 @@ class freq_index { typename DocsSequence::enumerator docs_enum, typename FreqsSequence::enumerator freqs_enum ) : m_docs_enum(docs_enum), m_freqs_enum(freqs_enum) { + PISA_ASSERT_CONCEPT( + (concepts::FrequencyPostingCursor + && concepts::SortedPostingCursor) + ); reset(); } diff --git a/include/pisa/query/algorithm/and_query.hpp b/include/pisa/query/algorithm/and_query.hpp index 9f8ea1f4..5c8b0bde 100644 --- a/include/pisa/query/algorithm/and_query.hpp +++ b/include/pisa/query/algorithm/and_query.hpp @@ -4,6 +4,9 @@ #include #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" + namespace pisa { /** @@ -15,6 +18,7 @@ namespace pisa { */ struct and_query { template + PISA_REQUIRES((concepts::SortedPostingCursor)) auto operator()(CursorRange&& cursors, uint32_t max_docid) const { using Cursor = typename std::decay_t::value_type; diff --git a/include/pisa/query/algorithm/block_max_maxscore_query.hpp b/include/pisa/query/algorithm/block_max_maxscore_query.hpp index 1dcc036c..7e2f9bc3 100644 --- 
a/include/pisa/query/algorithm/block_max_maxscore_query.hpp +++ b/include/pisa/query/algorithm/block_max_maxscore_query.hpp @@ -2,6 +2,8 @@ #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -10,6 +12,7 @@ struct block_max_maxscore_query { explicit block_max_maxscore_query(topk_queue& topk) : m_topk(topk) {} template + PISA_REQUIRES((concepts::BlockMaxPostingCursor>)) void operator()(CursorRange&& cursors, uint64_t max_docid) { using Cursor = typename std::decay_t::value_type; if (cursors.empty()) { diff --git a/include/pisa/query/algorithm/block_max_ranked_and_query.hpp b/include/pisa/query/algorithm/block_max_ranked_and_query.hpp index 6f5cd9d7..d5a133c1 100644 --- a/include/pisa/query/algorithm/block_max_ranked_and_query.hpp +++ b/include/pisa/query/algorithm/block_max_ranked_and_query.hpp @@ -2,6 +2,8 @@ #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -10,6 +12,7 @@ struct block_max_ranked_and_query { explicit block_max_ranked_and_query(topk_queue& topk) : m_topk(topk) {} template + PISA_REQUIRES(concepts::BlockMaxPostingCursor>) void operator()(CursorRange&& cursors, uint64_t max_docid) { using Cursor = typename std::decay_t::value_type; diff --git a/include/pisa/query/algorithm/block_max_wand_query.hpp b/include/pisa/query/algorithm/block_max_wand_query.hpp index fa43dd93..a7b374f6 100644 --- a/include/pisa/query/algorithm/block_max_wand_query.hpp +++ b/include/pisa/query/algorithm/block_max_wand_query.hpp @@ -2,6 +2,8 @@ #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -10,6 +12,7 @@ struct block_max_wand_query { explicit block_max_wand_query(topk_queue& topk) : m_topk(topk) {} template + PISA_REQUIRES(concepts::BlockMaxPostingCursor>) void operator()(CursorRange&& cursors, uint64_t max_docid) { using Cursor = typename std::decay_t::value_type; if 
(cursors.empty()) { diff --git a/include/pisa/query/algorithm/maxscore_query.hpp b/include/pisa/query/algorithm/maxscore_query.hpp index c68edb6a..d18e7e5a 100644 --- a/include/pisa/query/algorithm/maxscore_query.hpp +++ b/include/pisa/query/algorithm/maxscore_query.hpp @@ -5,6 +5,8 @@ #include #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" #include "util/compiler_attribute.hpp" @@ -14,14 +16,18 @@ struct maxscore_query { explicit maxscore_query(topk_queue& topk) : m_topk(topk) {} template + PISA_REQUIRES( + (concepts::MaxScorePostingCursor> + && concepts::SortedPostingCursor>) + ) [[nodiscard]] PISA_ALWAYSINLINE auto sorted(Cursors&& cursors) - -> std::vector::value_type> { + -> std::vector> { std::vector term_positions(cursors.size()); std::iota(term_positions.begin(), term_positions.end(), 0); std::sort(term_positions.begin(), term_positions.end(), [&](auto&& lhs, auto&& rhs) { return cursors[lhs].max_score() > cursors[rhs].max_score(); }); - std::vector::value_type> sorted; + std::vector> sorted; for (auto pos: term_positions) { sorted.push_back(std::move(cursors[pos])); }; @@ -29,6 +35,7 @@ struct maxscore_query { } template + PISA_REQUIRES((concepts::MaxScorePostingCursor>)) [[nodiscard]] PISA_ALWAYSINLINE auto calc_upper_bounds(Cursors&& cursors) -> std::vector { std::vector upper_bounds(cursors.size()); auto out = upper_bounds.rbegin(); @@ -41,6 +48,7 @@ struct maxscore_query { } template + PISA_REQUIRES((concepts::MaxScorePostingCursor>)) [[nodiscard]] PISA_ALWAYSINLINE auto min_docid(Cursors&& cursors) -> std::uint32_t { return std::min_element( cursors.begin(), @@ -53,6 +61,7 @@ struct maxscore_query { enum class DocumentStatus : bool { Insert, Skip }; template + PISA_REQUIRES((concepts::MaxScorePostingCursor>)) PISA_ALWAYSINLINE void run_sorted(Cursors&& cursors, uint64_t max_docid) { auto upper_bounds = calc_upper_bounds(cursors); auto above_threshold = [&](auto score) { return 
m_topk.would_enter(score); }; @@ -122,6 +131,7 @@ struct maxscore_query { } template + PISA_REQUIRES((concepts::MaxScorePostingCursor>)) void operator()(Cursors&& cursors_, uint64_t max_docid) { if (cursors_.empty()) { return; diff --git a/include/pisa/query/algorithm/or_query.hpp b/include/pisa/query/algorithm/or_query.hpp index 41604eb7..d38213d7 100644 --- a/include/pisa/query/algorithm/or_query.hpp +++ b/include/pisa/query/algorithm/or_query.hpp @@ -4,6 +4,8 @@ #include #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "util/do_not_optimize_away.hpp" namespace pisa { @@ -11,6 +13,7 @@ namespace pisa { template struct or_query { template + PISA_REQUIRES((concepts::SortedPostingCursor)) uint64_t operator()(CursorRange&& cursors, uint64_t max_docid) const { using Cursor = typename std::decay_t::value_type; if (cursors.empty()) { diff --git a/include/pisa/query/algorithm/range_query.hpp b/include/pisa/query/algorithm/range_query.hpp index 8b9ae280..70ddc48c 100644 --- a/include/pisa/query/algorithm/range_query.hpp +++ b/include/pisa/query/algorithm/range_query.hpp @@ -1,5 +1,7 @@ #pragma once +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -9,6 +11,7 @@ struct range_query { explicit range_query(topk_queue& topk) : m_topk(topk) {} template + PISA_REQUIRES((concepts::MaxScorePostingCursor::value_type>)) void operator()(CursorRange&& cursors, uint64_t max_docid, size_t range_size) { m_topk.clear(); if (cursors.empty()) { @@ -24,6 +27,7 @@ struct range_query { std::vector const& topk() const { return m_topk.topk(); } template + PISA_REQUIRES((concepts::MaxScorePostingCursor::value_type>)) void process_range(CursorRange&& cursors, size_t end) { QueryAlg query_alg(m_topk); query_alg(cursors, end); diff --git a/include/pisa/query/algorithm/range_taat_query.hpp b/include/pisa/query/algorithm/range_taat_query.hpp index cd4aa774..6fc08d3b 100644 --- 
a/include/pisa/query/algorithm/range_taat_query.hpp +++ b/include/pisa/query/algorithm/range_taat_query.hpp @@ -2,6 +2,7 @@ #include "accumulator/partial_score_accumulator.hpp" #include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -11,8 +12,12 @@ struct range_taat_query { explicit range_taat_query(topk_queue& topk) : m_topk(topk) {} template - PISA_REQUIRES(PartialScoreAccumulator) - void operator()(CursorRange&& cursors, uint64_t max_docid, size_t range_size, Acc&& accumulator) { + PISA_REQUIRES( + (PartialScoreAccumulator + && pisa::concepts::MaxScorePostingCursor::value_type>) + ) + void + operator()(CursorRange&& cursors, uint64_t max_docid, size_t range_size, Acc&& accumulator) { if (cursors.empty()) { return; } diff --git a/include/pisa/query/algorithm/ranked_and_query.hpp b/include/pisa/query/algorithm/ranked_and_query.hpp index 318ca9f0..c79cffff 100644 --- a/include/pisa/query/algorithm/ranked_and_query.hpp +++ b/include/pisa/query/algorithm/ranked_and_query.hpp @@ -2,6 +2,8 @@ #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -10,7 +12,12 @@ struct ranked_and_query { explicit ranked_and_query(topk_queue& topk) : m_topk(topk) {} template - void operator()(CursorRange&& cursors, uint64_t max_docid) { + PISA_REQUIRES( + (concepts::ScoredPostingCursor> + && concepts::SortedPostingCursor>) + ) + void + operator()(CursorRange&& cursors, uint64_t max_docid) { using Cursor = typename std::decay_t::value_type; if (cursors.empty()) { return; diff --git a/include/pisa/query/algorithm/ranked_or_query.hpp b/include/pisa/query/algorithm/ranked_or_query.hpp index f10583e3..95c31db2 100644 --- a/include/pisa/query/algorithm/ranked_or_query.hpp +++ b/include/pisa/query/algorithm/ranked_or_query.hpp @@ -2,6 +2,8 @@ #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -16,7 +18,12 @@ 
struct ranked_or_query { explicit ranked_or_query(topk_queue& topk) : m_topk(topk) {} template - void operator()(CursorRange&& cursors, uint64_t max_docid) { + PISA_REQUIRES( + (concepts::ScoredPostingCursor> + && concepts::SortedPostingCursor>) + ) + void + operator()(CursorRange&& cursors, uint64_t max_docid) { using Cursor = typename std::decay_t::value_type; if (cursors.empty()) { return; diff --git a/include/pisa/query/algorithm/ranked_or_taat_query.hpp b/include/pisa/query/algorithm/ranked_or_taat_query.hpp index e7c0bced..9f70af53 100644 --- a/include/pisa/query/algorithm/ranked_or_taat_query.hpp +++ b/include/pisa/query/algorithm/ranked_or_taat_query.hpp @@ -2,6 +2,7 @@ #include "accumulator/partial_score_accumulator.hpp" #include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -11,8 +12,12 @@ class ranked_or_taat_query { explicit ranked_or_taat_query(topk_queue& topk) : m_topk(topk) {} template - PISA_REQUIRES(PartialScoreAccumulator) - void operator()(CursorRange&& cursors, uint64_t max_docid, Acc&& accumulator) { + PISA_REQUIRES( + (PartialScoreAccumulator && concepts::ScoredPostingCursor> + && concepts::SortedPostingCursor>) + ) + void + operator()(CursorRange&& cursors, uint64_t max_docid, Acc&& accumulator) { if (cursors.empty()) { return; } diff --git a/include/pisa/query/algorithm/wand_query.hpp b/include/pisa/query/algorithm/wand_query.hpp index 4e7b9527..fd42689e 100644 --- a/include/pisa/query/algorithm/wand_query.hpp +++ b/include/pisa/query/algorithm/wand_query.hpp @@ -2,6 +2,8 @@ #include +#include "concepts.hpp" +#include "concepts/posting_cursor.hpp" #include "topk_queue.hpp" namespace pisa { @@ -10,7 +12,12 @@ struct wand_query { explicit wand_query(topk_queue& topk) : m_topk(topk) {} template - void operator()(CursorRange&& cursors, uint64_t max_docid) { + PISA_REQUIRES( + (concepts::MaxScorePostingCursor> + && concepts::SortedPostingCursor>) + ) + void + operator()(CursorRange&& 
cursors, uint64_t max_docid) { using Cursor = typename std::decay_t::value_type; if (cursors.empty()) { return; diff --git a/include/pisa/term_map.hpp b/include/pisa/term_map.hpp index adc631fd..f0e087c8 100644 --- a/include/pisa/term_map.hpp +++ b/include/pisa/term_map.hpp @@ -4,6 +4,9 @@ #include #include +#include "concepts.hpp" +#include "concepts/container.hpp" +#include "concepts/mapping.hpp" #include "payload_vector.hpp" namespace pisa { @@ -17,22 +20,26 @@ class TermMap { TermMap& operator=(TermMap&&); virtual ~TermMap(); - [[nodiscard]] virtual auto operator()(std::string_view term) -> std::optional = 0; - [[nodiscard]] virtual auto operator()(std::string const& term) + [[nodiscard]] virtual auto find(std::string_view term) const -> std::optional = 0; + [[nodiscard]] virtual auto find(std::string const& term) const -> std::optional = 0; }; +PISA_ASSERT_CONCEPT((concepts::ReverseMapping)); + /** * Maps string representations of numbers to their numeric representations. */ class IntMap final: public TermMap { public: - [[nodiscard]] virtual auto operator()(std::string_view term) + [[nodiscard]] virtual auto find(std::string_view term) const -> std::optional override; - [[nodiscard]] virtual auto operator()(std::string const& term) + [[nodiscard]] virtual auto find(std::string const& term) const -> std::optional override; }; +PISA_ASSERT_CONCEPT((concepts::ReverseMapping)); + class LexiconMap final: public TermMap { std::optional m_buffer; Payload_Vector m_lexicon; @@ -41,10 +48,16 @@ class LexiconMap final: public TermMap { explicit LexiconMap(std::string const& file); explicit LexiconMap(Payload_Vector lexicon); - [[nodiscard]] virtual auto operator()(std::string_view term) + [[nodiscard]] auto operator[](std::uint32_t term_id) const -> std::string_view; + + [[nodiscard]] virtual auto find(std::string_view term) const -> std::optional override; - [[nodiscard]] virtual auto operator()(std::string const& term) + [[nodiscard]] virtual auto find(std::string 
const& term) const -> std::optional override; + [[nodiscard]] auto size() const noexcept -> std::size_t; }; +PISA_ASSERT_CONCEPT((concepts::BidirectionalMapping)); +PISA_ASSERT_CONCEPT((concepts::SizedContainer)); + } // namespace pisa diff --git a/include/pisa/type_alias.hpp b/include/pisa/type_alias.hpp index 109b26ae..d2913c7b 100644 --- a/include/pisa/type_alias.hpp +++ b/include/pisa/type_alias.hpp @@ -15,6 +15,7 @@ #pragma once #include +#include namespace pisa { @@ -22,4 +23,7 @@ using DocId = std::uint32_t; using TermId = std::uint32_t; using Score = float; +template +using val_t = typename std::decay_t::value_type; + } // namespace pisa diff --git a/src/query/query_parser.cpp b/src/query/query_parser.cpp index 0fa71501..43e517e4 100644 --- a/src/query/query_parser.cpp +++ b/src/query/query_parser.cpp @@ -30,7 +30,7 @@ auto QueryParser::parse(std::string_view query) -> Query { auto tokens = m_analyzer.analyze(raw_query); std::vector term_ids; for (auto token: *tokens) { - if (auto tid = (*m_term_map)(token); tid) { + if (auto tid = m_term_map->find(token); tid) { term_ids.push_back(*tid); } else { spdlog::warn("Term `{}` not found and will be ignored", token); diff --git a/src/term_map.cpp b/src/term_map.cpp index 3499bdd6..0b72aaf5 100644 --- a/src/term_map.cpp +++ b/src/term_map.cpp @@ -15,7 +15,7 @@ TermMap& TermMap::operator=(TermMap const&) = default; TermMap& TermMap::operator=(TermMap&&) = default; TermMap::~TermMap() = default; -auto IntMap::operator()(std::string_view term) -> std::optional { +auto IntMap::find(std::string_view term) const -> std::optional { std::uint32_t value; auto [ptr, ec] = std::from_chars(term.begin(), term.end(), value, 10); if (ec == std::errc::result_out_of_range || ec == std::errc::invalid_argument @@ -25,8 +25,8 @@ auto IntMap::operator()(std::string_view term) -> std::optional { return value; } -auto IntMap::operator()(std::string const& term) -> std::optional { - return (*this)(std::string_view(term)); +auto 
IntMap::find(std::string const& term) const -> std::optional { + return this->find(std::string_view(term)); } LexiconMap::LexiconMap(std::string const& file) @@ -35,12 +35,20 @@ LexiconMap::LexiconMap(std::string const& file) LexiconMap::LexiconMap(Payload_Vector lexicon) : m_buffer(std::nullopt), m_lexicon(lexicon) {} -auto LexiconMap::operator()(std::string_view term) -> std::optional { +auto LexiconMap::find(std::string_view term) const -> std::optional { return pisa::binary_search(m_lexicon.begin(), m_lexicon.end(), term); } -auto LexiconMap::operator()(std::string const& term) -> std::optional { +auto LexiconMap::find(std::string const& term) const -> std::optional { return pisa::binary_search(m_lexicon.begin(), m_lexicon.end(), term); } +auto LexiconMap::operator[](std::uint32_t term_id) const -> std::string_view { + return m_lexicon[term_id]; +} + +auto LexiconMap::size() const noexcept -> std::size_t { + return m_lexicon.size(); +} + } // namespace pisa diff --git a/test/in_memory_index.cpp b/test/in_memory_index.cpp index b7adede7..3f51ea99 100644 --- a/test/in_memory_index.cpp +++ b/test/in_memory_index.cpp @@ -8,7 +8,7 @@ auto VectorCursor::docid() const noexcept -> std::uint32_t { return documents[0]; } -auto VectorCursor::freq() const noexcept -> float { +auto VectorCursor::freq() const noexcept -> std::uint32_t { return frequencies[0]; } diff --git a/test/in_memory_index.hpp b/test/in_memory_index.hpp index a5edffdf..8bc73521 100644 --- a/test/in_memory_index.hpp +++ b/test/in_memory_index.hpp @@ -14,7 +14,7 @@ struct VectorCursor { [[nodiscard]] auto size() const noexcept -> std::size_t; [[nodiscard]] auto docid() const noexcept -> std::uint32_t; - [[nodiscard]] auto freq() const noexcept -> float; + [[nodiscard]] auto freq() const noexcept -> std::uint32_t; void next(); void next_geq(std::uint32_t docid); diff --git a/test/test_ranked_queries.cpp b/test/test_ranked_queries.cpp index d4154f80..b3ac556b 100644 --- a/test/test_ranked_queries.cpp +++ 
b/test/test_ranked_queries.cpp @@ -96,6 +96,7 @@ class ranked_or_taat_query_acc: public ranked_or_taat_query { using ranked_or_taat_query::ranked_or_taat_query; template + PISA_REQUIRES((pisa::concepts::MaxScorePostingCursor::value_type>)) void operator()(CursorRange&& cursors, uint64_t max_docid) { Acc accumulator(max_docid); ranked_or_taat_query::operator()(cursors, max_docid, accumulator); @@ -108,6 +109,7 @@ class range_query_128: public range_query { using range_query::range_query; template + PISA_REQUIRES((pisa::concepts::MaxScorePostingCursor::value_type>)) void operator()(CursorRange&& cursors, uint64_t max_docid) { range_query::operator()(cursors, max_docid, 128); }