From 4fe8d326ff1db491b62943bc0017f986ab832925 Mon Sep 17 00:00:00 2001 From: Michal Siedlaczek Date: Sat, 18 Nov 2023 11:10:33 -0500 Subject: [PATCH] Remove configuration.hpp Removes the configuration file that defined how many bits to use for quantization. Instead, this is now configured by CLI options. --- docs/book/404.html | 2 +- docs/book/cli/compress_inverted_index.html | 5 +- docs/book/cli/compute_intersection.html | 2 +- docs/book/cli/count-postings.html | 2 +- docs/book/cli/create_wand_data.html | 14 ++- docs/book/cli/evaluate_queries.html | 2 +- docs/book/cli/extract-maxscores.html | 2 +- docs/book/cli/extract_topics.html | 2 +- docs/book/cli/invert.html | 2 +- docs/book/cli/kth_threshold.html | 2 +- docs/book/cli/lexicon.html | 2 +- docs/book/cli/map_queries.html | 2 +- docs/book/cli/parse_collection.html | 2 +- docs/book/cli/partition_fwd_index.html | 2 +- docs/book/cli/queries.html | 2 +- docs/book/cli/read_collection.html | 2 +- docs/book/cli/reorder-docids.html | 2 +- docs/book/cli/sample_inverted_index.html | 2 +- docs/book/cli/selective_queries.html | 2 +- docs/book/cli/shards.html | 2 +- docs/book/cli/stem_queries.html | 2 +- docs/book/cli/taily-stats.html | 2 +- docs/book/cli/taily-thresholds.html | 2 +- docs/book/cli/thresholds.html | 2 +- docs/book/guide/algorithms.html | 2 +- docs/book/guide/compressing.html | 6 +- docs/book/guide/indexing-pipeline.html | 2 +- docs/book/guide/installation.html | 2 +- docs/book/guide/inverting.html | 2 +- docs/book/guide/parsing.html | 2 +- docs/book/guide/querying.html | 6 +- docs/book/guide/reordering.html | 2 +- docs/book/guide/requirements.html | 2 +- docs/book/guide/sharding.html | 2 +- docs/book/guide/threshold-estimation.html | 2 +- docs/book/index.html | 2 +- docs/book/introduction.html | 2 +- docs/book/print.html | 49 ++++++++-- docs/book/searchindex.js | 2 +- docs/book/searchindex.json | 2 +- docs/book/tutorial/robust04.html | 2 +- docs/src/SUMMARY.md | 1 + docs/src/cli/create_wand_data.md | 6 +- include/pisa/compress.hpp | 3 +- include/pisa/configuration.hpp | 40 -------- .../pisa/cursor/block_max_scored_cursor.hpp | 2 +- include/pisa/cursor/max_scored_cursor.hpp | 2 +- include/pisa/cursor/scored_cursor.hpp | 2 +- include/pisa/ensure.hpp | 10 ++ include/pisa/intersection.hpp | 2 +- include/pisa/mappable/mapper.hpp | 15 +-- include/pisa/type_safe.hpp | 27 ++++-- include/pisa/wand_data.hpp | 20 ++-- include/pisa/wand_data_compressed.hpp | 65 ++++++++----- include/pisa/wand_data_range.hpp | 14 ++- include/pisa/wand_data_raw.hpp | 10 +- include/pisa/wand_utils.hpp | 1 - src/compress.cpp | 25 +++-- test/cli/run.sh | 7 +- test/cli/test_wand_data.sh | 96 +++++++++++++++++++ test/docker/benchmark/run.sh | 2 +- test/test_bmw_queries.cpp | 7 +- test/test_ranked_queries.cpp | 22 ++++- test/test_recursive_graph_bisection.cpp | 1 - test/test_taily_stats.cpp | 5 +- test/test_wand_data.cpp | 5 +- tools/app.cpp | 23 +++-- tools/app.hpp | 8 +- tools/compress_inverted_index.cpp | 16 +--- tools/compute_intersection.cpp | 4 +- tools/create_wand_data.cpp | 3 +- tools/evaluate_queries.cpp | 13 ++- tools/kth_threshold.cpp | 6 +- tools/profile_queries.cpp | 10 +- tools/queries.cpp | 13 ++- tools/shards.cpp | 7 +- tools/thresholds.cpp | 8 +- 77 files changed, 394 insertions(+), 259 deletions(-) delete mode 100644 include/pisa/configuration.hpp create mode 100644 test/cli/test_wand_data.sh diff --git a/docs/book/404.html b/docs/book/404.html index ef4f6bf58..277c60596 100644 --- a/docs/book/404.html +++ b/docs/book/404.html @@ -89,7 +89,7 @@ diff --git a/docs/book/cli/compress_inverted_index.html b/docs/book/cli/compress_inverted_index.html index 0e9e242a6..9174316ee 100644 --- a/docs/book/cli/compress_inverted_index.html +++ b/docs/book/cli/compress_inverted_index.html @@ -88,7 +88,7 @@ @@ -195,7 +195,8 @@

Usage

PL2 c parameter. --qld-mu FLOAT Needs: --scorer QLD mu parameter. - --quantize Needs: --scorer Quantizes the scores + --quantize UINT Needs: --scorer + Quantizes the scores using this many bits -L,--log-level TEXT:{critical,debug,err,info,off,trace,warn} [info] Log level --config Configuration .ini file diff --git a/docs/book/cli/compute_intersection.html b/docs/book/cli/compute_intersection.html index e31f80076..8deb383e1 100644 --- a/docs/book/cli/compute_intersection.html +++ b/docs/book/cli/compute_intersection.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/count-postings.html b/docs/book/cli/count-postings.html index b0862b23b..d88a24163 100644 --- a/docs/book/cli/count-postings.html +++ b/docs/book/cli/count-postings.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/create_wand_data.html b/docs/book/cli/create_wand_data.html index c4d9c3cd3..9910aec67 100644 --- a/docs/book/cli/create_wand_data.html +++ b/docs/book/cli/create_wand_data.html @@ -88,7 +88,7 @@ @@ -181,8 +181,9 @@

Usage

-c,--collection TEXT REQUIRED Collection basename -o,--output TEXT REQUIRED Output filename - --compress Compress additional data - --quantize Quantize scores + --quantize UINT Quantizes the scores using this many bits + --compress Needs: --quantize + Compress additional data -s,--scorer TEXT REQUIRED Scorer function --bm25-k1 FLOAT Needs: --scorer BM25 k1 parameter. @@ -208,11 +209,8 @@

Usage

Lambda parameter for variable blocks

Description

-

Creates additional data needed for certain query algorithms.

-

Algorithms such as WAND and MaxScore (among others) need more data than -available in posting lists alone. This includes max scores for each -term, as well as max scores for ranges of posting lists that can be used -as skip lists.

+

Creates additional data needed for certain query algorithms. +See "WAND" Data for more details.

Refer to queries for details about scoring functions.

Blocks

Each posting list is divided into blocks, and each block gets a diff --git a/docs/book/cli/evaluate_queries.html b/docs/book/cli/evaluate_queries.html index c358dd98d..cfbfb9f10 100644 --- a/docs/book/cli/evaluate_queries.html +++ b/docs/book/cli/evaluate_queries.html @@ -88,7 +88,7 @@

diff --git a/docs/book/cli/extract-maxscores.html b/docs/book/cli/extract-maxscores.html index 550b3b6c2..0554ace5b 100644 --- a/docs/book/cli/extract-maxscores.html +++ b/docs/book/cli/extract-maxscores.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/extract_topics.html b/docs/book/cli/extract_topics.html index f49f51e3c..8a7753708 100644 --- a/docs/book/cli/extract_topics.html +++ b/docs/book/cli/extract_topics.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/invert.html b/docs/book/cli/invert.html index 3b6fd8e9a..25b5856b3 100644 --- a/docs/book/cli/invert.html +++ b/docs/book/cli/invert.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/kth_threshold.html b/docs/book/cli/kth_threshold.html index 97e7682c6..cd0e74246 100644 --- a/docs/book/cli/kth_threshold.html +++ b/docs/book/cli/kth_threshold.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/lexicon.html b/docs/book/cli/lexicon.html index de6e1df6f..6cd53f620 100644 --- a/docs/book/cli/lexicon.html +++ b/docs/book/cli/lexicon.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/map_queries.html b/docs/book/cli/map_queries.html index baa270fc0..b1bcbf49b 100644 --- a/docs/book/cli/map_queries.html +++ b/docs/book/cli/map_queries.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/parse_collection.html b/docs/book/cli/parse_collection.html index b1e680324..0ca2567a3 100644 --- a/docs/book/cli/parse_collection.html +++ b/docs/book/cli/parse_collection.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/partition_fwd_index.html b/docs/book/cli/partition_fwd_index.html index 13dbb112b..f254d40ec 100644 --- a/docs/book/cli/partition_fwd_index.html +++ b/docs/book/cli/partition_fwd_index.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/queries.html b/docs/book/cli/queries.html index 9aa9063e8..8afbe64b0 100644 --- a/docs/book/cli/queries.html +++ b/docs/book/cli/queries.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/read_collection.html b/docs/book/cli/read_collection.html index 9f8aa50ef..f2003f3d3 100644 --- a/docs/book/cli/read_collection.html +++ b/docs/book/cli/read_collection.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/reorder-docids.html b/docs/book/cli/reorder-docids.html index 84f150fa4..3b1ec86d0 100644 --- a/docs/book/cli/reorder-docids.html +++ b/docs/book/cli/reorder-docids.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/sample_inverted_index.html b/docs/book/cli/sample_inverted_index.html index 2b2e09bbc..3cd018828 100644 --- a/docs/book/cli/sample_inverted_index.html +++ b/docs/book/cli/sample_inverted_index.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/selective_queries.html b/docs/book/cli/selective_queries.html index 00b7a3066..7bd69ce7b 100644 --- a/docs/book/cli/selective_queries.html +++ b/docs/book/cli/selective_queries.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/shards.html b/docs/book/cli/shards.html index a2de3d87d..a9aa0c079 100644 --- a/docs/book/cli/shards.html +++ b/docs/book/cli/shards.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/stem_queries.html b/docs/book/cli/stem_queries.html index 76dacfe1f..d8374cac0 100644 --- a/docs/book/cli/stem_queries.html +++ b/docs/book/cli/stem_queries.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/taily-stats.html b/docs/book/cli/taily-stats.html index 6e6a47e02..2fdf94507 100644 --- a/docs/book/cli/taily-stats.html +++ b/docs/book/cli/taily-stats.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/taily-thresholds.html b/docs/book/cli/taily-thresholds.html index b75f08614..eea1e8113 100644 --- a/docs/book/cli/taily-thresholds.html +++ b/docs/book/cli/taily-thresholds.html @@ -88,7 +88,7 @@ diff --git a/docs/book/cli/thresholds.html b/docs/book/cli/thresholds.html index 275de180c..4c220cc3c 100644 --- a/docs/book/cli/thresholds.html +++ b/docs/book/cli/thresholds.html @@ -88,7 +88,7 @@ diff --git a/docs/book/guide/algorithms.html b/docs/book/guide/algorithms.html index 2eab4a256..fcd37cf55 100644 --- a/docs/book/guide/algorithms.html +++ b/docs/book/guide/algorithms.html @@ -88,7 +88,7 @@ diff --git a/docs/book/guide/compressing.html b/docs/book/guide/compressing.html index d253f7dc7..822c5c9bb 100644 --- a/docs/book/guide/compressing.html +++ b/docs/book/guide/compressing.html @@ -88,7 +88,7 @@ @@ -258,7 +258,7 @@

VarintGB

- @@ -272,7 +272,7 @@

VarintGB

- diff --git a/docs/book/guide/indexing-pipeline.html b/docs/book/guide/indexing-pipeline.html index b5c30ab42..f52055845 100644 --- a/docs/book/guide/indexing-pipeline.html +++ b/docs/book/guide/indexing-pipeline.html @@ -88,7 +88,7 @@ diff --git a/docs/book/guide/installation.html b/docs/book/guide/installation.html index d17143280..402c10411 100644 --- a/docs/book/guide/installation.html +++ b/docs/book/guide/installation.html @@ -88,7 +88,7 @@ diff --git a/docs/book/guide/inverting.html b/docs/book/guide/inverting.html index ea492e615..48d5552e7 100644 --- a/docs/book/guide/inverting.html +++ b/docs/book/guide/inverting.html @@ -88,7 +88,7 @@ diff --git a/docs/book/guide/parsing.html b/docs/book/guide/parsing.html index ab69a76f8..159a0ac44 100644 --- a/docs/book/guide/parsing.html +++ b/docs/book/guide/parsing.html @@ -88,7 +88,7 @@ diff --git a/docs/book/guide/querying.html b/docs/book/guide/querying.html index cb9e6400e..f179e41ce 100644 --- a/docs/book/guide/querying.html +++ b/docs/book/guide/querying.html @@ -88,7 +88,7 @@ @@ -215,7 +215,7 @@

B