Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For small index scans, avoid spurious copy of whole block #1260

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion e2e/e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ fi
# here because then we can't easily get the SERVER_PID out of that subshell
pushd "$BINARY_DIR"
echo "Launching server from path $(pwd)"
./ServerMain -i "$INDEX" -p 9099 -m 1GB -t --default-query-timeout 500s &> server_log.txt &
./ServerMain -i "$INDEX" -p 9099 -m 1GB -t --default-query-timeout 30s &> server_log.txt &
SERVER_PID=$!
popd

Expand Down
35 changes: 21 additions & 14 deletions src/index/CompressedRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,15 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock(
std::back_inserter(allColumns));
// A block is uniquely identified by its start position in the file.
auto cacheKey = blockMetadata.offsetsAndCompressedSize_.at(0).offsetInFile_;
DecompressedBlock block = blockCache_
.computeOnce(cacheKey,
[&]() {
return readAndDecompressBlock(
blockMetadata, allColumns);
})
._resultPointer->clone();
auto sharedResultFromCache =
blockCache_
.computeOnce(cacheKey,
[&]() {
return readAndDecompressBlock(blockMetadata,
allColumns);
})
._resultPointer;
const DecompressedBlock& block = *sharedResultFromCache;
const auto& col1Column = block.getColumn(0);

// Find the range in the blockMetadata, that belongs to the same relation
Expand All @@ -452,17 +454,22 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock(
}
}();
auto numResults = subBlock.size();
block.erase(block.begin(),
block.begin() + (subBlock.begin() - col1Column.begin()));
block.resize(numResults);

auto beginIndex = subBlock.begin() - col1Column.begin();
auto endIndex = subBlock.end() - col1Column.begin();

DecompressedBlock result{columnIndices.size(), allocator_};
result.resize(numResults);
for (auto i : ad_utility::integerRange(columnIndices.size())) {
const auto& inputCol = block.getColumn(columnIndices[i]);
std::ranges::copy(inputCol.begin() + beginIndex,
inputCol.begin() + endIndex, result.getColumn(i).begin());
}
if (scanMetadata.has_value()) {
auto& details = scanMetadata.value().get();
++details.numBlocksRead_;
details.numElementsRead_ += block.numRows();
details.numElementsRead_ += result.numRows();
}
block.setColumnSubset(columnIndices);
return block;
return result;
};

// _____________________________________________________________________________
Expand Down
5 changes: 4 additions & 1 deletion src/index/CompressedRelation.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,10 @@ class CompressedRelationReader {
// the `col1Id` doesn't match. For this to work, the block has to be one of
// the blocks that actually store triples from the given `relationMetadata`'s
// relation, else the behavior is undefined. Only return the columns specified
// by the `columnIndices`.
// by the `columnIndices`. Note: Do not call this function for blocks of which
// you know that you need them completely, as then this function wastes some
// time and space. It is only typically needed for the first and last block of
// certain scans.
DecompressedBlock readPossiblyIncompleteBlock(
const CompressedRelationMetadata& relationMetadata,
std::optional<Id> col1Id, const CompressedBlockMetadata& blockMetadata,
Expand Down
28 changes: 16 additions & 12 deletions test/engine/IndexScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "../IndexTestHelpers.h"
#include "../util/GTestHelpers.h"
#include "../util/IdTableHelpers.h"
#include "engine/IndexScan.h"
#include "parser/ParsedQuery.h"

Expand Down Expand Up @@ -323,24 +324,27 @@ TEST(IndexScan, additionalColumn) {
auto qec = getQec("<x> <y> <z>.");
using V = Variable;
SparqlTriple triple{V{"?x"}, "<y>", V{"?z"}};
triple._additionalScanColumns.emplace_back(1, V{"?blib"});
triple._additionalScanColumns.emplace_back(0, V{"?blub"});
triple._additionalScanColumns.emplace_back(
ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN, V{"?xpattern"});
triple._additionalScanColumns.emplace_back(
ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN, V{"?ypattern"});
auto scan = IndexScan{qec, Permutation::PSO, triple};
ASSERT_EQ(scan.getResultWidth(), 4);
auto col = makeAlwaysDefinedColumn;
VariableToColumnMap expected = {{V{"?x"}, col(0)},
{V{"?z"}, col(1)},
{V("?blib"), col(2)},
{V("?blub"), col(3)}};
{V("?xpattern"), col(2)},
{V("?ypattern"), col(3)}};
ASSERT_THAT(scan.getExternallyVisibleVariableColumns(),
::testing::UnorderedElementsAreArray(expected));
ASSERT_THAT(scan.getCacheKey(),
::testing::ContainsRegex("Additional Columns: 1 0"));
// Executing such a query that has the same column multiple times is currently
// not supported and fails with an exception inside the `IdTable.h` module
// TODO<joka921> Add proper tests as soon as we can properly add additional
// columns. Maybe we cann add additional columns generically during the index
// build by adding a generic transformation function etc.
AD_EXPECT_THROW_WITH_MESSAGE(scan.computeResultOnlyForTesting(),
::testing::ContainsRegex("IdTable.h"));
::testing::ContainsRegex("Additional Columns: 2 3"));
auto res = scan.computeResultOnlyForTesting();
auto getId = makeGetId(qec->getIndex());
auto I = IntId;
// <x> is the only subject, so it has pattern 0, <z> doesn't appear as a
// subject, so it has no pattern.
auto exp = makeIdTableFromVector(
{{getId("<x>"), getId("<z>"), I(0), I(NO_PATTERN)}});
EXPECT_THAT(res.idTable(), ::testing::ElementsAreArray(exp));
}
Loading