diff --git a/e2e/e2e.sh b/e2e/e2e.sh index 50c3b6f2f4..665fd9c8e7 100755 --- a/e2e/e2e.sh +++ b/e2e/e2e.sh @@ -111,7 +111,7 @@ fi # here because then we can't easily get the SERVER_PID out of that subshell pushd "$BINARY_DIR" echo "Launching server from path $(pwd)" -./ServerMain -i "$INDEX" -p 9099 -m 1GB -t --default-query-timeout 500s &> server_log.txt & +./ServerMain -i "$INDEX" -p 9099 -m 1GB -t --default-query-timeout 30s &> server_log.txt & SERVER_PID=$! popd diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index 07703eec16..4414e31646 100644 --- a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -423,13 +423,15 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( std::back_inserter(allColumns)); // A block is uniquely identified by its start position in the file. auto cacheKey = blockMetadata.offsetsAndCompressedSize_.at(0).offsetInFile_; - DecompressedBlock block = blockCache_ - .computeOnce(cacheKey, - [&]() { - return readAndDecompressBlock( - blockMetadata, allColumns); - }) - ._resultPointer->clone(); + auto sharedResultFromCache = + blockCache_ + .computeOnce(cacheKey, + [&]() { + return readAndDecompressBlock(blockMetadata, + allColumns); + }) + ._resultPointer; + const DecompressedBlock& block = *sharedResultFromCache; const auto& col1Column = block.getColumn(0); // Find the range in the blockMetadata, that belongs to the same relation @@ -452,17 +454,22 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( } }(); auto numResults = subBlock.size(); - block.erase(block.begin(), - block.begin() + (subBlock.begin() - col1Column.begin())); - block.resize(numResults); - + auto beginIndex = subBlock.begin() - col1Column.begin(); + auto endIndex = subBlock.end() - col1Column.begin(); + + DecompressedBlock result{columnIndices.size(), allocator_}; + result.resize(numResults); + for (auto i : ad_utility::integerRange(columnIndices.size())) { + const auto& 
inputCol = block.getColumn(columnIndices[i]); + std::ranges::copy(inputCol.begin() + beginIndex, + inputCol.begin() + endIndex, result.getColumn(i).begin()); + } if (scanMetadata.has_value()) { auto& details = scanMetadata.value().get(); ++details.numBlocksRead_; - details.numElementsRead_ += block.numRows(); + details.numElementsRead_ += result.numRows(); } - block.setColumnSubset(columnIndices); - return block; + return result; }; // _____________________________________________________________________________ diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h index 67bd946610..5c16816ed6 100644 --- a/src/index/CompressedRelation.h +++ b/src/index/CompressedRelation.h @@ -515,7 +515,10 @@ class CompressedRelationReader { // the `col1Id` doesn't match. For this to work, the block has to be one of // the blocks that actually store triples from the given `relationMetadata`'s // relation, else the behavior is undefined. Only return the columns specified - // by the `columnIndices`. + // by the `columnIndices`. Note: Do not call this function for blocks that + // you know you need completely, because in that case this function wastes + // time and space. It is typically only needed for the first and last block + // of certain scans. 
DecompressedBlock readPossiblyIncompleteBlock( const CompressedRelationMetadata& relationMetadata, std::optional col1Id, const CompressedBlockMetadata& blockMetadata, diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index e0014ed347..5e460d8325 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -6,6 +6,7 @@ #include "../IndexTestHelpers.h" #include "../util/GTestHelpers.h" +#include "../util/IdTableHelpers.h" #include "engine/IndexScan.h" #include "parser/ParsedQuery.h" @@ -323,24 +324,27 @@ TEST(IndexScan, additionalColumn) { auto qec = getQec(" ."); using V = Variable; SparqlTriple triple{V{"?x"}, "", V{"?z"}}; - triple._additionalScanColumns.emplace_back(1, V{"?blib"}); - triple._additionalScanColumns.emplace_back(0, V{"?blub"}); + triple._additionalScanColumns.emplace_back( + ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN, V{"?xpattern"}); + triple._additionalScanColumns.emplace_back( + ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN, V{"?ypattern"}); auto scan = IndexScan{qec, Permutation::PSO, triple}; ASSERT_EQ(scan.getResultWidth(), 4); auto col = makeAlwaysDefinedColumn; VariableToColumnMap expected = {{V{"?x"}, col(0)}, {V{"?z"}, col(1)}, - {V("?blib"), col(2)}, - {V("?blub"), col(3)}}; + {V("?xpattern"), col(2)}, + {V("?ypattern"), col(3)}}; ASSERT_THAT(scan.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expected)); ASSERT_THAT(scan.getCacheKey(), - ::testing::ContainsRegex("Additional Columns: 1 0")); - // Executing such a query that has the same column multiple times is currently - // not supported and fails with an exception inside the `IdTable.h` module - // TODO Add proper tests as soon as we can properly add additional - // columns. Maybe we cann add additional columns generically during the index - // build by adding a generic transformation function etc. 
- AD_EXPECT_THROW_WITH_MESSAGE(scan.computeResultOnlyForTesting(), - ::testing::ContainsRegex("IdTable.h")); + ::testing::ContainsRegex("Additional Columns: 2 3")); + auto res = scan.computeResultOnlyForTesting(); + auto getId = makeGetId(qec->getIndex()); + auto I = IntId; + // is the only subject, so it has pattern 0, doesn't appear as a + // subject, so it has no pattern. + auto exp = makeIdTableFromVector( + {{getId(""), getId(""), I(0), I(NO_PATTERN)}}); + EXPECT_THAT(res.idTable(), ::testing::ElementsAreArray(exp)); }