Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support BASE declarations in SPARQL queries #1786

Merged
merged 13 commits into from
Feb 14, 2025
40 changes: 34 additions & 6 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,8 +1163,20 @@ TripleComponent::Iri Visitor::visit(Parser::IriContext* ctx) {
}

// ____________________________________________________________________________________
string Visitor::visit(Parser::IrirefContext* ctx) {
return RdfEscaping::unescapeIriref(ctx->getText());
string Visitor::visit(Parser::IrirefContext* ctx) const {
  // Helper: drop the enclosing `<` and `>` of an IRI reference. Both brackets
  // are required to be present.
  auto stripAngleBrackets = [](std::string_view iriref) {
    AD_CORRECTNESS_CHECK(iriref.starts_with('<'));
    AD_CORRECTNESS_CHECK(iriref.ends_with('>'));
    return iriref.substr(1, iriref.size() - 2);
  };

  auto iriText = ctx->getText();

  // Absolute IRIs are returned verbatim. The same holds for relative IRIs
  // when no BASE declaration is in effect: strictly speaking those are not
  // allowed, but they are used a lot in unit tests.
  const bool mustPrependBase = !baseIri_.empty() && !isAbsoluteIri(iriText);
  if (!mustPrependBase) {
    return iriText;
  }

  // Resolve the relative IRI by plain concatenation with the current base
  // (no path normalization is performed).
  return absl::StrCat("<", stripAngleBrackets(baseIri_),
                      stripAngleBrackets(iriText), ">");
}

// ____________________________________________________________________________________
Expand Down Expand Up @@ -1212,8 +1224,15 @@ DatasetClause SparqlQleverVisitor::visit(Parser::UsingClauseContext* ctx) {

// ____________________________________________________________________________________
void Visitor::visit(Parser::PrologueContext* ctx) {
visitVector(ctx->baseDecl());
visitVector(ctx->prefixDecl());
for (auto* child : ctx->children) {
if (auto* baseDecl = dynamic_cast<Parser::BaseDeclContext*>(child)) {
visit(baseDecl);
} else {
auto* prefixDecl = dynamic_cast<Parser::PrefixDeclContext*>(child);
AD_CORRECTNESS_CHECK(prefixDecl);
visit(prefixDecl);
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is wrong with the simpler older version?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Imagine the following case:

BASE <http://example.com>
PREFIX test: <test>
BASE <http://other.example.com>
PREFIX test2: <test>
BASE <http://alternative.example.com>
SELECT * WHERE {
  <s> ?p test:abc
}

Then the first BASE should apply to the first prefix, the second BASE should apply to the second prefix, and the third BASE should apply to everything that follows. So to ensure that each BASE is applied to the correct PREFIX, we can't visit all BASE declarations first and all PREFIX declarations afterwards; instead we have to visit them interleaved, in the order in which they appear in the query.

// Remember the whole prologue (we need this when we encounter a SERVICE
// clause, see `visit(ServiceGraphPatternContext*)` below.
if (ctx->getStart() && ctx->getStop()) {
Expand All @@ -1222,8 +1241,17 @@ void Visitor::visit(Parser::PrologueContext* ctx) {
}

// ____________________________________________________________________________________
void Visitor::visit(const Parser::BaseDeclContext* ctx) {
reportNotSupported(ctx, "BASE declarations are");
// Return true iff `string` (an IRI reference including its leading `<`)
// starts with a URI scheme followed by `:`, i.e. denotes an absolute IRI.
bool Visitor::isAbsoluteIri(std::string_view string) {
  // Scheme syntax per RFC 3986, section 3.1:
  //   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // The first character must be a letter; the `-` is escaped so that it is a
  // literal and does not form a character range inside the class.
  return ctre::starts_with<"<[A-Za-z][A-Za-z0-9+.\\-]*:">(string);
}

// ____________________________________________________________________________________
// Handle a `BASE <iri>` declaration: check that the IRI is absolute and store
// it in `baseIri_`, replacing any previously declared base.
void Visitor::visit(Parser::BaseDeclContext* ctx) {
  // Look up the IRIREF child and its text once and reuse them below.
  auto* irirefCtx = ctx->iriref();
  const auto rawIri = irirefCtx->getText();
  if (!isAbsoluteIri(rawIri)) {
    // NOTE(review): `reportError` is expected to throw, so the assignment
    // below is not reached for a relative base IRI -- confirm.
    reportError(ctx, "The base IRI must be an absolute IRI, was: " + rawIri);
  }
  baseIri_ = visit(irirefCtx);
}

// ____________________________________________________________________________________
Expand Down
8 changes: 6 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class SparqlQleverVisitor {
// `addVisibleVariable`.
std::vector<Variable> visibleVariables_{};
PrefixMap prefixMap_{};
std::string baseIri_ = "";
// We need to remember the prologue (prefix declarations) when we encounter it
// because we need it when we encounter a SERVICE query. When there is no
// prologue, this string simply remains empty.
Expand Down Expand Up @@ -121,8 +122,11 @@ class SparqlQleverVisitor {
// ___________________________________________________________________________
void visit(Parser::PrologueContext* ctx);

// Returns true if the given string (an IRI reference including its leading
// `<`) starts with a scheme followed by `:`, i.e. is an absolute IRI.
static bool isAbsoluteIri(std::string_view);

// ___________________________________________________________________________
[[noreturn]] static void visit(const Parser::BaseDeclContext* ctx);
void visit(Parser::BaseDeclContext* ctx);

// ___________________________________________________________________________
void visit(Parser::PrefixDeclContext* ctx);
Expand Down Expand Up @@ -469,7 +473,7 @@ class SparqlQleverVisitor {

TripleComponent::Iri visit(Parser::IriContext* ctx);

static string visit(Parser::IrirefContext* ctx);
string visit(Parser::IrirefContext* ctx) const;

string visit(Parser::PrefixedNameContext* ctx);

Expand Down
34 changes: 34 additions & 0 deletions test/SparqlParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1349,3 +1349,37 @@ TEST(ParserTest, HandlesSurrogatesCorrectly) {
// So writing unit tests for these cases is not possible without creating
// semi-invalid UTF-8 strings.
}

// _____________________________________________________________________________
TEST(ParserTest, BaseDeclaration) {
  // A single BASE is prepended to every relative IRI in the query body.
  auto singleBase = SparqlParser::parseQuery(
      "BASE <http://example.org/> SELECT * WHERE { ?s <p> <test> }");
  EXPECT_EQ(getFirstTriple(singleBase),
            "{s: ?s, p: <http://example.org/p>, o: <http://example.org/test>}");

  // Absolute IRIs are left untouched, relative ones in the same query are
  // still prefixed with the BASE.
  auto mixedIris = SparqlParser::parseQuery(
      "BASE <http://example.org/> "
      "SELECT * WHERE { ?s <p> <http://other.example.org/p> }");
  EXPECT_EQ(
      getFirstTriple(mixedIris),
      "{s: ?s, p: <http://example.org/p>, o: <http://other.example.org/p>}");

  // Interleaved BASE and PREFIX declarations: each PREFIX uses the BASE that
  // precedes it, and the last BASE applies to the query body.
  auto interleaved = SparqlParser::parseQuery(
      "BASE <http://example.org/> "
      "PREFIX ex1: <ex1/> "
      "BASE <http://other.example.org/> "
      "PREFIX ex2: <ex2/> "
      "BASE <http://alternative.example.org/> "
      "SELECT * WHERE { ex2:hello <world> ex1:test }");
  EXPECT_EQ(getFirstTriple(interleaved),
            "{s: <http://other.example.org/ex2/hello>,"
            " p: <http://alternative.example.org/world>,"
            " o: <http://example.org/ex1/test>}");

  // A relative IRI is rejected as BASE, even if an earlier BASE is present.
  AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(
      SparqlParser::parseQuery("BASE <http://example.com> BASE <relative> "
                               "SELECT * WHERE { ?s ?p ?o }"),
      ::testing::HasSubstr("absolute IRI"), InvalidSparqlQueryException);
}
Loading