Skip to content

Commit

Permalink
arbitrary geometry types and areas dont need to be approximated as po…
Browse files Browse the repository at this point in the history
…ints
  • Loading branch information
Jonathan24680 committed Jan 25, 2025
1 parent fc1a1fe commit ec75e89
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 49 deletions.
4 changes: 4 additions & 0 deletions src/engine/SpatialJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ class SpatialJoin : public Operation {
return childRight_;
}

PreparedSpatialJoinParams onlyForTestingGetPrepareJoin() const {
return prepareJoin();
}

private:
// helper function to generate a variable to column map from `childRight_`
// that only contains the columns selected by `config_.payloadVariables_`
Expand Down
118 changes: 73 additions & 45 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,48 @@ std::string SpatialJoinAlgorithms::betweenQuotes(
}

Check warning on line 50 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L49-L50

Added lines #L49 - L50 were not covered by tests
}

struct DistanceVisitor : public boost::static_visitor<double> {
template <typename Geometry1, typename Geometry2>
double operator()(const Geometry1& geom1, const Geometry2& geom2) const {
return bg::distance(geom1, geom2);
}
};

// ____________________________________________________________________________
Id SpatialJoinAlgorithms::computeDist(const IdTable* idTableLeft,
const IdTable* idTableRight,
size_t rowLeft, size_t rowRight,
ColumnIndex leftPointCol,
ColumnIndex rightPointCol) const {
auto getAreaString = [&](const IdTable* idtable, size_t row, size_t col) {
return betweenQuotes(ExportQueryExecutionTrees::idToStringAndType(
qec_->getIndex(), idtable->at(row, col), {})
.value()
.first);
};

auto getAreaOrPointGeometry =
[&](const IdTable* idtable, size_t row, size_t col,
std::optional<GeoPoint> point) -> std::optional<AnyGeometry> {
AnyGeometry geometry;
try {
if (!point) {
boost::geometry::read_wkt(getAreaString(idtable, row, col), geometry);
} else {
geometry = Point(point.value().getLng(), point.value().getLat());
}

Check warning on line 82 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L81-L82

Added lines #L81 - L82 were not covered by tests
} catch (...) {
return std::nullopt;
}

Check warning on line 85 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L84-L85

Added lines #L84 - L85 were not covered by tests
return geometry;
};

// for now we need to get the data from the disk, but in the future, this
// information will be stored in an ID, just like GeoPoint
auto getAreaPoint = [&](const IdTable* idtable, size_t row,
size_t col) -> std::optional<GeoPoint> {
std::string areastring =
betweenQuotes(ExportQueryExecutionTrees::idToStringAndType(
qec_->getIndex(), idtable->at(row, col), {})
.value()
.first);
std::string areastring = getAreaString(idtable, row, col);

Box areaBox;
try {
areaBox = calculateBoundingBoxOfArea(areastring);
Expand All @@ -73,29 +100,41 @@ Id SpatialJoinAlgorithms::computeDist(const IdTable* idTableLeft,
// gets printed at another place and the point/area just gets skipped
return std::nullopt;
}
if (useMidpointForAreas_) {
Point p = calculateMidpointOfBox(areaBox);
return GeoPoint(p.get<1>(), p.get<0>());
} else {
AD_FAIL(); // TODO not yet implemented
}
Point p = calculateMidpointOfBox(areaBox);
return GeoPoint(p.get<1>(), p.get<0>());
};

auto point1 = getPoint(idTableLeft, rowLeft, leftPointCol);
if (!point1) {
point1 = getAreaPoint(idTableLeft, rowLeft, leftPointCol);
}

auto point2 = getPoint(idTableRight, rowRight, rightPointCol);
if (!point2) {
point2 = getAreaPoint(idTableRight, rowRight, rightPointCol);
}
if (useMidpointForAreas_) {
if (!point1) {
point1 = getAreaPoint(idTableLeft, rowLeft, leftPointCol);
}

if (!point2) {
point2 = getAreaPoint(idTableRight, rowRight, rightPointCol);
}

if (!point1.has_value() || !point2.has_value()) {
return Id::makeUndefined();
if (!point1.has_value() || !point2.has_value()) {
return Id::makeUndefined();
}
return Id::makeFromDouble(
ad_utility::detail::wktDistImpl(point1.value(), point2.value()));
} else {
auto geometry1 =
getAreaOrPointGeometry(idTableLeft, rowLeft, leftPointCol, point1);
auto geometry2 =
getAreaOrPointGeometry(idTableRight, rowRight, rightPointCol, point2);
if (!geometry1.has_value() || !geometry2.has_value()) {
return Id::makeUndefined();
}

Check warning on line 130 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L129-L130

Added lines #L129 - L130 were not covered by tests
// convert to m and return. Note that the 78630 is an approximation, which
// does not provide accurate results for the poles.
return Id::makeFromDouble(boost::apply_visitor(DistanceVisitor(),
geometry1.value(),
geometry2.value()) *
78630);
}
return Id::makeFromDouble(
ad_utility::detail::wktDistImpl(point1.value(), point2.value()));
}

// ____________________________________________________________________________
Expand Down Expand Up @@ -489,32 +528,21 @@ std::array<bool, 2> SpatialJoinAlgorithms::isAPoleTouched(
return std::array{northPoleReached, southPoleReached};
}

struct BoundingBoxVisitor : public boost::static_visitor<Box> {
template <typename Geometry>
Box operator()(const Geometry& geometry) const {
Box box;
bg::envelope(geometry, box);
return box;
}
};

// ____________________________________________________________________________
Box SpatialJoinAlgorithms::calculateBoundingBoxOfArea(
const std::string& wktString) const {
Polygon polygon;
boost::geometry::read_wkt(wktString, polygon);
double minLng = std::numeric_limits<double>::infinity();
double maxLng = -std::numeric_limits<double>::infinity();
double minLat = std::numeric_limits<double>::infinity();
double maxLat = -std::numeric_limits<double>::infinity();
for (const auto& point : polygon.outer()) {
double lng = boost::geometry::get<0>(point);
double lat = boost::geometry::get<1>(point);
if (lng < minLng) {
minLng = lng;
}
if (lng > maxLng) {
maxLng = lng;
}
if (lat < minLat) {
minLat = lat;
}
if (lat > maxLat) {
maxLat = lat;
}
}
return Box(Point(minLng, minLat), Point(maxLng, maxLat));
AnyGeometry geometry;
boost::geometry::read_wkt(wktString, geometry);
return boost::apply_visitor(BoundingBoxVisitor(), geometry);
}

// ____________________________________________________________________________
Expand Down
19 changes: 19 additions & 0 deletions src/engine/SpatialJoinAlgorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ using Box = bg::model::box<Point>;
using Value = std::pair<Box, size_t>;
using Polygon = boost::geometry::model::polygon<
boost::geometry::model::d2::point_xy<double>>;
using Linestring = bg::model::linestring<Point>;
using MultiPoint = bg::model::multi_point<Point>;
using MultiLinestring = bg::model::multi_linestring<Linestring>;
using MultiPolygon = bg::model::multi_polygon<Polygon>;
using AnyGeometry = boost::variant<Point, Linestring, Polygon, MultiPoint,
MultiLinestring, MultiPolygon>;
} // namespace BoostGeometryNamespace

class SpatialJoinAlgorithms {
Expand Down Expand Up @@ -58,6 +64,19 @@ class SpatialJoinAlgorithms {
return calculateMidpointOfBox(box);
}

void setUseMidpointForAreas_(bool useMidpointForAreas) {
useMidpointForAreas_ = useMidpointForAreas;
}

Id OnlyForTestingWrapperComputeDist(const IdTable* idTableLeft,
const IdTable* idTableRight,
size_t rowLeft, size_t rowRight,
ColumnIndex leftPointCol,
ColumnIndex rightPointCol) const {
return computeDist(idTableLeft, idTableRight, rowLeft, rowRight,
leftPointCol, rightPointCol);
}

double OnlyForTestingWrapperGetMaxDistFromMidpointToAnyPointInsideTheBox(
const BoostGeometryNamespace::Box& box,
std::optional<BoostGeometryNamespace::Point> midpoint =
Expand Down
114 changes: 114 additions & 0 deletions test/engine/SpatialJoinAlgorithmsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,120 @@ TEST(SpatialJoin, getMaxDistFromMidpointToAnyPointInsideTheBox) {
area_statue, midpoint_statue));
}

QueryExecutionContext* getAllGeometriesQEC() {
auto addRow = [](std::string& kg, std::string nr, std::string wktStr) {
kg += absl::StrCat("<geometry" + nr + "> <asWKT> \"", wktStr, "\".\n");
};
// each object (except for the point) has its leftmost coordinate at an
// integer number and its rightmost coordinate 0.5 units further right. The
// y coordinate will be 0 for those case. All other points are inbetween these
// two points with different y coordinates. The reason for that is, that it
// is very easy to calculate the shortest distance between two geometries.
std::string point = "POINT(1.5 0)";
std::string linestring = "LINESTRING(2.0 0, 2.2 1, 2.5 0)";
std::string polygon = "POLYGON((3.0 0, 3.1 1, 3.2 2, 3.5 0))";
std::string multiPoint = "MULTIPOINT((4.0 0), (4.2 1), (4.5 0))";
std::string multiLinestring =
"MULTILINESTRING((5.0 0, 5.2 1, 5.5 1), (5.1 3, 5.5 0))";
std::string multiPolygon =
"MULTIPOLYGON(((6.0 0, 6.1 1, 6.3 5, 6.4 2)), ((6.2 1, 6.3 4, 6.4 3, 6.5 "
"0)))";

std::string kg = ""; // tiny test knowledge graph for all geometries
addRow(kg, "1", point);
addRow(kg, "2", linestring);
addRow(kg, "3", polygon);
addRow(kg, "4", multiPoint);
addRow(kg, "5", multiLinestring);
addRow(kg, "6", multiPolygon);

auto qec = ad_utility::testing::getQec(kg, true, true, false, 16_MB, false,
true, std::nullopt, 10_kB);
return qec;
}

TEST(SpatialJoin, areaFormat) {
auto qec = getAllGeometriesQEC();
auto leftChild =
buildIndexScan(qec, {"?geo1", std::string{"<asWKT>"}, "?obj1"});
auto rightChild =
buildIndexScan(qec, {"?geo2", std::string{"<asWKT>"}, "?obj2"});

std::shared_ptr<QueryExecutionTree> spatialJoinOperation =
ad_utility::makeExecutionTree<SpatialJoin>(
qec,
SpatialJoinConfiguration{MaxDistanceConfig(100000000),
Variable{"?obj1"}, Variable{"?obj2"}},
leftChild, rightChild);

std::shared_ptr<Operation> op = spatialJoinOperation->getRootOperation();
SpatialJoin* spatialJoin = static_cast<SpatialJoin*>(op.get());
spatialJoin->selectAlgorithm(SpatialJoinAlgorithm::BOUNDING_BOX);
auto res = spatialJoin->getResult();
// if all rows can be parsed correctly, the result should be the cross
// product. (Lines which can't be parsed will be ignored (and a warning gets
// printed) and therefore the cross product of all parsed lines would be
// smaller then 36)
ASSERT_EQ(res->idTable().numRows(), 36);
}

TEST(SpatialJoin, trueAreaDistance) {
auto testDist = [](QueryExecutionContext* qec, std::string nr1,
std::string nr2, double distance) {
auto makeIndexScan = [&](std::string nr) {
auto subject = absl::StrCat("<geometry", nr, ">");
auto objStr = absl::StrCat("?obj", nr);
TripleComponent object{Variable{objStr}};
return ad_utility::makeExecutionTree<IndexScan>(
qec, Permutation::Enum::PSO,
SparqlTriple{TripleComponent::Iri::fromIriref(subject), "<asWKT>",
object});
};
auto scan1 = makeIndexScan(nr1);
auto scan2 = makeIndexScan(nr2);
auto var1 = absl::StrCat("?obj", nr1);
auto var2 = absl::StrCat("?obj", nr2);

std::shared_ptr<QueryExecutionTree> spatialJoinOperation =
ad_utility::makeExecutionTree<SpatialJoin>(
qec,
SpatialJoinConfiguration{MaxDistanceConfig(100000000),
Variable{var1}, Variable{var2}},
scan1, scan2);

std::shared_ptr<Operation> op = spatialJoinOperation->getRootOperation();
SpatialJoin* spatialJoin = static_cast<SpatialJoin*>(op.get());
spatialJoin->selectAlgorithm(SpatialJoinAlgorithm::BOUNDING_BOX);
PreparedSpatialJoinParams params =
spatialJoin->onlyForTestingGetPrepareJoin();
SpatialJoinAlgorithms algorithms{
qec, params, spatialJoin->onlyForTestingGetConfig(), std::nullopt};
algorithms.setUseMidpointForAreas_(false);
auto distID = algorithms.OnlyForTestingWrapperComputeDist(
params.idTableLeft_, params.idTableRight_, 0, 0, params.leftJoinCol_,
params.rightJoinCol_);
ASSERT_EQ(distID.getDatatype(), Datatype::Double);
ASSERT_DOUBLE_EQ(distID.getDouble(), distance);
};
auto qec = getAllGeometriesQEC();
double conversionFactor = 78630; // convert to meters
testDist(qec, "1", "2", 0.5 * conversionFactor);
testDist(qec, "1", "3", 1.5 * conversionFactor);
testDist(qec, "1", "4", 2.5 * conversionFactor);
testDist(qec, "1", "5", 3.5 * conversionFactor);
testDist(qec, "1", "6", 4.5 * conversionFactor);
testDist(qec, "2", "3", 0.5 * conversionFactor);
testDist(qec, "2", "4", 1.5 * conversionFactor);
testDist(qec, "2", "5", 2.5 * conversionFactor);
testDist(qec, "2", "6", 3.5 * conversionFactor);
testDist(qec, "3", "4", 0.5 * conversionFactor);
testDist(qec, "3", "5", 1.5 * conversionFactor);
testDist(qec, "3", "6", 2.5 * conversionFactor);
testDist(qec, "4", "5", 0.5 * conversionFactor);
testDist(qec, "4", "6", 1.5 * conversionFactor);
testDist(qec, "5", "6", 0.5 * conversionFactor);
}

} // namespace boundingBox

} // namespace
11 changes: 7 additions & 4 deletions test/engine/SpatialJoinTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,12 +349,15 @@ inline std::shared_ptr<QueryExecutionTree> buildSmallChild(
// one of the wrapper classes needs a proper maxDistance, otherwise the wrapper
// can't be used to test the function
inline SpatialJoinAlgorithms getDummySpatialJoinAlgsForWrapperTesting(
size_t maxDist = 1000) {
auto qec = buildTestQEC();
size_t maxDist = 1000,
std::optional<QueryExecutionContext*> qec = std::nullopt) {
if (!qec) {
qec = buildTestQEC();
}
MaxDistanceConfig task{maxDist};
std::shared_ptr<QueryExecutionTree> spatialJoinOperation =
ad_utility::makeExecutionTree<SpatialJoin>(
qec,
qec.value(),
SpatialJoinConfiguration{task, Variable{"?point1"},
Variable{"?point2"}},
std::nullopt, std::nullopt);
Expand All @@ -373,7 +376,7 @@ inline SpatialJoinAlgorithms getDummySpatialJoinAlgsForWrapperTesting(
spatialJoin->getMaxDist(),
std::nullopt};

return {qec, params, spatialJoin->onlyForTestingGetConfig()};
return {qec.value(), params, spatialJoin->onlyForTestingGetConfig()};
}

} // namespace SpatialJoinTestHelpers

0 comments on commit ec75e89

Please sign in to comment.