Skip to content

Commit

Permalink
Support TBB backend (#82)
Browse files Browse the repository at this point in the history
* introduce gtsam_points/config.hpp

* tbb backend

* tbb for preprocess
  • Loading branch information
koide3 authored Oct 1, 2024
1 parent c49a3e8 commit b9cd0da
Show file tree
Hide file tree
Showing 11 changed files with 160 additions and 57 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ endif()

# GPU-related
if(BUILD_WITH_CUDA)
add_definitions(-DBUILD_GTSAM_POINTS_GPU)
if(NOT GTSAM_POINTS_USE_CUDA)
message(FATAL_ERROR "gtsam_points is not built with CUDA!!")
endif()
set(GPU_SRCS src/glim/odometry/odometry_estimation_gpu.cpp)
endif()

Expand Down
3 changes: 3 additions & 0 deletions include/glim/preprocess/cloud_preprocessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,16 @@ class CloudPreprocessor {
virtual PreprocessedFrame::Ptr preprocess(const RawPoints::ConstPtr& raw_points);

private:
PreprocessedFrame::Ptr preprocess_impl(const RawPoints::ConstPtr& raw_points);
std::vector<int> find_neighbors(const Eigen::Vector4d* points, const int num_points, const int k) const;

private:
using Params = CloudPreprocessorParams;
Params params;

mutable std::mt19937 mt;

std::shared_ptr<void> tbb_task_arena;
};

} // namespace glim
50 changes: 44 additions & 6 deletions src/glim/common/cloud_covariance_estimation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@

#include <spdlog/spdlog.h>

#include <gtsam_points/config.hpp>
#include <gtsam_points/util/parallelism.hpp>

#ifdef GTSAM_POINTS_USE_TBB
#include <tbb/parallel_for.h>
#endif

namespace glim {

CloudCovarianceEstimation::CloudCovarianceEstimation(const int num_threads) : regularization_method(RegularizationMethod::PLANE), num_threads(num_threads) {}
Expand Down Expand Up @@ -47,18 +54,30 @@ void CloudCovarianceEstimation::estimate(
assert(k_correspondences * points.size() == neighbors.size());
assert(k_neighbors <= k_correspondences);

// Precompute pt * pt.transpose()
std::vector<Eigen::Matrix4d> pt_cross(points.size());
#pragma omp parallel for num_threads(num_threads) schedule(guided, 8)
for (int i = 0; i < points.size(); i++) {
pt_cross[i] = points[i] * points[i].transpose();
if (gtsam_points::is_omp_default()) {
#pragma omp parallel for num_threads(num_threads) schedule(guided, 64)
for (int i = 0; i < points.size(); i++) {
pt_cross[i] = points[i] * points[i].transpose();
}
} else {
#ifdef GTSAM_POINTS_USE_TBB
tbb::parallel_for(tbb::blocked_range<int>(0, points.size(), 64), [&](const tbb::blocked_range<int>& range) {
for (int i = range.begin(); i < range.end(); i++) {
pt_cross[i] = points[i] * points[i].transpose();
}
});
#else
std::cerr << "error : TBB is not enabled" << std::endl;
abort();
#endif
}

normals.resize(points.size());
covs.resize(points.size());

// Calculate covariances
#pragma omp parallel for num_threads(num_threads) schedule(guided, 8)
for (int i = 0; i < points.size(); i++) {
const auto calc_cov = [&](int i) {
Eigen::Vector4d sum_points = Eigen::Vector4d::Zero();
Eigen::Matrix4d sum_cross = Eigen::Matrix4d::Zero();

Expand All @@ -80,6 +99,25 @@ void CloudCovarianceEstimation::estimate(
if (points[i].dot(normals[i]) > 0.0) {
normals[i] = -normals[i];
}
};

// Calculate covariances
if (gtsam_points::is_omp_default()) {
#pragma omp parallel for num_threads(num_threads) schedule(guided, 8)
for (int i = 0; i < points.size(); i++) {
calc_cov(i);
}
} else {
#ifdef GTSAM_POINTS_USE_TBB
tbb::parallel_for(tbb::blocked_range<int>(0, points.size(), 8), [&](const tbb::blocked_range<int>& range) {
for (int i = range.begin(); i < range.end(); i++) {
calc_cov(i);
}
});
#else
std::cerr << "error : TBB is not enabled" << std::endl;
abort();
#endif
}
}

Expand Down
19 changes: 10 additions & 9 deletions src/glim/mapping/global_mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <gtsam/nonlinear/Values.h>
#include <gtsam/nonlinear/NonlinearFactorGraph.h>

#include <gtsam_points/config.hpp>
#include <gtsam_points/types/point_cloud_cpu.hpp>
#include <gtsam_points/types/point_cloud_gpu.hpp>
#include <gtsam_points/types/gaussian_voxelmap_cpu.hpp>
Expand Down Expand Up @@ -72,7 +73,7 @@ GlobalMappingParams::GlobalMappingParams() {
GlobalMappingParams::~GlobalMappingParams() {}

GlobalMapping::GlobalMapping(const GlobalMappingParams& params) : params(params) {
#ifndef BUILD_GTSAM_POINTS_GPU
#ifndef GTSAM_POINTS_USE_CUDA
if (params.enable_gpu) {
logger->error("GPU-based factors cannot be used because GLIM is built without GPU option!!");
}
Expand All @@ -97,7 +98,7 @@ GlobalMapping::GlobalMapping(const GlobalMappingParams& params) : params(params)
isam2.reset(new gtsam_points::ISAM2ExtDummy(isam2_params));
}

#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
stream_buffer_roundrobin = std::make_shared<gtsam_points::StreamTempBufferRoundRobin>(64);
#endif

Expand Down Expand Up @@ -233,7 +234,7 @@ void GlobalMapping::insert_submap(int current, const SubMap::Ptr& submap) {
subsampled_submap = gtsam_points::random_sampling(submap->frame, params.randomsampling_rate, mt);
}

#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
if (params.enable_gpu && !submap->frame->points_gpu) {
submap->frame = gtsam_points::PointCloudGPU::clone(*submap->frame);
}
Expand Down Expand Up @@ -309,7 +310,7 @@ void GlobalMapping::find_overlapping_submaps(double min_overlap) {

if (false) {
}
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
else if (std::dynamic_pointer_cast<gtsam_points::GaussianVoxelMapGPU>(submaps[i]->voxelmaps.back()) && subsampled_submaps[j]->points_gpu) {
const auto stream_buffer = std::any_cast<std::shared_ptr<gtsam_points::StreamTempBufferRoundRobin>>(stream_buffer_roundrobin)->get_stream_buffer();
const auto& stream = stream_buffer.first;
Expand Down Expand Up @@ -431,7 +432,7 @@ boost::shared_ptr<gtsam::NonlinearFactorGraph> GlobalMapping::create_matching_co
factors->emplace_shared<gtsam_points::IntegratedVGICPFactor>(X(i), X(current), voxelmap, subsampled_submaps[current]);
}
}
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
else if (params.registration_error_factor_type == "VGICP_GPU") {
const auto stream_buffer = std::any_cast<std::shared_ptr<gtsam_points::StreamTempBufferRoundRobin>>(stream_buffer_roundrobin)->get_stream_buffer();
const auto& stream = stream_buffer.first;
Expand Down Expand Up @@ -480,7 +481,7 @@ void GlobalMapping::save(const std::string& path) {

for (const auto& factor : isam2->getFactorsUnsafe()) {
bool serializable = !boost::dynamic_pointer_cast<gtsam_points::IntegratedMatchingCostFactor>(factor)
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
&& !boost::dynamic_pointer_cast<gtsam_points::IntegratedVGICPFactorGPU>(factor)
#endif
;
Expand Down Expand Up @@ -513,7 +514,7 @@ void GlobalMapping::save(const std::string& path) {
} else if (boost::dynamic_pointer_cast<gtsam_points::IntegratedVGICPFactor>(factor.second)) {
type = "vgicp";
}
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
else if (boost::dynamic_pointer_cast<gtsam_points::IntegratedVGICPFactorGPU>(factor.second)) {
type = "vgicp_gpu";
}
Expand Down Expand Up @@ -612,7 +613,7 @@ bool GlobalMapping::load(const std::string& path) {
subsampled_submaps[i] = subsampled_submap;

if (params.enable_gpu) {
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
subsampled_submaps[i] = gtsam_points::PointCloudGPU::clone(*subsampled_submaps[i]);

for (int j = 0; j < params.submap_voxelmap_levels; j++) {
Expand Down Expand Up @@ -662,7 +663,7 @@ bool GlobalMapping::load(const std::string& path) {

if (type == "vgicp" || type == "vgicp_gpu") {
if (params.enable_gpu) {
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
const auto stream_buffer = std::any_cast<std::shared_ptr<gtsam_points::StreamTempBufferRoundRobin>>(stream_buffer_roundrobin)->get_stream_buffer();
const auto& stream = stream_buffer.first;
const auto& buffer = stream_buffer.second;
Expand Down
62 changes: 35 additions & 27 deletions src/glim/mapping/global_mapping_pose_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <gtsam_points/optimizers/isam2_ext.hpp>
#include <gtsam_points/optimizers/isam2_ext_dummy.hpp>
#include <gtsam_points/optimizers/levenberg_marquardt_ext.hpp>
#include <gtsam_points/util/easy_profiler.hpp>
#include <gtsam_points/util/parallelism.hpp>

#include <glim/util/config.hpp>
#include <glim/util/serialization.hpp>
Expand Down Expand Up @@ -370,14 +370,12 @@ void GlobalMappingPoseGraph::loop_detection_task() {
candidates_buffer.erase(candidates_buffer.begin(), candidates_buffer.begin() + eval_count);
}

std::vector<double> inlier_fractions(candidates.size());
std::vector<double> inlier_fractions(candidates.size(), 0.0);
std::vector<gtsam::Pose3> T_target_source(candidates.size());

// Evaluate loop candidates in parallel.
#pragma omp parallel for num_threads(params.num_threads) schedule(dynamic)
for (int i = 0; i < candidates.size(); i++) {
const auto evaluate_candidate = [&](int i) {
if (kill_switch) {
continue;
return;
}

const auto candidate = candidates[i];
Expand All @@ -399,16 +397,7 @@ void GlobalMappingPoseGraph::loop_detection_task() {

gtsam_points::LevenbergMarquardtExtParams lm_params;
lm_params.setMaxIterations(10);

#ifdef GTSAM_USE_TBB
auto arena = static_cast<tbb::task_arena*>(tbb_task_arena.get());
arena->execute([&] {
#endif
values = gtsam_points::LevenbergMarquardtOptimizerExt(graph, values, lm_params).optimize();

#ifdef GTSAM_USE_TBB
});
#endif
values = gtsam_points::LevenbergMarquardtOptimizerExt(graph, values, lm_params).optimize();

error = factor->error(values);
inlier_fraction = factor->inlier_fraction();
Expand All @@ -421,28 +410,47 @@ void GlobalMappingPoseGraph::loop_detection_task() {
gtsam_points::LevenbergMarquardtExtParams lm_params;
lm_params.setMaxIterations(10);

#ifdef GTSAM_USE_TBB
auto arena = static_cast<tbb::task_arena*>(tbb_task_arena.get());
arena->execute([&] {
#endif
values = gtsam_points::LevenbergMarquardtOptimizerExt(graph, values, lm_params).optimize();

#ifdef GTSAM_USE_TBB
});
#endif
values = gtsam_points::LevenbergMarquardtOptimizerExt(graph, values, lm_params).optimize();

error = factor->error(values);
inlier_fraction = factor->inlier_fraction();
} else {
logger->warn("unknown registration type: {}", params.registration_type);
continue;
return;
}

logger->debug("target={}, source={}, error={}, inlier_fraction={}", target->submap->id, source->submap->id, error, inlier_fraction);

inlier_fractions[i] = inlier_fraction;
T_target_source[i] = values.at<gtsam::Pose3>(0);
}
};

// Evaluate loop candidates in parallel.
#ifdef GTSAM_USE_TBB
auto arena = static_cast<tbb::task_arena*>(tbb_task_arena.get());
arena->execute([&] {
#endif
if (gtsam_points::is_omp_default()) {
#pragma omp parallel for num_threads(params.num_threads) schedule(dynamic)
for (int i = 0; i < candidates.size(); i++) {
evaluate_candidate(i);
}
} else {
#ifdef GTSAM_POINTS_USE_TBB
tbb::parallel_for(tbb::blocked_range<int>(0, candidates.size(), 2), [&](const tbb::blocked_range<int>& range) {
for (int i = range.begin(); i < range.end(); i++) {
evaluate_candidate(i);
}
});
#else
std::cerr << "error : TBB is not enabled" << std::endl;
abort();
#endif
}

#ifdef GTSAM_USE_TBB
});
#endif

// Check the matching results.
std::vector<gtsam::NonlinearFactor::shared_ptr> factors;
Expand Down
11 changes: 6 additions & 5 deletions src/glim/mapping/sub_mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <gtsam/nonlinear/LinearContainerFactor.h>
#include <gtsam/nonlinear/LevenbergMarquardtOptimizer.h>

#include <gtsam_points/config.hpp>
#include <gtsam_points/types/point_cloud_cpu.hpp>
#include <gtsam_points/types/point_cloud_gpu.hpp>
#include <gtsam_points/types/gaussian_voxelmap_cpu.hpp>
Expand Down Expand Up @@ -81,7 +82,7 @@ SubMapping::SubMapping(const SubMappingParams& params) : params(params) {
values.reset(new gtsam::Values);
graph.reset(new gtsam::NonlinearFactorGraph);

#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
stream = std::make_shared<gtsam_points::CUDAStream>();
stream_buffer_roundrobin = std::make_shared<gtsam_points::StreamTempBufferRoundRobin>(8);
#endif
Expand Down Expand Up @@ -159,7 +160,7 @@ void SubMapping::insert_frame(const EstimationFrame::ConstPtr& odom_frame_) {
}
}

#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
if (params.enable_gpu && !odom_frame->frame->points_gpu) {
if (params.enable_gpu) {
auto stream = std::static_pointer_cast<gtsam_points::CUDAStream>(this->stream);
Expand Down Expand Up @@ -289,7 +290,7 @@ void SubMapping::insert_frame(const EstimationFrame::ConstPtr& odom_frame_) {
graph->emplace_shared<gtsam_points::IntegratedVGICPFactor>(X(keyframe_indices[i]), X(current), voxelmap, keyframes.back()->frame);
}
}
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
else if (params.registration_error_factor_type == "VGICP_GPU") {
auto roundrobin = std::static_pointer_cast<gtsam_points::StreamTempBufferRoundRobin>(stream_buffer_roundrobin);
auto stream_buffer = roundrobin->get_stream_buffer();
Expand Down Expand Up @@ -379,7 +380,7 @@ void SubMapping::insert_keyframe(const int current, const EstimationFrame::Const
*keyframe = *odom_frame;

if (params.enable_gpu) {
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
auto stream = std::static_pointer_cast<gtsam_points::CUDAStream>(this->stream);
keyframe->frame = gtsam_points::PointCloudGPU::clone(*subsampled_frame, *stream);
keyframe->voxelmaps.clear();
Expand Down Expand Up @@ -477,7 +478,7 @@ SubMap::Ptr SubMapping::create_submap(bool force_create) const {
}

// TODO: improve merging process
#ifdef BUILD_GTSAM_POINTS_GPU
#ifdef GTSAM_POINTS_USE_CUDA
if (params.enable_gpu) {
// submap->frame = gtsam_points::merge_frames_gpu(poses_to_merge, keyframes_to_merge, submap_downsample_resolution);
}
Expand Down
Loading

0 comments on commit b9cd0da

Please sign in to comment.