Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve benchmarks #312

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ venv/

# Ignored files
*.bak
benchmarks*.xml
gmon.out
perf.*
compile_commands.json
Expand Down
50 changes: 46 additions & 4 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,56 @@
#
# SPDX-License-Identifier: MIT

add_subdirectory(bin_table_coords_to_id)
add_subdirectory(bin_table_id_to_coords)
# Third-party packages needed to build the benchmark executable.
# REQUIRED makes configuration fail when a package cannot be found.
find_package(Catch2 REQUIRED)
find_package(Filesystem REQUIRED)
find_package(FMT REQUIRED)
# CTest and Catch CMake modules; the Catch module provides catch_discover_tests(), which is called
# further down in this file.
include(CTest)
include(Catch)

# Single executable collecting all benchmarks. It is declared without sources here: main.cpp is
# attached right below and more sources are attached through target_sources() calls.
add_executable(hictk_benchmark)

target_sources(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp")

# One subdirectory per benchmark suite.
# NOTE(review): each subdirectory presumably attaches its sources to hictk_benchmark through
# target_sources(), as bin_table/CMakeLists.txt does -- confirm for the other directories.
add_subdirectory(bin_table)
add_subdirectory(bins)
add_subdirectory(cooler_creation)
add_subdirectory(cooler_traversal)
add_subdirectory(fetch_and_sum)
add_subdirectory(formatting)
add_subdirectory(hdf5_iterator)
add_subdirectory(hic_creation)
add_subdirectory(hic_traversal)
add_subdirectory(pixel_formatting)
add_subdirectory(interaction_fetching)
add_subdirectory(pixel_merger)
add_subdirectory(pixels)
add_subdirectory(reference)
add_subdirectory(zoomify)

# Headers shared by the benchmark sources live under benchmark/include/ and are visible to this
# target only (PRIVATE).
target_include_directories(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include/")

# Link the project-wide warning/option interface targets privately and the hictk libraries
# exercised by the benchmarks publicly.
target_link_libraries(
hictk_benchmark
PRIVATE
hictk_project_warnings
hictk_project_options
PUBLIC
hictk::cooler
hictk::file
hictk::hic
)

# NOTE(review): target_link_system_libraries() is not a built-in CMake command; presumably it is a
# project helper that links the given targets while treating their headers as system includes --
# confirm against its definition.
target_link_system_libraries(
hictk_benchmark
PRIVATE fmt::fmt-header-only std::filesystem
PUBLIC Catch2::Catch2
)

# Register the test cases found in hictk_benchmark with CTest.
# - TEST_SPEC is given no value here.
# - Tests run with the project source directory as working directory.
# - OUTPUT_DIR points to a Benchmarking/ folder inside the current binary directory.
# - --success is forwarded to the executable through EXTRA_ARGS.
catch_discover_tests(
hictk_benchmark
TEST_SPEC
WORKING_DIRECTORY
"${PROJECT_SOURCE_DIR}"
OUTPUT_DIR
"${CMAKE_CURRENT_BINARY_DIR}/Benchmarking/"
EXTRA_ARGS
--success
)
12 changes: 12 additions & 0 deletions benchmark/bin_table/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (C) 2024 Roberto Rossini <roberros@uio.no>
#
# SPDX-License-Identifier: MIT

# Attach the bin table benchmarks (bin ID -> genomic coordinates and genomic coordinates -> bin ID)
# to the hictk_benchmark executable.
target_sources(
hictk_benchmark
PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/bin_ids_to_genomic_coords.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/genomic_coords_to_bin_ids.cpp"
)

# The benchmarks in this directory use the hictk bin table library.
target_link_libraries(hictk_benchmark PUBLIC hictk::bin_table)
64 changes: 64 additions & 0 deletions benchmark/bin_table/bin_ids_to_genomic_coords.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (C) 2024 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#include <fmt/format.h>

#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_test_macros.hpp>
#include <cstddef>
#include <cstdint>
#include <hictk/bin_table.hpp>
#include <hictk/bin_table_fixed.hpp>
#include <vector>

#include "./common.hpp"
#include "hictk/benchmark/hg38.hpp"

namespace hictk::benchmark {

// NOLINTBEGIN(*-avoid-magic-numbers)
// Benchmark BinTable::at(bin_id) on random bin IDs, once for each resolution listed below.
TEST_CASE("BinTable::at(bin_id)") {
  const std::vector<std::uint32_t> resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000};

  for (const auto resolution : resolutions) {
    BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), resolution))
    (Catch::Benchmark::Chronometer meter) {
      // Setup: build the table and draw one random bin ID per run.
      // Only the callable handed to meter.measure() is the measured operation.
      const auto num_runs = static_cast<std::size_t>(meter.runs());
      const BinTable table{hg38.begin(), hg38.end(), resolution};
      const auto ids = generate_bin_ids(table, num_runs);

      // The lookup result is returned so that the call cannot be optimized away
      meter.measure([&table, &ids](std::size_t run) { return table.at(ids[run]); });
    };
  }
}

// Benchmark BinTableFixed::at(bin_id) on random bin IDs, once for each resolution listed below.
TEST_CASE("BinTableFixed::at(bin_id)") {
  const std::vector<std::uint32_t> resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000};

  for (const auto resolution : resolutions) {
    BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), resolution))
    (Catch::Benchmark::Chronometer meter) {
      // Setup: build the table and draw one random bin ID per run.
      // Only the callable handed to meter.measure() is the measured operation.
      const auto num_runs = static_cast<std::size_t>(meter.runs());
      const BinTableFixed table{hg38.begin(), hg38.end(), resolution};
      const auto ids = generate_bin_ids(table, num_runs);

      // The lookup result is returned so that the call cannot be optimized away
      meter.measure([&table, &ids](std::size_t run) { return table.at(ids[run]); });
    };
  }
}

// Benchmark at(bin_id) on randomly generated variable-size bin tables, once for each target
// resolution listed below.
TEST_CASE("BinTableVariable::at(bin_id)") {
  const std::vector<std::uint32_t> resolutions{5'000, 10'000, 100'000, 1'000'000};

  for (const auto resolution : resolutions) {
    BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), resolution))
    (Catch::Benchmark::Chronometer meter) {
      // Setup: generate the table and draw one random bin ID per run.
      // Only the callable handed to meter.measure() is the measured operation.
      const auto table = generate_variable_bin_table(resolution);
      const auto num_runs = static_cast<std::size_t>(meter.runs());
      const auto ids = generate_bin_ids(table, num_runs);

      // The lookup result is returned so that the call cannot be optimized away
      meter.measure([&table, &ids](std::size_t run) { return table.at(ids[run]); });
    };
  }
}
// NOLINTEND(*-avoid-magic-numbers)

} // namespace hictk::benchmark
82 changes: 82 additions & 0 deletions benchmark/bin_table/common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (C) 2024 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#pragma once

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <random>
#include <stdexcept>
#include <string_view>
#include <utility>
#include <vector>

#include "hictk/benchmark/hg38.hpp"
#include "hictk/bin_table_variable.hpp"
#include "hictk/chromosome.hpp"

namespace hictk::benchmark {

// Generate `size` random bin IDs drawn uniformly from the closed interval [0, bins.size() - 1].
//
// The random engine is seeded through std::random_device, so each call yields a different
// sequence of IDs.
//
// Params:
//   bins - any object exposing size(); its size is the number of valid bin IDs
//   size - number of IDs to generate
// Returns: a vector with `size` bin IDs (empty when size is 0).
// Throws: std::invalid_argument when IDs are requested from an empty table.
template <typename BinTable>
[[nodiscard]] inline std::vector<std::uint64_t> generate_bin_ids(const BinTable &bins,
                                                                 std::size_t size) {
  if (size == 0) {
    return {};
  }

  const auto num_bins = static_cast<std::uint64_t>(bins.size());
  if (num_bins == 0) {
    // Without this check num_bins - 1 wraps around to the largest 64-bit value and the
    // distribution below would produce IDs that do not exist in the table
    throw std::invalid_argument("unable to generate bin IDs from an empty bin table");
  }

  std::random_device rd{};
  std::mt19937_64 rand_eng(rd());
  // The bounds do not change between draws: build the distribution once
  std::uniform_int_distribution<std::uint64_t> bin_id_dist{0, num_bins - 1};

  std::vector<std::uint64_t> buff(size);
  std::generate(buff.begin(), buff.end(), [&]() { return bin_id_dist(rand_eng); });

  return buff;
}

// Generate `size` random genomic coordinates as (chromosome ID, position) pairs.
//
// Each coordinate is produced in two steps: a bin is drawn uniformly from `bins`, then a position
// is drawn uniformly from [0, chrom.size() - 1] on the chromosome that bin belongs to.
// The random engine is seeded through std::random_device, so each call yields a different
// sequence of coordinates.
//
// Params:
//   bins - object exposing size() and at(bin_id); the returned bin must expose chrom()
//   size - number of coordinates to generate
// Returns: a vector with `size` (chromosome ID, position) pairs (empty when size is 0).
// Throws: std::invalid_argument when coordinates are requested from an empty table.
template <typename BinTable>
[[nodiscard]] inline auto generate_genomic_coords(const BinTable &bins, std::size_t size) {
  using Coord = std::pair<std::uint32_t, std::uint32_t>;

  if (size == 0) {
    return std::vector<Coord>{};
  }

  const auto num_bins = static_cast<std::uint64_t>(bins.size());
  if (num_bins == 0) {
    // Without this check num_bins - 1 wraps around to the largest 64-bit value and the
    // distribution below would produce bin IDs that do not exist in the table
    throw std::invalid_argument("unable to generate genomic coordinates from an empty bin table");
  }

  std::random_device rd{};
  std::mt19937_64 rand_eng(rd());
  // The bounds do not change between draws: build the distribution once
  std::uniform_int_distribution<std::uint64_t> bin_id_dist{0, num_bins - 1};

  std::vector<Coord> buff(size);
  std::generate(buff.begin(), buff.end(), [&]() {
    // Copy the chromosome: the bin returned by at() may be a temporary
    const auto chrom = bins.at(bin_id_dist(rand_eng)).chrom();
    const auto pos = std::uniform_int_distribution<std::uint32_t>{0, chrom.size() - 1}(rand_eng);

    return std::make_pair(chrom.id(), pos);
  });

  return buff;
}

// Build a bin table with variable bin sizes spanning every chromosome in hg38.
//
// Bin sizes are sampled from a normal distribution centered on target_resolution, with a standard
// deviation equal to one tenth of target_resolution (but never smaller than 10).
// The random engine is seeded through std::random_device, so each call yields a different table.
[[nodiscard]] inline hictk::BinTableVariable<std::uint32_t> generate_variable_bin_table(
    std::uint32_t target_resolution) {
  std::random_device seed_source{};
  std::mt19937_64 prng(seed_source());

  const auto mean_bin_size = static_cast<double>(target_resolution);
  const auto bin_size_stddev = std::max(10.0, mean_bin_size / 10);

  // Sample the size of a bin starting at pos.
  // The sampled value is clamped such that a bin spans at least 1 bp and never extends past the
  // end of the chromosome.
  auto sample_bin_size = [&](const hictk::Chromosome &chrom, std::uint32_t pos) {
    const auto sampled = std::normal_distribution<double>{mean_bin_size, bin_size_stddev}(prng);
    const auto max_bin_size = static_cast<double>(chrom.size() - pos);
    return static_cast<std::uint32_t>(std::clamp(sampled, 1.0, max_bin_size));
  };

  std::vector<std::uint32_t> bin_starts{};
  std::vector<std::uint32_t> bin_ends{};

  for (const auto &chrom : hg38) {
    // Positions restart from 0 on every chromosome; each chromosome gets at least one bin
    std::uint32_t pos = 0;
    do {
      bin_starts.push_back(pos);
      pos += sample_bin_size(chrom, pos);
      bin_ends.push_back(pos);
    } while (pos < chrom.size());
  }

  return {hictk::Reference{hg38.begin(), hg38.end()}, bin_starts, bin_ends};
}

} // namespace hictk::benchmark
73 changes: 73 additions & 0 deletions benchmark/bin_table/genomic_coords_to_bin_ids.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright (C) 2024 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#include <fmt/format.h>

#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_test_macros.hpp>
#include <cstddef>
#include <cstdint>
#include <hictk/bin_table.hpp>
#include <hictk/bin_table_fixed.hpp>
#include <vector>

#include "./common.hpp"
#include "hictk/benchmark/hg38.hpp"

namespace hictk::benchmark {

// NOLINTBEGIN(*-avoid-magic-numbers)
// Benchmark BinTable::at(chrom, pos) on random genomic coordinates, once for each resolution
// listed below.
TEST_CASE("BinTable::at(chrom, pos)") {
  const std::vector<std::uint32_t> resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000};

  for (const auto resolution : resolutions) {
    BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), resolution))
    (Catch::Benchmark::Chronometer meter) {
      // Setup: build the table and draw one random coordinate per run.
      // Only the callable handed to meter.measure() is the measured operation.
      const auto num_runs = static_cast<std::size_t>(meter.runs());
      const BinTable table{hg38.begin(), hg38.end(), resolution};
      const auto coords = generate_genomic_coords(table, num_runs);

      // The lookup result is returned so that the call cannot be optimized away
      meter.measure([&table, &coords](std::size_t run) {
        const auto &coord = coords[run];
        return table.at(coord.first, coord.second);
      });
    };
  }
}

// Benchmark BinTableFixed::at(chrom, pos) on random genomic coordinates, once for each resolution
// listed below.
TEST_CASE("BinTableFixed::at(chrom, pos)") {
  const std::vector<std::uint32_t> resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000};

  for (const auto resolution : resolutions) {
    BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), resolution))
    (Catch::Benchmark::Chronometer meter) {
      // Setup: build the table and draw one random coordinate per run.
      // Only the callable handed to meter.measure() is the measured operation.
      const auto num_runs = static_cast<std::size_t>(meter.runs());
      const BinTableFixed table{hg38.begin(), hg38.end(), resolution};
      const auto coords = generate_genomic_coords(table, num_runs);

      // The lookup result is returned so that the call cannot be optimized away
      meter.measure([&table, &coords](std::size_t run) {
        const auto &coord = coords[run];
        return table.at(coord.first, coord.second);
      });
    };
  }
}

// Benchmark at(chrom, pos) on randomly generated variable-size bin tables, once for each target
// resolution listed below.
TEST_CASE("BinTableVariable::at(chrom, pos)") {
  const std::vector<std::uint32_t> resolutions{5'000, 10'000, 100'000, 1'000'000};

  for (const auto resolution : resolutions) {
    BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), resolution))
    (Catch::Benchmark::Chronometer meter) {
      // Setup: generate the table and draw one random coordinate per run.
      // Only the callable handed to meter.measure() is the measured operation.
      const auto table = generate_variable_bin_table(resolution);
      const auto num_runs = static_cast<std::size_t>(meter.runs());
      const auto coords = generate_genomic_coords(table, num_runs);

      // The lookup result is returned so that the call cannot be optimized away
      meter.measure([&table, &coords](std::size_t run) {
        const auto &coord = coords[run];
        return table.at(coord.first, coord.second);
      });
    };
  }
}
// NOLINTEND(*-avoid-magic-numbers)

} // namespace hictk::benchmark
24 changes: 0 additions & 24 deletions benchmark/bin_table_coords_to_id/CMakeLists.txt

This file was deleted.

Loading
Loading