diff --git a/.gitignore b/.gitignore index c6272683..e9101d0d 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ venv/ # Ignored files *.bak +benchmarks*.xml gmon.out perf.* compile_commands.json diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 31f4e5b6..bd094ab2 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -2,14 +2,56 @@ # # SPDX-License-Identifier: MIT -add_subdirectory(bin_table_coords_to_id) -add_subdirectory(bin_table_id_to_coords) +find_package(Catch2 REQUIRED) +find_package(Filesystem REQUIRED) +find_package(FMT REQUIRED) +include(CTest) +include(Catch) + +add_executable(hictk_benchmark) + +target_sources(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") + +add_subdirectory(bin_table) +add_subdirectory(bins) add_subdirectory(cooler_creation) add_subdirectory(cooler_traversal) -add_subdirectory(fetch_and_sum) +add_subdirectory(formatting) add_subdirectory(hdf5_iterator) add_subdirectory(hic_creation) add_subdirectory(hic_traversal) -add_subdirectory(pixel_formatting) +add_subdirectory(interaction_fetching) add_subdirectory(pixel_merger) +add_subdirectory(pixels) +add_subdirectory(reference) add_subdirectory(zoomify) + +target_include_directories(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include/") + +target_link_libraries( + hictk_benchmark + PRIVATE + hictk_project_warnings + hictk_project_options + PUBLIC + hictk::cooler + hictk::file + hictk::hic +) + +target_link_system_libraries( + hictk_benchmark + PRIVATE fmt::fmt-header-only std::filesystem + PUBLIC Catch2::Catch2 +) + +catch_discover_tests( + hictk_benchmark + TEST_SPEC + WORKING_DIRECTORY + "${PROJECT_SOURCE_DIR}" + OUTPUT_DIR + "${CMAKE_CURRENT_BINARY_DIR}/Benchmarking/" + EXTRA_ARGS + --success +) diff --git a/benchmark/bin_table/CMakeLists.txt b/benchmark/bin_table/CMakeLists.txt new file mode 100644 index 00000000..e90a71ac --- /dev/null +++ b/benchmark/bin_table/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright (C) 
2024 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +target_sources( + hictk_benchmark + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/bin_ids_to_genomic_coords.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/genomic_coords_to_bin_ids.cpp" +) + +target_link_libraries(hictk_benchmark PUBLIC hictk::bin_table) diff --git a/benchmark/bin_table/bin_ids_to_genomic_coords.cpp b/benchmark/bin_table/bin_ids_to_genomic_coords.cpp new file mode 100644 index 00000000..90cbcf20 --- /dev/null +++ b/benchmark/bin_table/bin_ids_to_genomic_coords.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/benchmark/hg38.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers) +TEST_CASE("BinTable::at(bin_id)") { + const std::vector resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000}; + + for (const auto &res : resolutions) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), res)) + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), res}; + const auto bin_ids = generate_bin_ids(bin_table, static_cast(meter.runs())); + + meter.measure([&bin_table, &bin_ids](std::size_t i) { return bin_table.at(bin_ids[i]); }); + }; + } +} + +TEST_CASE("BinTableFixed::at(bin_id)") { + const std::vector resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000}; + + for (const auto &res : resolutions) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), res)) + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{hg38.begin(), hg38.end(), res}; + const auto bin_ids = generate_bin_ids(bin_table, static_cast(meter.runs())); + + meter.measure([&bin_table, &bin_ids](std::size_t i) { return bin_table.at(bin_ids[i]); }); + }; + } +} + +TEST_CASE("BinTableVariable::at(bin_id)") { + const std::vector resolutions{5'000, 10'000, 100'000, 1'000'000}; + + for 
(const auto &res : resolutions) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), res)) + (Catch::Benchmark::Chronometer meter) { + const auto bin_table = generate_variable_bin_table(res); + const auto bin_ids = generate_bin_ids(bin_table, static_cast(meter.runs())); + + meter.measure([&bin_table, &bin_ids](std::size_t i) { return bin_table.at(bin_ids[i]); }); + }; + } +} +// NOLINTEND(*-avoid-magic-numbers) + +} // namespace hictk::benchmark diff --git a/benchmark/bin_table/common.hpp b/benchmark/bin_table/common.hpp new file mode 100644 index 00000000..3def4333 --- /dev/null +++ b/benchmark/bin_table/common.hpp @@ -0,0 +1,82 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include + +#include "hictk/benchmark/hg38.hpp" +#include "hictk/bin_table_variable.hpp" +#include "hictk/chromosome.hpp" + +namespace hictk::benchmark { + +template +[[nodiscard]] inline std::vector generate_bin_ids(const BinTable &bins, + std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector buff(size); + std::generate(buff.begin(), buff.end(), [&]() { + return std::uniform_int_distribution{0, bins.size() - 1}(rand_eng); + }); + + return buff; +} + +template +[[nodiscard]] inline auto generate_genomic_coords(const BinTable &bins, std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + using Coord = std::pair; + std::vector buff(size); + std::generate(buff.begin(), buff.end(), [&]() { + const auto bin_id = std::uniform_int_distribution{0, bins.size() - 1}(rand_eng); + + const auto chrom = bins.at(bin_id).chrom(); + const auto pos = std::uniform_int_distribution{0, chrom.size() - 1}(rand_eng); + + return std::make_pair(chrom.id(), pos); + }); + + return buff; +} + +[[nodiscard]] inline hictk::BinTableVariable generate_variable_bin_table( + std::uint32_t target_resolution) { + std::random_device rd{}; + std::mt19937_64 
rand_eng(rd()); + + const auto resolution_avg = static_cast(target_resolution); + const auto resolution_std = std::max(10.0, resolution_avg / 10); + + auto generate_bin_size = [&](const hictk::Chromosome &chrom, std::uint32_t pos) { + const auto bin_size = + std::normal_distribution{resolution_avg, resolution_std}(rand_eng); + return static_cast( + std::clamp(bin_size, 1.0, static_cast(chrom.size() - pos))); + }; + + std::vector start_pos{}; + std::vector end_pos{}; + + for (const auto &chrom : hg38) { + start_pos.push_back(0); + end_pos.push_back(start_pos.back() + generate_bin_size(chrom, start_pos.back())); + while (end_pos.back() < chrom.size()) { + start_pos.push_back(end_pos.back()); + end_pos.push_back(start_pos.back() + generate_bin_size(chrom, start_pos.back())); + } + } + + return {hictk::Reference{hg38.begin(), hg38.end()}, start_pos, end_pos}; +} + +} // namespace hictk::benchmark diff --git a/benchmark/bin_table/genomic_coords_to_bin_ids.cpp b/benchmark/bin_table/genomic_coords_to_bin_ids.cpp new file mode 100644 index 00000000..88539281 --- /dev/null +++ b/benchmark/bin_table/genomic_coords_to_bin_ids.cpp @@ -0,0 +1,73 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/benchmark/hg38.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers) +TEST_CASE("BinTable::at(chrom, pos)") { + const std::vector resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000}; + + for (const auto &res : resolutions) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), res)) + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), res}; + const auto coords = + generate_genomic_coords(bin_table, static_cast(meter.runs())); + + meter.measure([&bin_table, &coords](std::size_t i) { + return bin_table.at(coords[i].first, coords[i].second); + }); + }; + } +} + 
+TEST_CASE("BinTableFixed::at(chrom, pos)") { + const std::vector resolutions{10, 100, 1'000, 10'000, 100'000, 1'000'000}; + + for (const auto &res : resolutions) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), res)) + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{hg38.begin(), hg38.end(), res}; + const auto coords = + generate_genomic_coords(bin_table, static_cast(meter.runs())); + + meter.measure([&bin_table, &coords](std::size_t i) { + return bin_table.at(coords[i].first, coords[i].second); + }); + }; + } +} + +TEST_CASE("BinTableVariable::at(chrom, pos)") { + const std::vector resolutions{5'000, 10'000, 100'000, 1'000'000}; + + for (const auto &res : resolutions) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("hg38 ({}bp)"), res)) + (Catch::Benchmark::Chronometer meter) { + const auto bin_table = generate_variable_bin_table(res); + const auto coords = + generate_genomic_coords(bin_table, static_cast(meter.runs())); + + meter.measure([&bin_table, &coords](std::size_t i) { + return bin_table.at(coords[i].first, coords[i].second); + }); + }; + } +} +// NOLINTEND(*-avoid-magic-numbers) + +} // namespace hictk::benchmark diff --git a/benchmark/bin_table_coords_to_id/CMakeLists.txt b/benchmark/bin_table_coords_to_id/CMakeLists.txt deleted file mode 100644 index 0087cb87..00000000 --- a/benchmark/bin_table_coords_to_id/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2024 Roberto Rossini -# -# SPDX-License-Identifier: MIT - -find_package(CLI11 REQUIRED) -find_package(Filesystem REQUIRED) - -add_executable(hictk_bin_table_coords_to_id_bench bin_table_coords_to_id.cpp) - -target_link_libraries( - hictk_bin_table_coords_to_id_bench - PRIVATE - hictk_project_warnings - hictk_project_options - PUBLIC - hictk::bin_table -) - -target_link_system_libraries( - hictk_bin_table_coords_to_id_bench - PUBLIC - CLI11::CLI11 - std::filesystem -) diff --git a/benchmark/bin_table_coords_to_id/bin_table_coords_to_id.cpp 
b/benchmark/bin_table_coords_to_id/bin_table_coords_to_id.cpp deleted file mode 100644 index f7925fd3..00000000 --- a/benchmark/bin_table_coords_to_id/bin_table_coords_to_id.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (C) 2024 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace hictk; - -// clang-format off -// NOLINTNEXTLINE(cert-err58-cpp) -static const std::vector hg38{ - Chromosome{0, "chr1", 248956422}, - Chromosome{1, "chr2", 242193529}, - Chromosome{2, "chr3", 198295559}, - Chromosome{3, "chr4", 190214555}, - Chromosome{4, "chr5", 181538259}, - Chromosome{5, "chr6", 170805979}, - Chromosome{6, "chr7", 159345973}, - Chromosome{7, "chr8", 145138636}, - Chromosome{8, "chr9", 138394717}, - Chromosome{9, "chr10", 133797422}, - Chromosome{10, "chr11", 135086622}, - Chromosome{11, "chr12", 133275309}, - Chromosome{12, "chr13", 114364328}, - Chromosome{13, "chr14", 107043718}, - Chromosome{14, "chr15", 101991189}, - Chromosome{15, "chr16", 90338345}, - Chromosome{16, "chr17", 83257441}, - Chromosome{17, "chr18", 80373285}, - Chromosome{18, "chr19", 58617616}, - Chromosome{19, "chr20", 64444167}, - Chromosome{20, "chr21", 46709983}, - Chromosome{21, "chr22", 50818468}, - Chromosome{22, "chrX", 156040895}, - Chromosome{23, "chrY", 57227415} -}; - -// clang-format on - -// NOLINTBEGIN(*-avoid-magic-numbers) -struct Config { - std::uint32_t resolution{1'000}; - std::size_t batch_size{10'000'000}; - std::size_t iterations{1}; - std::uint64_t seed{123456789}; -}; -// NOLINTEND(*-avoid-magic-numbers) - -[[nodiscard]] static std::vector init_bin_ids(const BinTable &bins, - std::size_t batch_size, - std::uint64_t seed) { - std::vector buff(batch_size); - std::mt19937_64 rand_eng(seed); - std::generate(buff.begin(), buff.end(), [&]() { - return std::uniform_int_distribution{0, bins.size() - 1}(rand_eng); - }); - - return buff; -} - -[[nodiscard]] 
static std::vector init_bins(const BinTable &bins, - const std::vector &bin_ids) { - std::vector buff(bin_ids.size()); - for (std::size_t i = 0; i < bin_ids.size(); ++i) { - buff[i] = bins.at(bin_ids[i]); - } - return buff; -} - -[[nodiscard]] static std::uint64_t run_benchmark(const BinTable &bins, - const std::vector &queries) { - const auto t0 = std::chrono::system_clock::now(); - for (const auto &b : queries) { - std::ignore = bins.at(b.chrom().name(), b.start()); - } - const auto t1 = std::chrono::system_clock::now(); - - return static_cast( - std::chrono::duration_cast(t1 - t0).count()); -} - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char **argv) noexcept { - const auto *argv0 = argv[0]; // NOLINT(*-pointer-arithmetic) - - CLI::App cli{}; - Config config{}; - cli.add_option("--resolution", config.resolution, "Resolution of the bin table.") - ->capture_default_str(); - cli.add_option("--batch-size", config.batch_size, "Batch size.")->capture_default_str(); - cli.add_option("--iterations", config.iterations, "Number of iterations to perform.") - ->capture_default_str(); - cli.add_option("--seed", config.seed, "Seed")->capture_default_str(); - - try { - cli.parse(argc, argv); - - const BinTable bin_table{hg38.begin(), hg38.end(), config.resolution}; - const auto bin_ids = init_bin_ids(bin_table, config.batch_size, config.seed); - const auto bins = init_bins(bin_table, bin_ids); - - std::uint64_t elapsed_time = 0; - for (std::size_t i = 0; i < config.iterations; ++i) { - elapsed_time += run_benchmark(bin_table, bins); - } - - const auto elapsed_seconds = static_cast(elapsed_time) / 1.0e9; - const auto throughput = - static_cast(config.batch_size * config.iterations) / elapsed_seconds; - - fmt::print(FMT_STRING("hictk::BinTable::at(chrom, pos) throughput: {:.4} num/s\n"), throughput); - - } catch (const CLI::ParseError &e) { - assert(cli); - return cli.exit(e); - } catch (const std::exception &e) { - fmt::print(stderr, FMT_STRING("FAILURE! 
{} encountered the following error: {}.\n"), argv0, - e.what()); - return 1; - } catch (...) { - fmt::print(stderr, - FMT_STRING("FAILURE! {} encountered the following error: Caught an " - "unhandled exception!\n"), - argv0); - return 1; - } - return 0; -} diff --git a/benchmark/bin_table_id_to_coords/CMakeLists.txt b/benchmark/bin_table_id_to_coords/CMakeLists.txt deleted file mode 100644 index 001579bc..00000000 --- a/benchmark/bin_table_id_to_coords/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2024 Roberto Rossini -# -# SPDX-License-Identifier: MIT - -find_package(CLI11 REQUIRED) -find_package(Filesystem REQUIRED) - -add_executable(hictk_bin_table_id_to_coords_bench bin_table_id_to_coords.cpp) - -target_link_libraries( - hictk_bin_table_id_to_coords_bench - PRIVATE - hictk_project_warnings - hictk_project_options - PUBLIC - hictk::bin_table -) - -target_link_system_libraries( - hictk_bin_table_id_to_coords_bench - PUBLIC - CLI11::CLI11 - std::filesystem -) diff --git a/benchmark/bin_table_id_to_coords/bin_table_id_to_coords.cpp b/benchmark/bin_table_id_to_coords/bin_table_id_to_coords.cpp deleted file mode 100644 index 6c194390..00000000 --- a/benchmark/bin_table_id_to_coords/bin_table_id_to_coords.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (C) 2024 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace hictk; - -// clang-format off -// NOLINTNEXTLINE(cert-err58-cpp) -static const std::vector hg38{ - Chromosome{0, "chr1", 248956422}, - Chromosome{1, "chr2", 242193529}, - Chromosome{2, "chr3", 198295559}, - Chromosome{3, "chr4", 190214555}, - Chromosome{4, "chr5", 181538259}, - Chromosome{5, "chr6", 170805979}, - Chromosome{6, "chr7", 159345973}, - Chromosome{7, "chr8", 145138636}, - Chromosome{8, "chr9", 138394717}, - Chromosome{9, "chr10", 133797422}, - Chromosome{10, "chr11", 135086622}, - Chromosome{11, "chr12", 
133275309}, - Chromosome{12, "chr13", 114364328}, - Chromosome{13, "chr14", 107043718}, - Chromosome{14, "chr15", 101991189}, - Chromosome{15, "chr16", 90338345}, - Chromosome{16, "chr17", 83257441}, - Chromosome{17, "chr18", 80373285}, - Chromosome{18, "chr19", 58617616}, - Chromosome{19, "chr20", 64444167}, - Chromosome{20, "chr21", 46709983}, - Chromosome{21, "chr22", 50818468}, - Chromosome{22, "chrX", 156040895}, - Chromosome{23, "chrY", 57227415} -}; - -// clang-format on - -// NOLINTBEGIN(*-avoid-magic-numbers) -struct Config { - std::uint32_t resolution{1'000}; - std::size_t batch_size{10'000'000}; - std::size_t iterations{1}; - std::uint64_t seed{123456789}; -}; -// NOLINTEND(*-avoid-magic-numbers) - -[[nodiscard]] static std::vector init_bin_ids(const BinTable &bins, - std::size_t batch_size, - std::uint64_t seed) { - std::vector buff(batch_size); - std::mt19937_64 rand_eng(seed); - std::generate(buff.begin(), buff.end(), [&]() { - return std::uniform_int_distribution{0, bins.size() - 1}(rand_eng); - }); - - return buff; -} - -[[nodiscard]] static std::uint64_t run_benchmark(const BinTable &bins, - const std::vector &queries) { - const auto t0 = std::chrono::system_clock::now(); - for (const auto &q : queries) { - std::ignore = bins.at(q); - } - const auto t1 = std::chrono::system_clock::now(); - - return static_cast( - std::chrono::duration_cast(t1 - t0).count()); -} - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char **argv) noexcept { - const auto *argv0 = argv[0]; // NOLINT(*-pointer-arithmetic) - - CLI::App cli{}; - Config config{}; - cli.add_option("--resolution", config.resolution, "Resolution of the bin table.") - ->capture_default_str(); - cli.add_option("--batch-size", config.batch_size, "Batch size.")->capture_default_str(); - cli.add_option("--iterations", config.iterations, "Number of iterations to perform.") - ->capture_default_str(); - cli.add_option("--seed", config.seed, "Seed")->capture_default_str(); - - try { - 
cli.parse(argc, argv); - - const BinTable bin_table{hg38.begin(), hg38.end(), config.resolution}; - const auto bin_ids = init_bin_ids(bin_table, config.batch_size, config.seed); - - std::uint64_t elapsed_time = 0; - for (std::size_t i = 0; i < config.iterations; ++i) { - elapsed_time += run_benchmark(bin_table, bin_ids); - } - - const auto elapsed_seconds = static_cast(elapsed_time) / 1.0e9; - const auto throughput = - static_cast(config.batch_size * config.iterations) / elapsed_seconds; - - fmt::print(FMT_STRING("hictk::BinTable::at(bin_id) throughput: {:.4} num/s\n"), throughput); - - } catch (const CLI::ParseError &e) { - assert(cli); - return cli.exit(e); - } catch (const std::exception &e) { - fmt::print(stderr, FMT_STRING("FAILURE! {} encountered the following error: {}.\n"), argv0, - e.what()); - return 1; - } catch (...) { - fmt::print(stderr, - FMT_STRING("FAILURE! {} encountered the following error: Caught an " - "unhandled exception!\n"), - argv0); - return 1; - } - return 0; -} diff --git a/benchmark/bins/CMakeLists.txt b/benchmark/bins/CMakeLists.txt new file mode 100644 index 00000000..a04f3155 --- /dev/null +++ b/benchmark/bins/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright (C) 2024 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +target_sources(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/bins.cpp") + +target_link_libraries(hictk_benchmark PUBLIC hictk::bin_table) diff --git a/benchmark/bins/bins.cpp b/benchmark/bins/bins.cpp new file mode 100644 index 00000000..5c89b938 --- /dev/null +++ b/benchmark/bins/bins.cpp @@ -0,0 +1,101 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hictk/benchmark/hg38.hpp" +#include "hictk/bin.hpp" +#include "hictk/bin_table_fixed.hpp" +#include "hictk/chromosome.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers) +[[nodiscard]] static std::vector 
generate_bins(std::size_t size, bool erase_ids) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + const BinTableFixed bin_table({hg38.begin(), hg38.end()}, 1'000); + std::vector bins; + bins.reserve(size); + + while (bins.size() != size) { + std::sample(bin_table.begin(), bin_table.end(), std::back_inserter(bins), size - bins.size(), + rand_eng); + } + + if (erase_ids) { + std::transform(bins.begin(), bins.end(), bins.begin(), + [](const auto& bin) { return Bin{bin.chrom(), bin.start(), bin.end()}; }); + } + + return bins; +} + +TEST_CASE("Bin") { + BENCHMARK_ADVANCED("Construction") + (Catch::Benchmark::Chronometer meter) { + const auto num_runs = static_cast(meter.runs()); + std::vector> storage(num_runs); + + const Chromosome chrom{0, "chr1", 123'456'789}; + + meter.measure( + [&storage, &chrom](std::size_t i) { storage[i].construct(chrom, 10'000'000, 11'000'000); }); + }; + + BENCHMARK_ADVANCED("Destruction") + (Catch::Benchmark::Chronometer meter) { + const auto num_runs = static_cast(meter.runs()); + std::vector> storage(num_runs); + + const Chromosome chrom{0, "chr1", 123'456'789}; + + for (auto& bin : storage) { + bin.construct(chrom, 10'000'000, 11'000'000); + } + + meter.measure([&storage](std::size_t i) { storage[i].destruct(); }); + }; + + BENCHMARK_ADVANCED("sorting w/ id") + (Catch::Benchmark::Chronometer meter) { + const auto bins = generate_bins(1'000'000, false); + std::vector> data(static_cast(meter.runs()), bins); + meter.measure([&data](std::size_t i) { + std::size_t num_ops{}; + std::sort(data[i].begin(), data[i].end(), [&](const auto& bin1, const auto& bin2) { + ++num_ops; + return bin1 < bin2; + }); + return num_ops; + }); + }; + + BENCHMARK_ADVANCED("sorting wo/ id") + (Catch::Benchmark::Chronometer meter) { + const auto bins = generate_bins(1'000'000, true); + std::vector> data(static_cast(meter.runs()), bins); + meter.measure([&data](std::size_t i) { + std::size_t num_ops{}; + std::sort(data[i].begin(), data[i].end(), 
[&](const auto& bin1, const auto& bin2) { + ++num_ops; + return bin1 < bin2; + }); + return num_ops; + }); + }; +} +// NOLINTEND(*-avoid-magic-numbers) + +} // namespace hictk::benchmark diff --git a/benchmark/fetch_and_sum/CMakeLists.txt b/benchmark/fetch_and_sum/CMakeLists.txt deleted file mode 100644 index c82961f3..00000000 --- a/benchmark/fetch_and_sum/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2024 Roberto Rossini -# -# SPDX-License-Identifier: MIT - -find_package(CLI11 REQUIRED) -find_package(Filesystem REQUIRED) - -add_executable(hictk_fetch_and_sum_bench fetch_and_sum.cpp) - -target_link_libraries( - hictk_fetch_and_sum_bench - PRIVATE - hictk_project_warnings - hictk_project_options - PUBLIC - hictk::cooler - hictk::hic -) - -target_link_system_libraries( - hictk_fetch_and_sum_bench - PUBLIC - CLI11::CLI11 - std::filesystem -) diff --git a/benchmark/fetch_and_sum/fetch_and_sum.cpp b/benchmark/fetch_and_sum/fetch_and_sum.cpp deleted file mode 100644 index 5fa14344..00000000 --- a/benchmark/fetch_and_sum/fetch_and_sum.cpp +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (C) 2023 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct Config { - std::string path{}; - std::string weights{"NONE"}; - - std::uint32_t resolution{}; -}; - -using namespace hictk; - -[[nodiscard]] static std::pair parse_bedpe(std::string_view line) { - auto parse_bed = [&]() { - assert(!line.empty()); - const auto pos1 = line.find('\t'); - const auto pos2 = line.find('\t', pos1 + 1); - const auto pos3 = line.find('\t', pos2 + 1); - - auto tok = std::string{line.substr(0, pos3)}; - tok[pos1] = ':'; - tok[pos2] = '-'; - line.remove_prefix(pos3 + 1); - return tok; - }; - - return std::make_pair(parse_bed(), parse_bed()); -} - -template -[[nodiscard]] static std::pair accumulate_interactions(PixelIt first_pixel, - PixelIt last_pixel) { - std::size_t nnz = 0; - const auto 
sum = std::accumulate(std::move(first_pixel), std::move(last_pixel), 0.0, - [&](const double accumulator, const auto &pixel) { - ++nnz; - return accumulator + double(pixel.count); - }); - return std::make_pair(nnz, sum); -} - -static void fetch_and_sum(const Config &c, cooler::File clr) { - auto weights = clr.normalization_ptr(c.weights); - - std::string line; - while (std::getline(std::cin, line)) { - const auto [range1, range2] = parse_bedpe(line); - const auto t0 = std::chrono::system_clock::now(); - auto sel = clr.fetch(range1, range2, weights); - const auto [nnz, sum] = accumulate_interactions(sel.begin(), sel.end()); - const auto t1 = std::chrono::system_clock::now(); - - const auto delta = std::chrono::duration_cast(t1 - t0).count(); - - // NOLINTNEXTLINE(*-avoid-magic-numbers) - fmt::print(FMT_STRING("{}\t{}\t{}\t{}\n"), line, nnz, sum, double(delta) / 1.0e9); - } -} - -static void fetch_and_sum(const Config &c, hic::File hf) { - hf.optimize_cache_size_for_random_access(); - const auto norm = balancing::Method(c.weights); - - std::string line; - while (std::getline(std::cin, line)) { - const auto [range1, range2] = parse_bedpe(line); - const auto t0 = std::chrono::system_clock::now(); - auto sel = hf.fetch(range1, range2, norm); - const auto [nnz, sum] = accumulate_interactions(sel.begin(false), sel.end()); - const auto t1 = std::chrono::system_clock::now(); - - const auto delta = std::chrono::duration_cast(t1 - t0).count(); - - // NOLINTNEXTLINE(*-avoid-magic-numbers) - fmt::print(FMT_STRING("{}\t{}\t{}\t{}\n"), line, nnz, sum, double(delta) / 1.0e9); - } -} - -static void fetch_and_sum(const Config &c) { - fmt::print(FMT_STRING("chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tnnz\tsum\ttime\n")); - if (hic::utils::is_hic_file(c.path)) { - fetch_and_sum(c, hic::File(c.path, c.resolution)); - } else { - fetch_and_sum(c, cooler::File(c.path)); - } -} - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char **argv) noexcept { - const auto *argv0 = 
argv[0]; // NOLINT(*-pointer-arithmetic) - - CLI::App cli{}; - Config config{}; - cli.add_option("file", config.path, "Path to a .cool or .hic file (Cooler URI syntax supported).") - ->required(); - - cli.add_option("--weights", config.weights, - "Name of the balancing weights to apply to interactions."); - - cli.add_option("--resolution", config.resolution, - "Matrix resolution. Ignored when input file is in Cooler format."); - try { - cli.parse(argc, argv); - - std::ios::sync_with_stdio(false); - if (!config.path.empty()) { - fetch_and_sum(config); - } - - } catch (const CLI::ParseError &e) { - assert(cli); - return cli.exit(e); - } catch (const std::exception &e) { - fmt::print(stderr, FMT_STRING("FAILURE! {} encountered the following error: {}.\n"), argv0, - e.what()); - return 1; - } catch (...) { - fmt::print(stderr, - FMT_STRING("FAILURE! {} encountered the following error: Caught an " - "unhandled exception!\n"), - argv0); - return 1; - } - return 0; -} diff --git a/benchmark/formatting/CMakeLists.txt b/benchmark/formatting/CMakeLists.txt new file mode 100644 index 00000000..c2c199c3 --- /dev/null +++ b/benchmark/formatting/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +target_sources(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/formatting.cpp") + +target_link_libraries( + hictk_benchmark + PUBLIC + hictk::bin_table + hictk::chromosome + hictk::format + hictk::pixel + hictk::reference +) diff --git a/benchmark/formatting/common.hpp b/benchmark/formatting/common.hpp new file mode 100644 index 00000000..065a74ce --- /dev/null +++ b/benchmark/formatting/common.hpp @@ -0,0 +1,250 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#include "hictk/chromosome.hpp" +#include "hictk/pixel.hpp" + +// clang-format off +// NOLINTNEXTLINE(cert-err58-cpp) +inline const std::vector hg38{ + hictk::Chromosome{0, "chr1", 
248956422}, + hictk::Chromosome{1, "chr2", 242193529}, + hictk::Chromosome{2, "chr3", 198295559}, + hictk::Chromosome{3, "chr4", 190214555}, + hictk::Chromosome{4, "chr5", 181538259}, + hictk::Chromosome{5, "chr6", 170805979}, + hictk::Chromosome{6, "chr7", 159345973}, + hictk::Chromosome{7, "chr8", 145138636}, + hictk::Chromosome{8, "chr9", 138394717}, + hictk::Chromosome{9, "chr10", 133797422}, + hictk::Chromosome{10, "chr11", 135086622}, + hictk::Chromosome{11, "chr12", 133275309}, + hictk::Chromosome{12, "chr13", 114364328}, + hictk::Chromosome{13, "chr14", 107043718}, + hictk::Chromosome{14, "chr15", 101991189}, + hictk::Chromosome{15, "chr16", 90338345}, + hictk::Chromosome{16, "chr17", 83257441}, + hictk::Chromosome{17, "chr18", 80373285}, + hictk::Chromosome{18, "chr19", 58617616}, + hictk::Chromosome{19, "chr20", 64444167}, + hictk::Chromosome{20, "chr21", 46709983}, + hictk::Chromosome{21, "chr22", 50818468}, + hictk::Chromosome{22, "chrX", 156040895}, + hictk::Chromosome{23, "chrY", 57227415} +}; + +// NOLINTNEXTLINE(cert-err58-cpp) +static const std::vector> pixels_int{ + hictk::ThinPixel{911, 1197, 506}, + hictk::ThinPixel{41, 764, 830}, + hictk::ThinPixel{606, 671, 642}, + hictk::ThinPixel{420, 817, 1015}, + hictk::ThinPixel{349, 370, 701}, + hictk::ThinPixel{700, 749, 142}, + hictk::ThinPixel{301, 689, 634}, + hictk::ThinPixel{525, 747, 708}, + hictk::ThinPixel{278, 599, 771}, + hictk::ThinPixel{93, 852, 662}, + hictk::ThinPixel{178, 990, 765}, + hictk::ThinPixel{977, 1179, 811}, + hictk::ThinPixel{88, 1082, 41}, + hictk::ThinPixel{272, 600, 1224}, + hictk::ThinPixel{731, 1154, 685}, + hictk::ThinPixel{898, 983, 3}, + hictk::ThinPixel{227, 858, 630}, + hictk::ThinPixel{408, 793, 704}, + hictk::ThinPixel{362, 1198, 377}, + hictk::ThinPixel{527, 600, 862}, + hictk::ThinPixel{20, 949, 867}, + hictk::ThinPixel{499, 951, 777}, + hictk::ThinPixel{855, 1126, 225}, + hictk::ThinPixel{532, 1131, 725}, + hictk::ThinPixel{567, 783, 21}, + hictk::ThinPixel{58, 
644, 169}, + hictk::ThinPixel{82, 1002, 805}, + hictk::ThinPixel{686, 817, 759}, + hictk::ThinPixel{111, 1023, 1079}, + hictk::ThinPixel{6, 867, 578}, + hictk::ThinPixel{430, 496, 368}, + hictk::ThinPixel{90, 1227, 8}, + hictk::ThinPixel{407, 460, 804}, + hictk::ThinPixel{549, 799, 672}, + hictk::ThinPixel{718, 1154, 540}, + hictk::ThinPixel{635, 653, 3830}, + hictk::ThinPixel{231, 936, 424}, + hictk::ThinPixel{623, 644, 3099}, + hictk::ThinPixel{682, 1037, 405}, + hictk::ThinPixel{1005, 1166, 563}, + hictk::ThinPixel{170, 1030, 842}, + hictk::ThinPixel{5, 802, 496}, + hictk::ThinPixel{649, 1116, 748}, + hictk::ThinPixel{335, 1231, 11}, + hictk::ThinPixel{205, 1101, 815}, + hictk::ThinPixel{42, 479, 855}, + hictk::ThinPixel{769, 815, 1169}, + hictk::ThinPixel{258, 1199, 586}, + hictk::ThinPixel{104, 1008, 631}, + hictk::ThinPixel{302, 1138, 402}, + hictk::ThinPixel{338, 719, 1125}, + hictk::ThinPixel{32, 228, 889}, + hictk::ThinPixel{233, 309, 213}, + hictk::ThinPixel{818, 1116, 588}, + hictk::ThinPixel{132, 238, 820}, + hictk::ThinPixel{213, 1086, 435}, + hictk::ThinPixel{632, 1035, 932}, + hictk::ThinPixel{434, 1158, 428}, + hictk::ThinPixel{48, 874, 181}, + hictk::ThinPixel{48, 213, 179}, + hictk::ThinPixel{442, 871, 620}, + hictk::ThinPixel{1013, 1200, 428}, + hictk::ThinPixel{71, 651, 514}, + hictk::ThinPixel{923, 955, 685}, + hictk::ThinPixel{689, 861, 647}, + hictk::ThinPixel{288, 1218, 488}, + hictk::ThinPixel{183, 412, 1034}, + hictk::ThinPixel{588, 601, 11539}, + hictk::ThinPixel{387, 912, 891}, + hictk::ThinPixel{90, 974, 513}, + hictk::ThinPixel{371, 835, 569}, + hictk::ThinPixel{140, 577, 376}, + hictk::ThinPixel{302, 537, 813}, + hictk::ThinPixel{262, 390, 792}, + hictk::ThinPixel{340, 395, 954}, + hictk::ThinPixel{1037, 1064, 514}, + hictk::ThinPixel{239, 897, 921}, + hictk::ThinPixel{95, 1152, 459}, + hictk::ThinPixel{493, 493, 1018025}, + hictk::ThinPixel{669, 961, 739}, + hictk::ThinPixel{807, 969, 566}, + hictk::ThinPixel{742, 989, 835}, + 
hictk::ThinPixel{744, 990, 575}, + hictk::ThinPixel{23, 280, 498}, + hictk::ThinPixel{628, 967, 547}, + hictk::ThinPixel{1033, 1221, 767}, + hictk::ThinPixel{221, 696, 838}, + hictk::ThinPixel{134, 381, 629}, + hictk::ThinPixel{213, 904, 886}, + hictk::ThinPixel{377, 1204, 673}, + hictk::ThinPixel{498, 729, 574}, + hictk::ThinPixel{688, 872, 745}, + hictk::ThinPixel{153, 1114, 618}, + hictk::ThinPixel{316, 1059, 756}, + hictk::ThinPixel{821, 1113, 885}, + hictk::ThinPixel{74, 974, 453}, + hictk::ThinPixel{401, 763, 1053}, + hictk::ThinPixel{563, 1080, 260}, + hictk::ThinPixel{215, 776, 1411}, + hictk::ThinPixel{294, 351, 1402} +}; + +// NOLINTNEXTLINE(cert-err58-cpp) +static const std::vector> pixels_fp{ + hictk::ThinPixel{227, 975, 0.001774408751560567}, + hictk::ThinPixel{805, 919, 0.002398122290017494}, + hictk::ThinPixel{135, 914, 0.001886123711557552}, + hictk::ThinPixel{699, 1070, 0.00175961438017762}, + hictk::ThinPixel{309, 1199, 0.0008797559431597252}, + hictk::ThinPixel{735, 1081, 0.001899077974894686}, + hictk::ThinPixel{300, 1121, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{78, 977, 0.001853849134012182}, + hictk::ThinPixel{33, 1085, 0.001798047882164512}, + hictk::ThinPixel{241, 1221, 0.001104969330974624}, + hictk::ThinPixel{414, 856, 0.00254256426412958}, + hictk::ThinPixel{466, 1106, 0.001188964144166828}, + hictk::ThinPixel{811, 1044, 0.002513614742710731}, + hictk::ThinPixel{901, 1220, 0.001296543334058325}, + hictk::ThinPixel{85, 634, 0.001702426741725685}, + hictk::ThinPixel{915, 1171, 0.001740827881847112}, + hictk::ThinPixel{274, 1187, 0.001241982824879383}, + hictk::ThinPixel{632, 865, 0.001958900317418727}, + hictk::ThinPixel{849, 1071, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{345, 646, 0.001400935230723411}, + hictk::ThinPixel{72, 656, 0.002699732513395894}, + hictk::ThinPixel{150, 449, 0.00117523713695984}, + hictk::ThinPixel{7, 487, 0.001855693209805665}, + hictk::ThinPixel{129, 473, 0.001392220321566891}, + 
hictk::ThinPixel{517, 738, 0.001797602467344849}, + hictk::ThinPixel{198, 1090, 0.002340057686960196}, + hictk::ThinPixel{228, 606, 0.001347433430756599}, + hictk::ThinPixel{465, 1083, 0.001237653385960538}, + hictk::ThinPixel{290, 485, 0.001352846374287686}, + hictk::ThinPixel{489, 997, 0.001983854112812797}, + hictk::ThinPixel{194, 479, 0.001984706583416832}, + hictk::ThinPixel{618, 647, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{762, 945, 0.001749388416938099}, + hictk::ThinPixel{308, 344, 0.00600788189583366}, + hictk::ThinPixel{782, 856, 0.002247969112741988}, + hictk::ThinPixel{472, 706, 0.001180281843016874}, + hictk::ThinPixel{121, 850, 0.001794105904354432}, + hictk::ThinPixel{476, 715, 0.001515535265227589}, + hictk::ThinPixel{88, 627, 0.001829689545703822}, + hictk::ThinPixel{511, 805, 0.002910960247250109}, + hictk::ThinPixel{174, 1067, 0.001294516626159685}, + hictk::ThinPixel{266, 1153, 0.001380861989442363}, + hictk::ThinPixel{420, 495, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{790, 1018, 0.002829402613178424}, + hictk::ThinPixel{183, 1078, 0.00191550180489699}, + hictk::ThinPixel{276, 591, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{509, 1210, 0.001347142201149551}, + hictk::ThinPixel{255, 1047, 0.001118667120892648}, + hictk::ThinPixel{480, 1159, 0.001833822203527065}, + hictk::ThinPixel{823, 897, 0.002237175147248529}, + hictk::ThinPixel{635, 777, 0.001788973652601114}, + hictk::ThinPixel{238, 371, 0.00138312234907274}, + hictk::ThinPixel{107, 980, 0.001991843260038653}, + hictk::ThinPixel{291, 1127, 0.002990049927034433}, + hictk::ThinPixel{589, 916, 0.001616584215434608}, + hictk::ThinPixel{20, 137, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{718, 1203, 0.001289321103955408}, + hictk::ThinPixel{346, 1050, 0.00139600421161332}, + hictk::ThinPixel{334, 472, 0.001266419998353805}, + hictk::ThinPixel{327, 1117, 0.001669262103727093}, + hictk::ThinPixel{272, 915, 0.002213362856280068}, + 
hictk::ThinPixel{113, 1215, 0.001176913662435687}, + hictk::ThinPixel{425, 831, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{622, 911, 0.00255637082462076}, + hictk::ThinPixel{173, 518, 0.0008180554051936757}, + hictk::ThinPixel{240, 544, 0.00140345723413591}, + hictk::ThinPixel{338, 657, 0.002702508830111692}, + hictk::ThinPixel{195, 561, 0.001633281495077414}, + hictk::ThinPixel{120, 693, 0.001166730697443794}, + hictk::ThinPixel{65, 623, 0.002105478371996377}, + hictk::ThinPixel{711, 1090, 0.004891665420874022}, + hictk::ThinPixel{3, 793, 0.002560498234710207}, + hictk::ThinPixel{504, 509, 0.03492763957024018}, + hictk::ThinPixel{1112, 1190, 0.001770562476910959}, + hictk::ThinPixel{596, 800, 0.0014736300231506}, + hictk::ThinPixel{988, 1115, 0.003100766122181472}, + hictk::ThinPixel{121, 257, 0.001450358502569562}, + hictk::ThinPixel{255, 912, 0.001946766330811857}, + hictk::ThinPixel{273, 1176, 0.001471314139261714}, + hictk::ThinPixel{82, 719, 0.002119365550486372}, + hictk::ThinPixel{74, 320, 0.001457332702302532}, + hictk::ThinPixel{85, 295, 0.001053081452103882}, + hictk::ThinPixel{859, 898, 0.002087164254033803}, + hictk::ThinPixel{768, 986, 0.0008979099325866837}, + hictk::ThinPixel{229, 1167, 0.002129395802735831}, + hictk::ThinPixel{135, 1177, 0.001264262931260046}, + hictk::ThinPixel{271, 719, 0.002250939077269411}, + hictk::ThinPixel{387, 1135, 0.002874491796377588}, + hictk::ThinPixel{331, 609, 0.001203402214890043}, + hictk::ThinPixel{386, 789, 0.002758325373934198}, + hictk::ThinPixel{760, 959, 0.002203914386091114}, + hictk::ThinPixel{80, 187, 0.003136003587115225}, + hictk::ThinPixel{765, 1091, 0.001969886053011074}, + hictk::ThinPixel{216, 1006, std::numeric_limits::quiet_NaN()}, + hictk::ThinPixel{835, 1029, 0.002725713793425285}, + hictk::ThinPixel{614, 765, 0.001925260038563195}, + hictk::ThinPixel{301, 663, 0.001745673555095985}, + hictk::ThinPixel{326, 583, 0.001331414866632903}, + hictk::ThinPixel{492, 1208, 
0.001068728264867597}, + hictk::ThinPixel{137, 493, std::numeric_limits::quiet_NaN()} +}; +// clang-format on diff --git a/benchmark/formatting/formatting.cpp b/benchmark/formatting/formatting.cpp new file mode 100644 index 00000000..ab9a7824 --- /dev/null +++ b/benchmark/formatting/formatting.cpp @@ -0,0 +1,490 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/bin.hpp" +#include "hictk/bin_table_fixed.hpp" +#include "hictk/chromosome.hpp" +#include "hictk/fmt.hpp" +#include "hictk/genomic_interval.hpp" +#include "hictk/pixel.hpp" + +using namespace hictk; + +[[nodiscard]] static auto open_dev_null() { + struct file_deleter { + void operator()(std::FILE* fp) { + std::fclose(fp); // NOLINT + } + }; + +#ifdef _MSC_VER + // NOLINTNEXTLINE(*-owning-memory) + return std::unique_ptr{std::fopen("nul", "w")}; +#else + // NOLINTNEXTLINE(*-owning-memory) + return std::unique_ptr{std::fopen("/dev/null", "w")}; +#endif +} + +template +[[nodiscard]] static std::vector to_chromosomes( + const BinTableFixed& bin_table, const std::vector>& pixels) { + std::vector chroms{}; + chroms.reserve(pixels.size() * 2); + + for (const auto& p : pixels) { + chroms.emplace_back(bin_table.at(p.bin1_id).chrom()); + chroms.emplace_back(bin_table.at(p.bin2_id).chrom()); + } + + return chroms; +} + +template +[[nodiscard]] static std::vector to_bins(const BinTableFixed& bin_table, + const std::vector>& pixels) { + std::vector bins{}; + bins.reserve(pixels.size() * 2); + + for (const auto& p : pixels) { + bins.emplace_back(bin_table.at(p.bin1_id)); + bins.emplace_back(bin_table.at(p.bin2_id)); + } + + return bins; +} + +template +[[nodiscard]] static std::vector to_genomic_intervals( + const BinTableFixed& bin_table, const std::vector>& pixels) { + std::vector gis{}; + gis.reserve(pixels.size() * 2); + + for (const auto& p : 
pixels) { + gis.emplace_back(bin_table.at(p.bin1_id).interval()); + gis.emplace_back(bin_table.at(p.bin2_id).interval()); + } + + return gis; +} + +template +[[nodiscard]] static std::vector> to_pixels(const BinTableFixed& bin_table, + const std::vector>& thin_pixels) { + std::vector> pixels(thin_pixels.size()); + + std::transform(thin_pixels.begin(), thin_pixels.end(), pixels.begin(), + [&](const ThinPixel& tp) { + return Pixel{bin_table.at(tp.bin1_id), bin_table.at(tp.bin2_id), tp.count}; + }); + + return pixels; +} + +template +[[nodiscard]] static std::vector random_sample_with_replacement(const std::vector& src, + std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector dest{}; + while (dest.size() < size) { + dest.insert(dest.end(), src.begin(), src.end()); + } + + dest.resize(size); + std::shuffle(dest.begin(), dest.end(), rand_eng); + + return dest; +} + +// NOLINTBEGIN(*-avoid-magic-numbers, readability-function-cognitive-complexity) +TEST_CASE("Formatting Chromosome") { + BENCHMARK_ADVANCED("wo/ compilation (TSV)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto chroms = + random_sample_with_replacement(to_chromosomes(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&chroms, &fp]() { + for (const auto& c : chroms) { + fmt::print(fp.get(), FMT_STRING("{:tsv}"), c); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (UCSC)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto chroms = + random_sample_with_replacement(to_chromosomes(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&chroms, &fp]() { + for (const auto& c : chroms) { + fmt::print(fp.get(), FMT_STRING("{:ucsc}"), c); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (TSV)") + (Catch::Benchmark::Chronometer meter) { + const 
BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto chroms = + random_sample_with_replacement(to_chromosomes(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&chroms, &fp]() { + for (const auto& c : chroms) { + fmt::print(fp.get(), FMT_COMPILE("{:tsv}"), c); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (UCSC)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto chroms = + random_sample_with_replacement(to_chromosomes(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&chroms, &fp]() { + for (const auto& c : chroms) { + fmt::print(fp.get(), FMT_COMPILE("{:ucsc}"), c); + } + }); + }; +} + +TEST_CASE("Formatting GenomicInterval") { + BENCHMARK_ADVANCED("wo/ compilation (BED)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto intervals = + random_sample_with_replacement(to_genomic_intervals(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&intervals, &fp]() { + for (const auto& gi : intervals) { + fmt::print(fp.get(), FMT_STRING("{:bed}"), gi); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (UCSC)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto intervals = + random_sample_with_replacement(to_genomic_intervals(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&intervals, &fp]() { + for (const auto& gi : intervals) { + fmt::print(fp.get(), FMT_STRING("{:ucsc}"), gi); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (BED)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto intervals = + random_sample_with_replacement(to_genomic_intervals(bin_table, pixels_int), 100'000); + auto fp = 
open_dev_null(); + + meter.measure([&intervals, &fp]() { + for (const auto& gi : intervals) { + fmt::print(fp.get(), FMT_COMPILE("{:bed}"), gi); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (UCSC)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto intervals = + random_sample_with_replacement(to_genomic_intervals(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&intervals, &fp]() { + for (const auto& gi : intervals) { + fmt::print(fp.get(), FMT_COMPILE("{:ucsc}"), gi); + } + }); + }; +} + +TEST_CASE("Formatting Bin") { + BENCHMARK_ADVANCED("wo/ compilation (raw)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto bins = random_sample_with_replacement(to_bins(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&bins, &fp]() { + for (const auto& bin : bins) { + fmt::print(fp.get(), FMT_STRING("{:raw}"), bin); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (BED)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto bins = random_sample_with_replacement(to_bins(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&bins, &fp]() { + for (const auto& bin : bins) { + fmt::print(fp.get(), FMT_STRING("{:bed}"), bin); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (UCSC)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto bins = random_sample_with_replacement(to_bins(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&bins, &fp]() { + for (const auto& bin : bins) { + fmt::print(fp.get(), FMT_STRING("{:ucsc}"), bin); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (raw)") + (Catch::Benchmark::Chronometer meter) { + 
const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto bins = random_sample_with_replacement(to_bins(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&bins, &fp]() { + for (const auto& bin : bins) { + fmt::print(fp.get(), FMT_COMPILE("{:raw}"), bin); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (BED)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto bins = random_sample_with_replacement(to_bins(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&bins, &fp]() { + for (const auto& bin : bins) { + fmt::print(fp.get(), FMT_COMPILE("{:bed}"), bin); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (UCSC)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto bins = random_sample_with_replacement(to_bins(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&bins, &fp]() { + for (const auto& bin : bins) { + fmt::print(fp.get(), FMT_COMPILE("{:ucsc}"), bin); + } + }); + }; +} + +TEST_CASE("Formatting ThinPixel") { + BENCHMARK_ADVANCED("wo/ compilation (int)") + (Catch::Benchmark::Chronometer meter) { + const auto pixels = random_sample_with_replacement(pixels_int, 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& tp : pixels) { + fmt::print(fp.get(), FMT_STRING("{}"), tp); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (double)") + (Catch::Benchmark::Chronometer meter) { + const auto pixels = random_sample_with_replacement(pixels_fp, 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& tp : pixels) { + fmt::print(fp.get(), FMT_STRING("{}"), tp); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (int)") + (Catch::Benchmark::Chronometer meter) { + const auto pixels = 
random_sample_with_replacement(pixels_int, 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& tp : pixels) { + fmt::print(fp.get(), FMT_COMPILE("{}"), tp); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (double)") + (Catch::Benchmark::Chronometer meter) { + const auto pixels = random_sample_with_replacement(pixels_fp, 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& tp : pixels) { + fmt::print(fp.get(), FMT_COMPILE("{}"), tp); + } + }); + }; +} + +TEST_CASE("Formatting Pixel") { + BENCHMARK_ADVANCED("wo/ compilation (int; BG2)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + fmt::print(fp.get(), FMT_STRING("{:bg2}"), pxl); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (double; BG2)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_fp), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + fmt::print(fp.get(), FMT_STRING("{:bg2}"), pxl); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (int; BG2)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + fmt::print(fp.get(), FMT_COMPILE("{:bg2}"), pxl); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (double; BG2)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed 
bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_fp), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + fmt::print(fp.get(), FMT_COMPILE("{:bg2}"), pxl); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (int; raw)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + fmt::print(fp.get(), FMT_STRING("{:raw}"), pxl); + } + }); + }; + + BENCHMARK_ADVANCED("wo/ compilation (double; raw)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_fp), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + fmt::print(fp.get(), FMT_STRING("{:raw}"), pxl); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (int; raw)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_int), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + fmt::print(fp.get(), FMT_COMPILE("{:raw}"), pxl); + } + }); + }; + + BENCHMARK_ADVANCED("w/ compilation (double; raw)") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table{{hg38.begin(), hg38.end()}, 2'500'000}; + + const auto pixels = random_sample_with_replacement(to_pixels(bin_table, pixels_fp), 100'000); + auto fp = open_dev_null(); + + meter.measure([&pixels, &fp]() { + for (const auto& pxl : pixels) { + 
fmt::print(fp.get(), FMT_COMPILE("{:raw}"), pxl); + } + }); + }; +} + +// NOLINTEND(*-avoid-magic-numbers, readability-function-cognitive-complexity) diff --git a/benchmark/include/hictk/benchmark/benchmark_installers.hpp b/benchmark/include/hictk/benchmark/benchmark_installers.hpp new file mode 100644 index 00000000..37687e4a --- /dev/null +++ b/benchmark/include/hictk/benchmark/benchmark_installers.hpp @@ -0,0 +1,21 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +namespace hictk::benchmark { + +void register_cooler_cis_queries_benchmarks(); +void register_cooler_gw_queries_benchmarks(); +void register_cooler_trans_queries_benchmarks(); + +void register_file_cis_queries_benchmarks(); +void register_file_gw_queries_benchmarks(); +void register_file_trans_queries_benchmarks(); + +void register_hic_cis_queries_benchmarks(); +void register_hic_gw_queries_benchmarks(); +void register_hic_trans_queries_benchmarks(); + +} // namespace hictk::benchmark diff --git a/benchmark/include/hictk/benchmark/hg38.hpp b/benchmark/include/hictk/benchmark/hg38.hpp new file mode 100644 index 00000000..48963e83 --- /dev/null +++ b/benchmark/include/hictk/benchmark/hg38.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include "hictk/chromosome.hpp" + +namespace hictk::benchmark { + +// clang-format off +// NOLINTNEXTLINE(cert-err58-cpp) +inline const std::vector hg38{ + hictk::Chromosome{0, "chr1", 248956422}, + hictk::Chromosome{1, "chr2", 242193529}, + hictk::Chromosome{2, "chr3", 198295559}, + hictk::Chromosome{3, "chr4", 190214555}, + hictk::Chromosome{4, "chr5", 181538259}, + hictk::Chromosome{5, "chr6", 170805979}, + hictk::Chromosome{6, "chr7", 159345973}, + hictk::Chromosome{7, "chr8", 145138636}, + hictk::Chromosome{8, "chr9", 138394717}, + hictk::Chromosome{9, "chr10", 133797422}, + hictk::Chromosome{10, "chr11", 135086622}, + hictk::Chromosome{11, 
"chr12", 133275309}, + hictk::Chromosome{12, "chr13", 114364328}, + hictk::Chromosome{13, "chr14", 107043718}, + hictk::Chromosome{14, "chr15", 101991189}, + hictk::Chromosome{15, "chr16", 90338345}, + hictk::Chromosome{16, "chr17", 83257441}, + hictk::Chromosome{17, "chr18", 80373285}, + hictk::Chromosome{18, "chr19", 58617616}, + hictk::Chromosome{19, "chr20", 64444167}, + hictk::Chromosome{20, "chr21", 46709983}, + hictk::Chromosome{21, "chr22", 50818468}, + hictk::Chromosome{22, "chrX", 156040895}, + hictk::Chromosome{23, "chrY", 57227415} +}; +// clang-format on + +} // namespace hictk::benchmark diff --git a/benchmark/include/hictk/benchmark/utils.hpp b/benchmark/include/hictk/benchmark/utils.hpp new file mode 100644 index 00000000..ae25b676 --- /dev/null +++ b/benchmark/include/hictk/benchmark/utils.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include +#include + +#include "hictk/version.hpp" + +namespace hictk::benchmark { + +[[nodiscard]] inline std::string generate_test_name(std::string_view title, + bool add_braces = true) { + return fmt::format(FMT_STRING("{}" + "\"name\": \"{}\", " + "\"hictk-version\": \"{}\", " + "\"start-time\": \"{:%FT%T}\"" + "{}"), + add_braces ? "{" : "", title, config::version::str(), + fmt::gmtime(std::time(nullptr)), add_braces ? 
"}" : ""); +} + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/CMakeLists.txt b/benchmark/interaction_fetching/CMakeLists.txt new file mode 100644 index 00000000..dd9ae215 --- /dev/null +++ b/benchmark/interaction_fetching/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +target_sources( + hictk_benchmark + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/cooler_cis_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/cooler_gw_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/cooler_trans_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/file_cis_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/file_gw_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/file_trans_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/hic_cis_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/hic_gw_queries.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/hic_trans_queries.cpp" +) + +target_link_libraries( + hictk_benchmark + PUBLIC + hictk::cooler + hictk::file + hictk::hic +) diff --git a/benchmark/interaction_fetching/common.hpp b/benchmark/interaction_fetching/common.hpp new file mode 100644 index 00000000..21b3fafd --- /dev/null +++ b/benchmark/interaction_fetching/common.hpp @@ -0,0 +1,403 @@ + + +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/utils.hpp" +#include "hictk/chromosome.hpp" +#include "hictk/genomic_interval.hpp" +#include "hictk/reference.hpp" + +namespace hictk::benchmark { + +template +[[nodiscard]] constexpr std::string_view type_name() { + static_assert(std::is_arithmetic_v); + static_assert(sizeof(float) == 4); // NOLINT(*-avoid-magic-numbers) + static_assert(sizeof(double) == 8); // NOLINT(*-avoid-magic-numbers) + if constexpr (std::is_same_v) { + return "std::uint8_t"; + } + if constexpr (std::is_same_v) { + return 
"std::uint16_t"; + } + if constexpr (std::is_same_v) { + return "std::uint32_t"; + } + if constexpr (std::is_same_v) { + return "std::uint64_t"; + } + + if constexpr (std::is_same_v) { + return "std::int8_t"; + } + if constexpr (std::is_same_v) { + return "std::int16_t"; + } + if constexpr (std::is_same_v) { + return "std::int32_t"; + } + if constexpr (std::is_same_v) { + return "std::int64_t"; + } + + if constexpr (std::is_same_v) { + return "std::float32_t"; + } + if constexpr (std::is_same_v) { + return "std::float64_t"; + } + + throw std::logic_error("unsupported type"); +} + +struct Params { + std::string name; + std::string tags; + std::filesystem::path path; + std::uint32_t resolution{}; + std::string_view range1; + std::string_view range2; + balancing::Method normalization; +}; + +// This class is necessary to work around some limitations of Catch2. +// Basically what we are after are parametrized benchmarks with automatic name and tag generation. +// This is not possible out of the box as of Catch2 v3.7.1 for the following reasons: +// - We can only assign tags to test cases: that means we need to have one benchmark per test case. +// - Catch supports dynamic test registration through the REGISTER_TEST_CASE macro. +// Unfortunately, this macro only supports registering functions with signature void foo(); +// which prevents us from passing benchmark params directly to the benchmark function. +// To work around these limitations I have implemented the TestCaseGenerator class (see below). +// This class can be constructed given one or more std::array with the params. This allows us to +// compute the number of parameter combinations at compile time. This is useful because we can use +// std::index_sequence in combination with expression folding to call template void +// run_benchmark(). 
This function then uses the template parameter to fetch the appropriate set of +// parameters from a TestCaseGenerator class that has been declared in the global namespace as a +// static const variable. +template +class TestCaseGenerator { + public: + [[nodiscard]] static constexpr std::size_t size() noexcept { + return S1 * std::max(std::size_t{1}, S2) * std::max(std::size_t{1}, S3) * + std::max(std::size_t{1}, S4) * std::max(std::size_t{1}, S5); + } + + private: + std::vector _params{}; + static constexpr std::size_t _chunk_size{32}; + + [[nodiscard]] static std::uint64_t compute_num_pixels_ub(std::string_view range1, + std::string_view range2, + std::uint32_t resolution) { + const auto gi1 = GenomicInterval::parse_ucsc(std::string{range1}); + const auto gi2 = GenomicInterval::parse_ucsc(std::string{range2}); + + const auto size1 = std::get<2>(gi1) - std::get<1>(gi1); + const auto size2 = std::get<2>(gi2) - std::get<1>(gi2); + + const auto nbins1 = static_cast((size1 + resolution - 1) / resolution); + const auto nbins2 = static_cast((size2 + resolution - 1) / resolution); + + return nbins1 * nbins2; + } + + template + [[nodiscard]] static std::string generate_tags(const std::filesystem::path& path, + std::string_view range1, std::string_view range2, + std::uint32_t resolution) { + const auto ext = path.extension(); + assert(!ext.empty()); + auto tags = fmt::format(FMT_STRING("[benchmark][interaction_fetching][{}][{}bp]"), + ext.string().substr(1), resolution); + if (range1 == "GW") { + assert(range2 == "GW"); + tags += "[gw]"; + } else if (range1 == range2) { + tags += "[cis]"; + } else { + tags += "[trans]"; + } + + if (range1 == "GW") { + assert(range1 == "GW"); + tags += "[large]"; + } else { + const auto num_pixels = compute_num_pixels_ub(range1, range2, resolution); + if (num_pixels < 100'000) { // NOLINT(*-avoid-magic-numbers) + tags += "[small]"; + } else if (num_pixels < 2'500'000) { // NOLINT(*-avoid-magic-numbers) + tags += "[medium]"; + } else { + 
tags += "[large]"; + } + } + + tags += fmt::format(FMT_STRING("[{}]"), type_name()); + + return tags; + } + + public: + TestCaseGenerator() = delete; + // NOLINTNEXTLINE(*-function-cognitive-complexity) + TestCaseGenerator(std::string_view title, std::array files, + std::array resolutions, + std::array ranges1, + std::array ranges2, + std::array normalizations) + : _params(size()) { + if constexpr (size() == 0) { + throw std::logic_error("size cannot be 0"); + } + std::size_t i = 0; + const auto test_name = generate_test_name(title, false); + for (const auto& f : files) { + for (const auto& res : resolutions) { + for (const auto& r1 : ranges1) { + for (const auto& r2 : ranges2) { + for (const auto& norm : normalizations) { + const auto int_counts = norm == balancing::Method::NONE(); + std::filesystem::path path{f}; + + const auto format = path.extension().string().substr(1); + + _params[i].name = fmt::format(FMT_STRING("{{" + "{}, " + "\"format\": \"{}\", " + "\"range1\": \"{}\", " + "\"range2\": \"{}\", " + "\"resolution\": {}, " + "\"sorted\": true, " + "\"count-type\": \"{}\"" + "}}"), + test_name, format, r1, r2, res, + int_counts ? "std::uint32_t" : "std::float64_t", norm); + _params[i].tags = int_counts ? 
generate_tags(path, r1, r2, res) + : generate_tags(path, r1, r2, res); + _params[i].path = std::move(path); + _params[i].resolution = res; + _params[i].range1 = r1; + _params[i].range2 = r2; + _params[i++].normalization = norm; + } + } + } + } + } + } + + [[nodiscard]] constexpr auto operator[](std::size_t i) const noexcept -> const Params& { + assert(i < size()); + return _params[i]; + } + + [[nodiscard]] static constexpr std::size_t chunk_size() noexcept { return _chunk_size; } + + [[nodiscard]] static constexpr std::size_t num_chunks() noexcept { + return (size() + chunk_size() - 1) / chunk_size(); + } +}; + +// This macro defines the boilerplate required to register one test case (which corresponds to one +// benchmark) for each parameter combination generated by an instance of the TestCaseGenerator +// defined above. +// In principle, we should be able to use fold expressions plus a std::index_sequence to call +// template void run_benchmark() to run a single test case. +// Unfortunately there are scenarios where a single TestCaseGenerator yields too many parameter +// combinations to be handled by a single fold expression. +// The boilerplate defined below basically splits a std::index_sequence into multiple chunks. +// Then submits one chunk at a time. 
+#define HICTK_REGISTER_BENCHMARKS(_ParamGenerator, _BenchmarkRunner) \ + namespace internal { \ + template \ + static void register_benchmark() { \ + constexpr auto IDX = (I * _ParamGenerator.chunk_size()) + J; \ + if constexpr (IDX < _ParamGenerator.size()) { \ + REGISTER_TEST_CASE(_BenchmarkRunner, _ParamGenerator[IDX].name, \ + _ParamGenerator[IDX].tags); \ + } \ + } \ + \ + template \ + static void register_benchmarks_chunk(std::index_sequence) { \ + (register_benchmark(), ...); \ + } \ + \ + template \ + static void register_benchmarks(std::index_sequence) { \ + constexpr auto CHUNK_SIZE = _ParamGenerator.chunk_size(); \ + (register_benchmarks_chunk(std::make_index_sequence{}), ...); \ + } \ + } \ + \ + static void register_benchmarks() { \ + constexpr auto NUM_CHUNKS = _ParamGenerator.num_chunks(); \ + internal::register_benchmarks(std::make_index_sequence{}); \ + } + +[[nodiscard]] inline std::pair generate_query( + std::mt19937_64& rand_eng, const hictk::Chromosome& chrom1, const hictk::Chromosome& chrom2, + double avg_height, double avg_width, double height_std, double width_std) { + assert(chrom1 <= chrom2); + + const auto pos1 = std::uniform_int_distribution{0U, chrom1.size() - 1}(rand_eng); + const auto pos2 = std::uniform_int_distribution{0U, chrom2.size() - 1}(rand_eng); + + const auto height = static_cast( + std::clamp(std::normal_distribution{avg_height, height_std}(rand_eng), 1.0, + static_cast(chrom1.size()))); + const auto width = static_cast( + std::clamp(std::normal_distribution{avg_width, width_std}(rand_eng), 1.0, + static_cast(chrom2.size()))); + + auto start1 = height >= pos1 ? std::uint32_t{0} : pos1 - height; + auto start2 = width >= pos2 ? 
std::uint32_t{0} : pos2 - width; + + if (chrom1 == chrom2 && start1 > start2) { + std::swap(start1, start2); + } + + auto end1 = std::min(start1 + height, chrom1.size()); + auto end2 = std::min(start2 + width, chrom2.size()); + + return std::make_pair(fmt::format(FMT_STRING("{}:{}-{}"), chrom1.name(), start1, end1), + fmt::format(FMT_STRING("{}:{}-{}"), chrom2.name(), start2, end2)); +} + +[[nodiscard]] inline std::vector> generate_queries( + const hictk::Chromosome& chrom1, const hictk::Chromosome& chrom2, std::size_t num_queries, + double avg_height, double avg_width, double height_std, double width_std, std::uint64_t seed) { + std::random_device rd{}; + std::mt19937_64 rand_eng(seed == 0 ? rd() : seed); + + std::vector> queries(num_queries); + + std::generate(queries.begin(), queries.end(), [&]() { + return generate_query(rand_eng, chrom1, chrom2, avg_height, avg_width, height_std, width_std); + }); + + return queries; +} + +[[nodiscard]] inline std::discrete_distribution init_chromosome_selector( + const hictk::Reference& chroms) { + std::vector weights{}; + for (const auto& chrom : chroms) { + if (chrom.is_all()) { + weights.push_back(0); + continue; + } + weights.push_back(chrom.size()); + } + + return {weights.begin(), weights.end()}; +} + +[[nodiscard]] inline std::vector> generate_queries_cis( + const hictk::Reference& chroms, std::size_t num_queries, double avg_height, double avg_width, + double height_std, double width_std, std::uint64_t seed) { + std::random_device rd{}; + std::mt19937_64 rand_eng(seed == 0 ? 
rd() : seed); + + auto chrom_selector = init_chromosome_selector(chroms); + + std::vector> queries(num_queries); + + std::generate(queries.begin(), queries.end(), [&]() { + const auto& chrom = chroms.at(chrom_selector(rand_eng)); + return generate_query(rand_eng, chrom, chrom, avg_height, avg_width, height_std, width_std); + }); + + return queries; +} + +[[nodiscard]] inline std::vector> generate_queries_trans( + const hictk::Reference& chroms, std::size_t num_queries, double avg_height, double avg_width, + double height_std, double width_std, std::uint64_t seed) { + std::random_device rd{}; + std::mt19937_64 rand_eng(seed == 0 ? rd() : seed); + + auto chrom_selector = init_chromosome_selector(chroms); + + std::vector> queries(num_queries); + + std::generate(queries.begin(), queries.end(), [&]() { + const auto& chrom1 = chroms.at(chrom_selector(rand_eng)); + while (true) { + const auto& chrom2 = chroms.at(chrom_selector(rand_eng)); + if (chrom1 == chrom2) { + continue; + } + return generate_query(rand_eng, chrom1, chrom2, avg_height, avg_width, height_std, width_std); + } + }); + + return queries; +} + +template +[[nodiscard]] inline std::ptrdiff_t count_nnz(const File& file, std::string_view range1, + std::string_view range2, + const hictk::balancing::Method& normalization) { + const auto sel = file.fetch(range1, range2, normalization); + + return std::distance(sel.template begin(), sel.template end()); +} + +template +[[nodiscard]] inline std::ptrdiff_t count_nnz(const File& file, std::size_t max_num_pixels, + const hictk::balancing::Method& normalization) { + const auto sel = file.fetch(normalization); + auto first = sel.template begin(); + auto last = sel.template end(); + // Count at most max_num_pixels pixels, checking for the end before advancing the iterator + std::ptrdiff_t i{}; + // clang-format off + for (; first != last && i != static_cast(max_num_pixels); ++first, ++i) {} // NOLINT + // clang-format on + return i; +} + +template +[[nodiscard]] inline std::ptrdiff_t count_nnz_unsorted( + const File& file, std::string_view range1, std::string_view range2, 
+ const hictk::balancing::Method& normalization) { + const auto sel = file.fetch(range1, range2, normalization); + + return std::distance(sel.template begin(false), sel.template end()); +} + +template +[[nodiscard]] inline std::ptrdiff_t count_nnz_unsorted( + const File& file, std::size_t max_num_pixels, const hictk::balancing::Method& normalization) { + const auto sel = file.fetch(normalization); + auto first = sel.template begin(false); + auto last = sel.template end(); + // Count at most max_num_pixels pixels, checking for the end before advancing the iterator + std::ptrdiff_t i{}; + // clang-format off + for (; first != last && i != static_cast(max_num_pixels); ++first, ++i) {} // NOLINT + // clang-format on + return i; +} + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/cooler_cis_queries.cpp b/benchmark/interaction_fetching/cooler_cis_queries.cpp new file mode 100644 index 00000000..d79e9229 --- /dev/null +++ b/benchmark/interaction_fetching/cooler_cis_queries.cpp @@ -0,0 +1,56 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/cooler/cooler.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "cooler::File::fetch (cis)", + std::array{"test/data/integration_tests/4DNFIZ1ZVXC8.mcool"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) + +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + const auto& params = 
test_generator[I]; + const cooler::File clr( + fmt::format(FMT_STRING("{}::/resolutions/{}"), params.path.string(), params.resolution)); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { + return count_nnz(clr, params.range1, params.range1, params.normalization); + }); + } else { + meter.measure([&]() { + return count_nnz(clr, params.range1, params.range1, params.normalization); + }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_cooler_cis_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/cooler_gw_queries.cpp b/benchmark/interaction_fetching/cooler_gw_queries.cpp new file mode 100644 index 00000000..e2ecbbb1 --- /dev/null +++ b/benchmark/interaction_fetching/cooler_gw_queries.cpp @@ -0,0 +1,52 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/cooler/cooler.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "cooler::File::fetch (gw)", + std::array{"test/data/integration_tests/4DNFIZ1ZVXC8.mcool"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"GW"}, + std::array{"GW"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) + +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + constexpr std::size_t max_npixels = 10'000'000; + const auto& params = test_generator[I]; + const cooler::File clr( + fmt::format(FMT_STRING("{}::/resolutions/{}"), params.path.string(), params.resolution)); + if (params.normalization == balancing::Method::NONE()) { + 
meter.measure( + [&]() { return count_nnz(clr, max_npixels, params.normalization); }); + } else { + meter.measure([&]() { return count_nnz(clr, max_npixels, params.normalization); }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_cooler_gw_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/cooler_trans_queries.cpp b/benchmark/interaction_fetching/cooler_trans_queries.cpp new file mode 100644 index 00000000..d45e34d4 --- /dev/null +++ b/benchmark/interaction_fetching/cooler_trans_queries.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/cooler/cooler.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "cooler::File::fetch (trans)", + std::array{"test/data/integration_tests/4DNFIZ1ZVXC8.mcool"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{"chrX:10,200,000-10,300,000", "chrX:5,000,000-6,000,000", + "chrX:15,000,000-20,000,000"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) + +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + const auto& params = test_generator[I]; + const cooler::File clr( + fmt::format(FMT_STRING("{}::/resolutions/{}"), params.path.string(), params.resolution)); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { + return count_nnz(clr, params.range1, params.range2, params.normalization); + }); + } else { + 
meter.measure([&]() { + return count_nnz(clr, params.range1, params.range2, params.normalization); + }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_cooler_trans_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/file_cis_queries.cpp b/benchmark/interaction_fetching/file_cis_queries.cpp new file mode 100644 index 00000000..472b7baf --- /dev/null +++ b/benchmark/interaction_fetching/file_cis_queries.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/file.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "File::fetch (cis)", + std::array{"test/data/integration_tests/4DNFIZ1ZVXC8.mcool", + "test/data/hic/4DNFIZ1ZVXC8.hic8", + "test/data/hic/4DNFIZ1ZVXC8.hic9"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) + +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + const auto& params = test_generator[I]; + const File f(params.path.string(), params.resolution); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range1, params.normalization); + }); + } else { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range1, params.normalization); + }); 
+ } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_file_cis_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/file_gw_queries.cpp b/benchmark/interaction_fetching/file_gw_queries.cpp new file mode 100644 index 00000000..454fcab5 --- /dev/null +++ b/benchmark/interaction_fetching/file_gw_queries.cpp @@ -0,0 +1,53 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/file.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "File::fetch (gw)", + std::array{"test/data/integration_tests/4DNFIZ1ZVXC8.mcool", + "test/data/hic/4DNFIZ1ZVXC8.hic8", + "test/data/hic/4DNFIZ1ZVXC8.hic9"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"GW"}, + std::array{"GW"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) + +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + // "GW" in test_generator looks like a label rather than a genomic range (TODO confirm), so it + // is not passed to fetch(): stream the matrix through the range-less count_nnz() overload and + // stop after max_npixels pixels, mirroring the cooler genome-wide benchmark. + constexpr std::size_t max_npixels = 10'000'000; + const auto& params = test_generator[I]; + const File f(params.path.string(), params.resolution); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { return count_nnz(f, max_npixels, params.normalization); }); + } else { + meter.measure([&]() { return count_nnz(f, max_npixels, params.normalization); }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_file_gw_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/file_trans_queries.cpp 
b/benchmark/interaction_fetching/file_trans_queries.cpp new file mode 100644 index 00000000..3126f472 --- /dev/null +++ b/benchmark/interaction_fetching/file_trans_queries.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/file.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "File::fetch (trans)", + std::array{"test/data/integration_tests/4DNFIZ1ZVXC8.mcool", + "test/data/hic/4DNFIZ1ZVXC8.hic8", + "test/data/hic/4DNFIZ1ZVXC8.hic9"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{"chrX:10,200,000-10,300,000", "chrX:5,000,000-6,000,000", + "chrX:15,000,000-20,000,000"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) +// Fetch the interactions between params.range1 (chr2L) and params.range2 (chrX) +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + const auto& params = test_generator[I]; + const File f(params.path.string(), params.resolution); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range2, params.normalization); + }); + } else { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range2, params.normalization); + }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_file_trans_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/hic_cis_queries.cpp b/benchmark/interaction_fetching/hic_cis_queries.cpp new file mode 100644 index 00000000..096dcff2 --- 
/dev/null +++ b/benchmark/interaction_fetching/hic_cis_queries.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/hic.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "hic::File::fetch (cis)", + std::array{"test/data/hic/4DNFIZ1ZVXC8.hic8", + "test/data/hic/4DNFIZ1ZVXC8.hic9"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) + +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + const auto& params = test_generator[I]; + const hic::File f(params.path.string(), params.resolution); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range1, params.normalization); + }); + } else { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range1, params.normalization); + }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_hic_cis_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/hic_gw_queries.cpp b/benchmark/interaction_fetching/hic_gw_queries.cpp new file mode 100644 index 00000000..715666f0 --- /dev/null +++ b/benchmark/interaction_fetching/hic_gw_queries.cpp @@ -0,0 +1,52 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT 
+ +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/hic.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const TestCaseGenerator test_generator{ + "hic::File::fetch (gw)", + std::array{"test/data/hic/4DNFIZ1ZVXC8.hic8", + "test/data/hic/4DNFIZ1ZVXC8.hic9"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"GW"}, + std::array{"GW"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) + +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + // "GW" in test_generator looks like a label rather than a genomic range (TODO confirm), so it + // is not passed to fetch(): stream the matrix through the range-less count_nnz() overload and + // stop after max_npixels pixels, mirroring the cooler genome-wide benchmark. + constexpr std::size_t max_npixels = 10'000'000; + const auto& params = test_generator[I]; + const hic::File f(params.path.string(), params.resolution); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { return count_nnz(f, max_npixels, params.normalization); }); + } else { + meter.measure([&]() { return count_nnz(f, max_npixels, params.normalization); }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_hic_gw_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/interaction_fetching/hic_trans_queries.cpp b/benchmark/interaction_fetching/hic_trans_queries.cpp new file mode 100644 index 00000000..c133073f --- /dev/null +++ b/benchmark/interaction_fetching/hic_trans_queries.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/balancing/methods.hpp" +#include "hictk/benchmark/benchmark_installers.hpp" +#include "hictk/hic.hpp" + +namespace hictk::benchmark { + +// NOLINTBEGIN(*-avoid-magic-numbers, cert-err58-cpp) +static const 
TestCaseGenerator test_generator{ + "hic::File::fetch (trans)", + std::array{"test/data/hic/4DNFIZ1ZVXC8.hic8", + "test/data/hic/4DNFIZ1ZVXC8.hic9"}, + std::array{1000, 10000, 100000, 1000000}, + std::array{"chr2L:5,000,000-5,100,000", "chr2L:6,000,000-7,000,000", + "chr2L:10,000,000-15,000,000"}, + std::array{"chrX:10,200,000-10,300,000", "chrX:5,000,000-6,000,000", + "chrX:15,000,000-20,000,000"}, + std::array{balancing::Method::NONE(), balancing::Method::VC()}}; +// NOLINTEND(*-avoid-magic-numbers, cert-err58-cpp) +// Fetch the interactions between params.range1 (chr2L) and params.range2 (chrX) +template +static void run_benchmark() { + BENCHMARK_ADVANCED("benchmark") + (Catch::Benchmark::Chronometer meter) { + const auto& params = test_generator[I]; + const hic::File f(params.path.string(), params.resolution); + if (params.normalization == balancing::Method::NONE()) { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range2, params.normalization); + }); + } else { + meter.measure([&]() { + return count_nnz(f, params.range1, params.range2, params.normalization); + }); + } + }; +} + +HICTK_REGISTER_BENCHMARKS(test_generator, run_benchmark) + +void register_hic_trans_queries_benchmarks() { register_benchmarks(); } + +} // namespace hictk::benchmark diff --git a/benchmark/main.cpp b/benchmark/main.cpp new file mode 100644 index 00000000..cd346dba --- /dev/null +++ b/benchmark/main.cpp @@ -0,0 +1,21 @@ +#include + +#include "hictk/benchmark/benchmark_installers.hpp" + +using namespace hictk::benchmark; + +int main(int argc, char* argv[]) { + register_cooler_cis_queries_benchmarks(); + register_cooler_gw_queries_benchmarks(); + register_cooler_trans_queries_benchmarks(); + + register_file_cis_queries_benchmarks(); + register_file_gw_queries_benchmarks(); + register_file_trans_queries_benchmarks(); + + register_hic_cis_queries_benchmarks(); + register_hic_gw_queries_benchmarks(); + register_hic_trans_queries_benchmarks(); + + return Catch::Session().run(argc, argv); +} diff --git a/benchmark/pixel_formatting/CMakeLists.txt 
b/benchmark/pixel_formatting/CMakeLists.txt deleted file mode 100644 index 1ad39bf9..00000000 --- a/benchmark/pixel_formatting/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2024 Roberto Rossini -# -# SPDX-License-Identifier: MIT - -find_package(CLI11 REQUIRED) -find_package(Filesystem REQUIRED) - -add_executable(hictk_pixel_formatting_bench pixel_formatting.cpp) - -target_link_libraries( - hictk_pixel_formatting_bench - PRIVATE - hictk_project_warnings - hictk_project_options - PUBLIC - hictk::cooler - hictk::format - hictk::pixel -) - -target_link_system_libraries( - hictk_pixel_formatting_bench - PUBLIC - CLI11::CLI11 - std::filesystem -) diff --git a/benchmark/pixel_formatting/pixel_formatting.cpp b/benchmark/pixel_formatting/pixel_formatting.cpp deleted file mode 100644 index 4243bb99..00000000 --- a/benchmark/pixel_formatting/pixel_formatting.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (C) 2024 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#include -#include - -#include -#include -#include -#include -#include - -using namespace hictk; - -struct Config { - std::filesystem::path uri{}; - bool join{false}; - std::size_t iterations{1}; -}; - -template -[[nodiscard]] static std::ptrdiff_t print_pixels(const std::vector &pixels) { - auto *dev_null = std::fopen("/dev/null", "w"); // NOLINT(cppcoreguidelines-owning-memory) - std::for_each(pixels.begin(), pixels.end(), - [&](const auto &p) { fmt::print(dev_null, FMT_COMPILE("{}\n"), p); }); - std::fclose(dev_null); // NOLINT(cert-err33-c,cppcoreguidelines-owning-memory) - - return static_cast(pixels.size()); -} - -// NOLINTNEXTLINE(bugprone-exception-escape) -int main(int argc, char **argv) noexcept { - const auto *argv0 = argv[0]; // NOLINT(*-pointer-arithmetic) - - CLI::App cli{}; - Config config{}; - cli.add_option("uri", config.uri, "URI to a cooler file."); - cli.add_flag("--bg2,!--coo", config.join, "Join genomic coordinates.")->capture_default_str(); - 
cli.add_option("--iterations", config.iterations, "Number of iterations.")->capture_default_str(); - - try { - cli.parse(argc, argv); - - cooler::File f(config.uri.string()); - - std::ptrdiff_t size = 0; - std::uint64_t elapsed_time{}; - - std::vector> pixel_buffer{}; - std::vector> thin_pixel_buffer{}; - - std::for_each(f.begin(), f.end(), [&](const auto &tp) { - if (config.join) { - pixel_buffer.emplace_back(Pixel(f.bins(), tp)); - - } else { - thin_pixel_buffer.emplace_back(tp); - } - }); - - for (std::size_t i = 0; i < config.iterations; ++i) { - const auto t0 = std::chrono::system_clock::now(); - size += print_pixels(thin_pixel_buffer); - size += print_pixels(pixel_buffer); - const auto t1 = std::chrono::system_clock::now(); - const auto delta = static_cast( - std::chrono::duration_cast(t1 - t0).count()); - elapsed_time += delta; - } - - const auto elapsed_seconds = static_cast(elapsed_time) / 1.0e9; - const auto throughput = static_cast(size) / elapsed_seconds; - - fmt::print(FMT_STRING("fmt::print({}) throughput: {:.4} num/s\n"), - config.join ? "Pixel" : "ThinPixel", throughput); - - } catch (const CLI::ParseError &e) { - assert(cli); - return cli.exit(e); - } catch (const std::exception &e) { - fmt::print(stderr, FMT_STRING("FAILURE! {} encountered the following error: {}.\n"), argv0, - e.what()); - return 1; - } catch (...) { - fmt::print(stderr, - FMT_STRING("FAILURE! 
{} encountered the following error: Caught an " - "unhandled exception!\n"), - argv0); - return 1; - } - return 0; -} diff --git a/benchmark/pixels/CMakeLists.txt b/benchmark/pixels/CMakeLists.txt new file mode 100644 index 00000000..b0b90425 --- /dev/null +++ b/benchmark/pixels/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright (C) 2024 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +target_sources(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/pixels.cpp") + +target_link_libraries(hictk_benchmark PUBLIC hictk::pixel) diff --git a/benchmark/pixels/common.hpp b/benchmark/pixels/common.hpp new file mode 100644 index 00000000..c7b32973 --- /dev/null +++ b/benchmark/pixels/common.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include "hictk/chromosome.hpp" + +// clang-format off +// NOLINTNEXTLINE(cert-err58-cpp) +inline const std::vector hg38{ + hictk::Chromosome{0, "chr1", 248956422}, + hictk::Chromosome{1, "chr2", 242193529}, + hictk::Chromosome{2, "chr3", 198295559}, + hictk::Chromosome{3, "chr4", 190214555}, + hictk::Chromosome{4, "chr5", 181538259}, + hictk::Chromosome{5, "chr6", 170805979}, + hictk::Chromosome{6, "chr7", 159345973}, + hictk::Chromosome{7, "chr8", 145138636}, + hictk::Chromosome{8, "chr9", 138394717}, + hictk::Chromosome{9, "chr10", 133797422}, + hictk::Chromosome{10, "chr11", 135086622}, + hictk::Chromosome{11, "chr12", 133275309}, + hictk::Chromosome{12, "chr13", 114364328}, + hictk::Chromosome{13, "chr14", 107043718}, + hictk::Chromosome{14, "chr15", 101991189}, + hictk::Chromosome{15, "chr16", 90338345}, + hictk::Chromosome{16, "chr17", 83257441}, + hictk::Chromosome{17, "chr18", 80373285}, + hictk::Chromosome{18, "chr19", 58617616}, + hictk::Chromosome{19, "chr20", 64444167}, + hictk::Chromosome{20, "chr21", 46709983}, + hictk::Chromosome{21, "chr22", 50818468}, + hictk::Chromosome{22, "chrX", 156040895}, + hictk::Chromosome{23, "chrY", 57227415} +}; +// 
clang-format on diff --git a/benchmark/pixels/pixels.cpp b/benchmark/pixels/pixels.cpp new file mode 100644 index 00000000..19f86c78 --- /dev/null +++ b/benchmark/pixels/pixels.cpp @@ -0,0 +1,279 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "./common.hpp" +#include "hictk/bin.hpp" +#include "hictk/bin_table.hpp" +#include "hictk/bin_table_fixed.hpp" +#include "hictk/chromosome.hpp" +#include "hictk/pixel.hpp" + +using namespace hictk; + +using N = std::uint32_t; + +// NOLINTBEGIN(*-avoid-magic-numbers, readability-function-cognitive-complexity) +[[nodiscard]] static std::pair sample_bin_pair(const BinTableFixed& bin_table, + std::mt19937_64& rand_eng, bool cis_pair) { + const auto chrom1 = bin_table.chromosomes().at(std::uniform_int_distribution{ + 0, static_cast(bin_table.num_chromosomes() - 1)}(rand_eng)); + + auto sample_trans_chrom = [&]() { + while (true) { + auto chrom = bin_table.chromosomes().at(std::uniform_int_distribution{ + 0, static_cast(bin_table.num_chromosomes() - 1)}(rand_eng)); + if (chrom != chrom1) { + return chrom; + } + } + }; + + const auto chrom2 = cis_pair ? 
chrom1 : sample_trans_chrom(); + + auto bin1 = bin_table.at( + chrom1, std::uniform_int_distribution{0, chrom1.size() - 1}(rand_eng)); + auto bin2 = bin_table.at( + chrom2, std::uniform_int_distribution{0, chrom2.size() - 1}(rand_eng)); + if (bin1 > bin2) { + std::swap(bin1, bin2); + } + + return std::make_pair(bin1, bin2); +} + +[[nodiscard]] static std::vector> generate_cis_thin_pixels( + const BinTableFixed& bin_table, std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector> pixels(size); + + std::generate(pixels.begin(), pixels.end(), [&]() { + const auto [bin1, bin2] = sample_bin_pair(bin_table, rand_eng, true); + const auto count = std::uniform_int_distribution{1, 1'000'000}(rand_eng); + return ThinPixel{bin1.id(), bin2.id(), count}; + }); + + return pixels; +} + +[[nodiscard]] static std::vector> generate_trans_thin_pixels( + const BinTableFixed& bin_table, std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector> pixels(size); + + std::generate(pixels.begin(), pixels.end(), [&]() { + const auto [bin1, bin2] = sample_bin_pair(bin_table, rand_eng, false); + const auto count = std::uniform_int_distribution{1, 1'000'000}(rand_eng); + return ThinPixel{bin1.id(), bin2.id(), count}; + }); + + return pixels; +} + +[[nodiscard]] static std::vector> generate_cis_pixels(const BinTableFixed& bin_table, + std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector> pixels(size); + + std::generate(pixels.begin(), pixels.end(), [&]() { + const auto [bin1, bin2] = sample_bin_pair(bin_table, rand_eng, true); + const auto count = std::uniform_int_distribution{1, 1'000'000}(rand_eng); + return Pixel{bin1, bin2, count}; + }); + + return pixels; +} + +[[nodiscard]] static std::vector> generate_trans_pixels(const BinTableFixed& bin_table, + std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector> pixels(size); + + 
std::generate(pixels.begin(), pixels.end(), [&]() { + const auto [bin1, bin2] = sample_bin_pair(bin_table, rand_eng, false); + const auto count = std::uniform_int_distribution{1, 1'000'000}(rand_eng); + return Pixel{bin1, bin2, count}; + }); + + return pixels; +} + +[[nodiscard]] static std::vector> generate_thin_pixels(std::size_t size) { + const BinTableFixed bin_table({hg38.begin(), hg38.end()}, 1'000); + const auto num_cis_interactions = static_cast(0.7 * static_cast(size)); + + auto cis_pixels = generate_cis_thin_pixels(bin_table, num_cis_interactions); + const auto trans_pixels = generate_trans_thin_pixels(bin_table, size - num_cis_interactions); + + cis_pixels.insert(cis_pixels.end(), trans_pixels.begin(), trans_pixels.end()); + return cis_pixels; +} + +[[nodiscard]] static std::vector> generate_pixels(std::size_t size) { + const BinTableFixed bin_table({hg38.begin(), hg38.end()}, 1'000); + const auto num_cis_interactions = static_cast(0.7 * static_cast(size)); + + auto cis_pixels = generate_cis_pixels(bin_table, num_cis_interactions); + const auto trans_pixels = generate_trans_pixels(bin_table, size - num_cis_interactions); + + cis_pixels.insert(cis_pixels.end(), trans_pixels.begin(), trans_pixels.end()); + return cis_pixels; +} + +TEST_CASE("Pixel") { + BENCHMARK_ADVANCED("Construction") + (Catch::Benchmark::Chronometer meter) { + const auto num_runs = static_cast(meter.runs()); + std::vector>> storage(num_runs); + + const Chromosome chrom{0, "chr1", 123'456'789}; + const Bin bin{0, 0, chrom, 0, 1'000}; + + meter.measure([&storage, &bin](std::size_t i) { storage[i].construct(bin, bin, 1); }); + }; + + BENCHMARK_ADVANCED("Destruction") + (Catch::Benchmark::Chronometer meter) { + const auto num_runs = static_cast(meter.runs()); + std::vector>> storage(num_runs); + + const Chromosome chrom{0, "chr1", 123'456'789}; + const Bin bin{0, 0, chrom, 0, 1'000}; + + for (auto& pixel : storage) { + pixel.construct(bin, bin, 1); + } + + 
meter.measure([&storage](std::size_t i) { storage[i].destruct(); }); + }; + + BENCHMARK_ADVANCED("from_coo (uint32)") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure([&bin_table]() { + return Pixel::from_coo(bin_table, "123456\t234567\t123"); + }); + }; + + BENCHMARK_ADVANCED("from_coo (double)") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure( + [&bin_table]() { return Pixel::from_coo(bin_table, "123456\t234567\t123.4567"); }); + }; + + BENCHMARK_ADVANCED("from_bg2 (uint32)") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure([&bin_table]() { + return Pixel::from_bg2(bin_table, + "chr7\t1000000\t1001000\tchr12\t1000000\t1001000\t123"); + }); + }; + + BENCHMARK_ADVANCED("from_bg2 (double)") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure([&bin_table]() { + return Pixel::from_bg2(bin_table, + "chr7\t1000000\t1001000\tchr12\t1000000\t1001000\t123.4567"); + }); + }; + + BENCHMARK_ADVANCED("from_validpair") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure([&bin_table]() { + return Pixel::from_validpair( + bin_table, + "NS500537:79:HFYYWBGX2:1:11112:2304:13920\tchr2\t12233\t+\tchr2\t13674\t+" + "\t1\tfrag1\tfrag2\t1\t1\tallele-info"); + }); + }; + + BENCHMARK_ADVANCED("from_4dn_pairs") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure([&bin_table]() { + return Pixel::from_4dn_pairs( + bin_table, + "NS500537:79:HFYYWBGX2:4:11402:3004:17204\tchr3\t17376401\tchr4\t17467489\t+\t+" + "\tUU\t60\t60"); + }); + }; + + BENCHMARK_ADVANCED("sorting") + (Catch::Benchmark::Chronometer meter) { + const auto pixels = generate_pixels(1'000'000); + std::vector>> 
data(static_cast(meter.runs()), pixels); + meter.measure([&data](std::size_t i) { + std::size_t num_ops{}; + std::sort(data[i].begin(), data[i].end(), [&](const auto& pixel1, const auto& pixel2) { + ++num_ops; + return pixel1 < pixel2; + }); + return num_ops; + }); + }; +} + +TEST_CASE("ThinPixel") { + BENCHMARK_ADVANCED("from_coo w/table (uint32)") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure([&bin_table]() { + return Pixel::from_coo(bin_table, "123456\t234567\t123"); + }); + }; + + BENCHMARK_ADVANCED("from_coo w/table (double)") + (Catch::Benchmark::Chronometer meter) { + const BinTable bin_table{hg38.begin(), hg38.end(), 1'000}; + meter.measure( + [&bin_table]() { return Pixel::from_coo(bin_table, "123456\t234567\t123.4567"); }); + }; + + BENCHMARK_ADVANCED("from_coo wo/table (uint32)") + (Catch::Benchmark::Chronometer meter) { + meter.measure([]() { return ThinPixel::from_coo("123456\t234567\t123"); }); + }; + + BENCHMARK_ADVANCED("from_coo wo/table (double)") + (Catch::Benchmark::Chronometer meter) { + meter.measure([]() { return ThinPixel::from_coo("123456\t234567\t123.4567"); }); + }; + + BENCHMARK_ADVANCED("sorting") + (Catch::Benchmark::Chronometer meter) { + const BinTableFixed bin_table({hg38.begin(), hg38.end()}, 1'000); + const auto pixels = generate_thin_pixels(1'000'000); + std::vector>> data(static_cast(meter.runs()), pixels); + meter.measure([&data](std::size_t i) { + std::size_t num_ops{}; + std::sort(data[i].begin(), data[i].end(), [&](const auto& pixel1, const auto& pixel2) { + ++num_ops; + return pixel1 < pixel2; + }); + return num_ops; + }); + }; +} +// NOLINTEND(*-avoid-magic-numbers, readability-function-cognitive-complexity) diff --git a/benchmark/reference/CMakeLists.txt b/benchmark/reference/CMakeLists.txt new file mode 100644 index 00000000..58072b83 --- /dev/null +++ b/benchmark/reference/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright (C) 2024 Roberto Rossini +# +# 
SPDX-License-Identifier: MIT + +target_sources(hictk_benchmark PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/fetch_chromosomes.cpp") + +target_link_libraries(hictk_benchmark PUBLIC hictk::reference) diff --git a/benchmark/reference/fetch_chromosomes.cpp b/benchmark/reference/fetch_chromosomes.cpp new file mode 100644 index 00000000..60c95edb --- /dev/null +++ b/benchmark/reference/fetch_chromosomes.cpp @@ -0,0 +1,91 @@ +// Copyright (C) 2024 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "hictk/reference.hpp" + +using namespace hictk; + +// NOLINTBEGIN(*-avoid-magic-numbers, readability-function-cognitive-complexity) +[[nodiscard]] static Reference generate_reference(std::size_t num_chroms) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector names(num_chroms); + std::vector sizes(num_chroms); + + for (std::size_t i = 0; i < num_chroms; ++i) { + names[i] = fmt::format(FMT_STRING("chr{}"), i + 1); + sizes[i] = std::uniform_int_distribution{1'000'000, 500'000'000}(rand_eng); + } + + return {names.begin(), names.end(), sizes.begin()}; +} + +[[nodiscard]] static std::vector generate_chrom_names(const Reference& chroms, + std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector names(size); + std::generate(names.begin(), names.end(), [&]() { + const auto chrom_id = std::uniform_int_distribution{ + 0, static_cast(chroms.size() - 1)}(rand_eng); + return chroms.at(chrom_id).name(); + }); + + return names; +} + +[[nodiscard]] static std::vector generate_chrom_ids(const Reference& chroms, + std::size_t size) { + std::random_device rd{}; + std::mt19937_64 rand_eng(rd()); + + std::vector ids(size); + std::generate(ids.begin(), ids.end(), [&]() { + return std::uniform_int_distribution{ + 0, static_cast(chroms.size() - 1)}(rand_eng); + }); + + return ids; +} + +TEST_CASE("Reference::at(name)") { + const std::vector 
num_chroms{5, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000}; + + for (const auto& size : num_chroms) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("{} chromosomes"), size)) + (Catch::Benchmark::Chronometer meter) { + const auto chroms = generate_reference(size); + const auto names = generate_chrom_names(chroms, static_cast(meter.runs())); + + meter.measure([&chroms, &names](std::size_t i) { return chroms.at(names[i]); }); + }; + } +} + +TEST_CASE("Reference::at(id)") { + const std::vector num_chroms{5, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000}; + + for (const auto& size : num_chroms) { + BENCHMARK_ADVANCED(fmt::format(FMT_STRING("{} chromosomes"), size)) + (Catch::Benchmark::Chronometer meter) { + const auto chroms = generate_reference(size); + const auto ids = generate_chrom_ids(chroms, static_cast(meter.runs())); + + meter.measure([&chroms, &ids](std::size_t i) { return chroms.at(ids[i]); }); + }; + } +} +// NOLINTEND(*-avoid-magic-numbers, readability-function-cognitive-complexity) diff --git a/src/libhictk/bin_table/include/hictk/bin.hpp b/src/libhictk/bin_table/include/hictk/bin.hpp index 6431d221..160e56a6 100644 --- a/src/libhictk/bin_table/include/hictk/bin.hpp +++ b/src/libhictk/bin_table/include/hictk/bin.hpp @@ -24,8 +24,8 @@ class Bin { public: constexpr Bin() = default; - Bin(const Chromosome &chrom_, std::uint32_t start_, std::uint32_t end) noexcept; - Bin(std::uint64_t id_, std::uint32_t rel_id_, const Chromosome &chrom_, std::uint32_t start_, + Bin(Chromosome chrom_, std::uint32_t start_, std::uint32_t end) noexcept; + Bin(std::uint64_t id_, std::uint32_t rel_id_, Chromosome chrom_, std::uint32_t start_, std::uint32_t end_) noexcept; explicit Bin(GenomicInterval interval) noexcept; Bin(std::uint64_t id_, std::uint32_t rel_id_, GenomicInterval interval) noexcept; diff --git a/src/libhictk/bin_table/include/hictk/impl/bin_impl.hpp b/src/libhictk/bin_table/include/hictk/impl/bin_impl.hpp index 5dc5c72b..754b964a 100644 --- 
a/src/libhictk/bin_table/include/hictk/impl/bin_impl.hpp +++ b/src/libhictk/bin_table/include/hictk/impl/bin_impl.hpp @@ -11,12 +11,12 @@ namespace hictk { -inline Bin::Bin(const Chromosome &chrom_, std::uint32_t start_, std::uint32_t end_) noexcept - : Bin(Bin::null_id, Bin::rel_null_id, chrom_, start_, end_) {} +inline Bin::Bin(Chromosome chrom_, std::uint32_t start_, std::uint32_t end_) noexcept + : Bin(Bin::null_id, Bin::rel_null_id, std::move(chrom_), start_, end_) {} -inline Bin::Bin(std::uint64_t id_, std::uint32_t rel_id_, const Chromosome &chrom_, - std::uint32_t start_, std::uint32_t end_) noexcept - : _id(id_), _rel_id(rel_id_), _interval(chrom_, start_, end_) {} +inline Bin::Bin(std::uint64_t id_, std::uint32_t rel_id_, Chromosome chrom_, std::uint32_t start_, + std::uint32_t end_) noexcept + : Bin(id_, rel_id_, {std::move(chrom_), start_, end_}) {} inline Bin::Bin(GenomicInterval interval) noexcept : Bin(Bin::null_id, Bin::rel_null_id, std::move(interval)) {} diff --git a/src/libhictk/formatting/include/hictk/fmt/bin.hpp b/src/libhictk/formatting/include/hictk/fmt/bin.hpp index 28fe04b1..d91aab1f 100644 --- a/src/libhictk/formatting/include/hictk/fmt/bin.hpp +++ b/src/libhictk/formatting/include/hictk/fmt/bin.hpp @@ -42,7 +42,9 @@ struct formatter { return it; } - format_context::iterator format(const hictk::Bin &b, format_context &ctx) const { + + template + auto format(const hictk::Bin &b, FormatContext &ctx) const { if (presentation == bed) { return format_to(ctx.out(), FMT_STRING("{:bed}"), b.interval()); } diff --git a/src/libhictk/formatting/include/hictk/fmt/chromosome.hpp b/src/libhictk/formatting/include/hictk/fmt/chromosome.hpp index ec5d7158..a6c7e309 100644 --- a/src/libhictk/formatting/include/hictk/fmt/chromosome.hpp +++ b/src/libhictk/formatting/include/hictk/fmt/chromosome.hpp @@ -37,7 +37,8 @@ struct formatter { return it; } - format_context::iterator format(const hictk::Chromosome& c, format_context& ctx) const { + template + auto 
format(const hictk::Chromosome& c, FormatContext& ctx) const { return presentation == tsv ? format_to(ctx.out(), FMT_STRING("{}\t{}"), c.name(), c.size()) : format_to(ctx.out(), FMT_STRING("{}:{}"), c.name(), c.size()); } diff --git a/src/libhictk/formatting/include/hictk/fmt/genomic_interval.hpp b/src/libhictk/formatting/include/hictk/fmt/genomic_interval.hpp index 3c554f2e..14e5f7cc 100644 --- a/src/libhictk/formatting/include/hictk/fmt/genomic_interval.hpp +++ b/src/libhictk/formatting/include/hictk/fmt/genomic_interval.hpp @@ -36,7 +36,9 @@ struct formatter { return it; } - format_context::iterator format(const hictk::GenomicInterval &gi, format_context &ctx) const { + + template + auto format(const hictk::GenomicInterval &gi, FormatContext &ctx) const { const std::string_view name = !gi ? "null" : gi.chrom().name(); if (presentation == bed) { diff --git a/src/libhictk/pixel/include/hictk/impl/pixel_impl.hpp b/src/libhictk/pixel/include/hictk/impl/pixel_impl.hpp index 227e4c05..53d0f1d6 100644 --- a/src/libhictk/pixel/include/hictk/impl/pixel_impl.hpp +++ b/src/libhictk/pixel/include/hictk/impl/pixel_impl.hpp @@ -230,7 +230,7 @@ inline bool PixelCoordinates::operator>=(const PixelCoordinates &other) const no inline bool PixelCoordinates::is_intra() const noexcept { return bin1.chrom() == bin2.chrom(); } template -inline Pixel::Pixel(Bin bin, N count_) noexcept : Pixel(bin, std::move(bin), count_) {} +inline Pixel::Pixel(const Bin &bin, N count_) noexcept : Pixel(bin, bin, count_) {} template inline Pixel::Pixel(Bin bin1_, Bin bin2_, N count_) noexcept diff --git a/src/libhictk/pixel/include/hictk/pixel.hpp b/src/libhictk/pixel/include/hictk/pixel.hpp index bf09903a..6f720c22 100644 --- a/src/libhictk/pixel/include/hictk/pixel.hpp +++ b/src/libhictk/pixel/include/hictk/pixel.hpp @@ -69,7 +69,7 @@ struct Pixel { N count{}; // NOLINT Pixel() = default; - explicit Pixel(Bin bin, N count_ = 0) noexcept; + explicit Pixel(const Bin &bin, N count_ = 0) noexcept; 
Pixel(Bin bin1_, Bin bin2_, N count_ = 0) noexcept; explicit Pixel(PixelCoordinates coords_, N count_ = 0) noexcept; Pixel(const Chromosome &chrom, std::uint32_t start, std::uint32_t end, N count_ = 0) noexcept;