From 7234995fee8145fedb01ed775ff4763c9036969c Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Mon, 4 Dec 2023 13:55:05 +0100 Subject: [PATCH] Refactor hictk dump to better support --cis/trans-only options --- src/hictk/CMakeLists.txt | 3 + src/hictk/dump/dump.cpp | 431 +++++++++---------------------- src/hictk/dump/dump.hpp | 34 +++ src/hictk/dump/dump_common.cpp | 176 +++++++++++++ src/hictk/dump/dump_multiple.cpp | 3 + src/hictk/dump/dump_single.cpp | 3 + 6 files changed, 334 insertions(+), 316 deletions(-) create mode 100644 src/hictk/dump/dump.hpp create mode 100644 src/hictk/dump/dump_common.cpp create mode 100644 src/hictk/dump/dump_multiple.cpp create mode 100644 src/hictk/dump/dump_single.cpp diff --git a/src/hictk/CMakeLists.txt b/src/hictk/CMakeLists.txt index ea4e57f9..c841eb07 100644 --- a/src/hictk/CMakeLists.txt +++ b/src/hictk/CMakeLists.txt @@ -29,6 +29,9 @@ target_sources( ${CMAKE_CURRENT_SOURCE_DIR}/convert/cool_to_hic.cpp ${CMAKE_CURRENT_SOURCE_DIR}/convert/hic_to_cool.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dump/dump.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dump/dump_common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dump/dump_multiple.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dump/dump_single.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fix_mcool/fix_mcool.cpp ${CMAKE_CURRENT_SOURCE_DIR}/load/load.cpp ${CMAKE_CURRENT_SOURCE_DIR}/merge/merge.cpp diff --git a/src/hictk/dump/dump.cpp b/src/hictk/dump/dump.cpp index 409f54be..8251514f 100644 --- a/src/hictk/dump/dump.cpp +++ b/src/hictk/dump/dump.cpp @@ -2,6 +2,8 @@ // // SPDX-License-Identifier: MIT +#include "./dump.hpp" + #include #include "hictk/balancing/methods.hpp" @@ -12,385 +14,179 @@ namespace hictk::tools { -static void print(const Pixel& pixel) { - fmt::print(FMT_COMPILE("{:bg2}\t{:.16g}\n"), pixel.coords, pixel.count); -} -static void print(const ThinPixel& pixel) { - fmt::print(FMT_COMPILE("{:d}\t{:d}\t{:.16g}\n"), pixel.bin1_id, pixel.bin2_id, pixel.count); +template +static void dump_pixels(PixelIt first_pixel, PixelIt last_pixel, + const std::shared_ptr& bins, bool join) { + if (!join) { + return print_pixels(first_pixel, last_pixel); + } + auto jsel = transformers::JoinGenomicCoords(first_pixel, last_pixel, bins); + return print_pixels(jsel.begin(), jsel.end()); } -template -static void dump_bins(const File& f, std::string_view range) { - if (range == "all") { - for (const auto& bin : f.bins()) { - fmt::print(FMT_COMPILE("{:s}\t{:d}\t{:d}\n"), bin.chrom().name(), bin.start(), bin.end()); - } - return; +static void dump_pixels_gw(File& f, std::string_view normalization, bool join, bool sorted) { + if (f.is_hic()) { + f.get().optimize_cache_size_for_iteration(); } - const auto coords = GenomicInterval::parse_ucsc(f.chromosomes(), std::string{range}); - auto [first_bin, last_bin] = f.bins().find_overlap(coords); - std::for_each(first_bin, last_bin, [](const Bin& bin) { - fmt::print(FMT_COMPILE("{:s}\t{:d}\t{:d}\n"), bin.chrom().name(), bin.start(), bin.end()); - }); -} - -[[nodiscard]] static std::pair parse_bedpe(std::string_view line) { - auto next_token = [&]() { - assert(!line.empty()); - const auto pos1 = line.find('\t'); - const auto pos2 = line.find('\t', pos1 + 1); - const auto pos3 = line.find('\t', pos2 + 1); - - auto tok = std::string{line.substr(0, pos3)}; - tok[pos1] = ':'; - tok[pos2] = '-'; - line.remove_prefix(pos3 + 1); - return tok; - }; - const auto range1 = next_token(); - const auto range2 = next_token(); - - return std::make_pair(range1, range2); -} + if (f.is_hic()) { + const auto& ff = f.get(); + const auto sel = ff.fetch(normalization); + dump_pixels(sel.template begin(sorted), sel.template end(), ff.bins_ptr(), + join); + return; + } -template -static void print_pixels(PixelIt first, PixelIt last) { - std::for_each(first, last, [&](const auto& pixel) { print(pixel); }); + const auto& ff = f.get(); + const auto sel = ff.fetch(normalization); + dump_pixels(sel.template begin(), sel.template end(), ff.bins_ptr(), join); } -static void dump_pixels(const cooler::File& f, std::string_view range1, std::string_view range2, - std::string_view normalization, bool join, [[maybe_unused]] bool sorted) { - auto weights = f.read_weights(balancing::Method{normalization}); - if (range1 == "all") { - assert(range2 == "all"); - auto sel = f.fetch(weights); - if (!join) { - return print_pixels(sel.template begin(), sel.template end()); - } - - auto jsel = - transformers::JoinGenomicCoords(sel.begin(), sel.end(), f.bins_ptr()); - return print_pixels(jsel.begin(), jsel.end()); - } - - auto sel = f.fetch(range1, range2, weights); - if (!join) { - return print_pixels(sel.template begin(), sel.template end()); +static void dump_pixels_chrom_chrom(File& f, std::string_view range1, std::string_view range2, + std::string_view normalization, bool join, bool sorted) { + if (f.is_hic()) { + const auto& ff = f.get(); + const auto sel = ff.fetch(range1, range2, balancing::Method{normalization}); + dump_pixels(sel.template begin(sorted), sel.template end(), ff.bins_ptr(), + join); + return; } - auto jsel = transformers::JoinGenomicCoords(sel.begin(), sel.end(), f.bins_ptr()); - print_pixels(jsel.begin(), jsel.end()); + const auto& ff = f.get(); + const auto sel = ff.fetch(range1, range2, balancing::Method{normalization}); + dump_pixels(sel.template begin(), sel.template end(), ff.bins_ptr(), join); } -static void dump_pixels(hic::File& f, std::string_view range1, std::string_view range2, +static void dump_pixels(File& f, std::string_view range1, std::string_view range2, std::string_view normalization, bool join, bool sorted) { auto norm = balancing::Method{std::string{normalization}}; if (range1 == "all") { assert(range2 == "all"); - f.optimize_cache_size_for_iteration(); - auto sel = f.fetch(norm); - if (!join) { - return print_pixels(sel.template begin(sorted), sel.template end()); - } - auto jsel = - transformers::JoinGenomicCoords(sel.begin(sorted), sel.end(), f.bins_ptr()); - return print_pixels(jsel.begin(), jsel.end()); + return dump_pixels_gw(f, normalization, join, sorted); } - auto sel = f.fetch(range1, range2, norm); - if (!join) { - return print_pixels(sel.template begin(sorted), sel.template end()); - } - - auto jsel = - transformers::JoinGenomicCoords(sel.begin(sorted), sel.end(), f.bins_ptr()); - print_pixels(jsel.begin(), jsel.end()); + return dump_pixels_chrom_chrom(f, range1, range2, normalization, join, sorted); } -static void dump_pixels_trans_only_sorted(const cooler::File& f, std::string_view normalization, - bool join) { - auto weights = f.read_weights(balancing::Method{normalization}); - - using It = decltype(f.fetch("chr1", "chr2").template begin()); - - const auto chroms = f.chromosomes(); - for (std::uint32_t i = 0; i < chroms.size(); ++i) { - std::vector heads{}; - std::vector tails{}; - for (std::uint32_t j = i + 1; j < chroms.size(); ++j) { - const auto& chrom1 = chroms[i]; - const auto& chrom2 = chroms[j]; - - if (chrom1.is_all() || chrom2.is_all()) { - continue; - } - - auto sel = f.fetch(chrom1.name(), chrom2.name(), weights); - heads.emplace_back(sel.begin()); - tails.emplace_back(sel.end()); - } - - internal::PixelMerger merger(std::move(heads), std::move(tails)); - - if (!join) { - print_pixels(merger.begin(), merger.end()); +static void process_query_cis_only(File& f, std::string_view normalization, bool join, + bool sorted) { + for (const auto& chrom : f.chromosomes()) { + if (chrom.is_all()) { continue; } - - auto jsel = transformers::JoinGenomicCoords(merger.begin(), merger.end(), f.bins_ptr()); - print_pixels(jsel.begin(), jsel.end()); + dump_pixels(f, chrom.name(), chrom.name(), normalization, join, sorted); } } -static void dump_pixels_trans_only_sorted(hic::File& f, std::string_view normalization, bool join) { +static void dump_pixels_trans_only_sorted(File& f, std::string_view normalization, bool join) { auto norm = balancing::Method{std::string{normalization}}; - std::vector selectors; - - for (std::uint32_t chrom1_id = 0; chrom1_id < f.chromosomes().size(); ++chrom1_id) { - const auto& chrom1 = f.chromosomes().at(chrom1_id); - if (chrom1.is_all()) { - continue; - } - for (std::uint32_t chrom2_id = chrom1_id + 1; chrom2_id < f.chromosomes().size(); ++chrom2_id) { - const auto& chrom2 = f.chromosomes().at(chrom2_id); - if (chrom2.is_all()) { - continue; - } - try { - auto sel = f.fetch(chrom1.name(), chrom2.name(), norm); - if (!sel.empty()) { - selectors.emplace_back(std::move(sel)); - } - } catch (const std::exception& e) { - const std::string_view msg{e.what()}; - const auto missing_norm = msg.find("unable to find") != std::string_view::npos && - msg.find("normalization vector") != std::string_view::npos; - if (!missing_norm) { - throw; - } - } - } - } - - if (selectors.empty()) { - throw std::runtime_error( - fmt::format(FMT_STRING("unable to find {} normalization vectors at {} ({})"), - norm.to_string(), f.resolution(), hic::MatrixUnit::BP)); - } - - hic::PixelSelectorAll sel(std::move(selectors)); - - if (!join) { - print_pixels(sel.template begin(), sel.template end()); - } - - auto jsel = transformers::JoinGenomicCoords(sel.template begin(), - sel.template end(), f.bins_ptr()); - print_pixels(jsel.begin(), jsel.end()); -} - -static void process_query(File& f, std::string_view table, std::string_view range1, - std::string_view range2, std::string_view normalization, bool join, - bool sorted) { - if (table == "bins") { - return dump_bins(f, range1); - } - - assert(table == "pixels"); - std::visit([&](auto& ff) { return dump_pixels(ff, range1, range2, normalization, join, sorted); }, - f.get()); -} - -static void process_query_cis_only(File& f, std::string_view normalization, bool join, - bool sorted) { std::visit( - [&](auto& ff) { - for (const auto& chrom : ff.chromosomes()) { - if (chrom.is_all()) { - continue; - } - dump_pixels(ff, chrom.name(), chrom.name(), normalization, join, sorted); - } - }, - f.get()); -} + [&](const auto& ff) { + using It = decltype(ff.fetch("chr1", "chr2").template begin()); + std::vector heads{}; + std::vector tails{}; -static void process_query_trans_only_unsorted(File& f, std::string_view normalization, bool join) { - std::visit( - [&](auto& ff) { - const auto& chromosomes = ff.chromosomes(); - for (std::uint32_t chrom1_id = 0; chrom1_id < chromosomes.size(); ++chrom1_id) { - const auto& chrom1 = chromosomes[chrom1_id]; + for (std::uint32_t chrom1_id = 0; chrom1_id < ff.chromosomes().size(); ++chrom1_id) { + const auto& chrom1 = ff.chromosomes().at(chrom1_id); if (chrom1.is_all()) { continue; } - for (std::uint32_t chrom2_id = chrom1_id + 1; chrom2_id < chromosomes.size(); + for (std::uint32_t chrom2_id = chrom1_id + 1; chrom2_id < ff.chromosomes().size(); ++chrom2_id) { - const auto& chrom2 = chromosomes[chrom2_id]; + const auto& chrom2 = ff.chromosomes().at(chrom2_id); if (chrom2.is_all()) { continue; } - - dump_pixels(ff, chrom1.name(), chrom2.name(), normalization, join, false); + try { + const auto sel = ff.fetch(chrom1.name(), chrom2.name(), norm); + heads.emplace_back(sel.template begin()); + tails.emplace_back(sel.template end()); + } catch (const std::exception& e) { + const std::string_view msg{e.what()}; + const auto missing_norm = msg.find("unable to find") != std::string_view::npos && + msg.find("normalization vector") != std::string_view::npos; + if (!missing_norm) { + throw; + } + } } } - }, - f.get()); -} -static void process_query_trans_only_sorted(File& f, std::string_view normalization, bool join) { - std::visit([&](auto& ff) { dump_pixels_trans_only_sorted(ff, normalization, join); }, f.get()); -} - -static void process_query_trans_only(File& f, std::string_view normalization, bool join, - bool sorted) { - if (sorted) { - return process_query_trans_only_sorted(f, normalization, join); - } - return process_query_trans_only_unsorted(f, normalization, join); -} + if (heads.empty()) { + throw std::runtime_error( + fmt::format(FMT_STRING("unable to find {} normalization vectors at {} ({})"), + norm.to_string(), ff.bin_size(), hic::MatrixUnit::BP)); + } -static int dump_chroms(std::string_view uri, std::string_view format, std::uint32_t resolution) { - Reference ref{}; + transformers::PixelMerger merger(heads, tails); - if (format == "mcool") { - ref = cooler::MultiResFile{std::string{uri}}.chromosomes(); - } else if (format == "scool") { - ref = cooler::SingleCellFile{std::string{uri}}.chromosomes(); - } else { - ref = File{std::string{uri}, resolution}.chromosomes(); - } + if (!join) { + print_pixels(merger.begin(), merger.end()); + } - for (const Chromosome& chrom : ref) { - if (!chrom.is_all()) { - fmt::print(FMT_COMPILE("{:s}\t{:d}\n"), chrom.name(), chrom.size()); - } - } - return 0; + auto jsel = transformers::JoinGenomicCoords(merger.begin(), merger.end(), f.bins_ptr()); + print_pixels(jsel.begin(), jsel.end()); + }, + f.get()); } -static phmap::btree_set get_normalizations(std::string_view uri, - std::string_view format, - std::uint32_t resolution) { - assert(format != "mcool"); - assert(format != "hic" || resolution != 0); - if (format == "scool") { - const auto cell_ids = cooler::SingleCellFile{uri}.cells(); - if (cell_ids.empty()) { - return {}; - } - - const auto scool_uri = fmt::format(FMT_STRING("{}::/cells/{}"), uri, *cell_ids.begin()); - return get_normalizations(scool_uri, "cool", 0); - } - - phmap::btree_set norms{}; - if (uri == "hic" && resolution == 0) { - const hic::File hf{std::string{uri}, resolution}; - - for (const auto& norm : hf.avail_normalizations()) { - norms.emplace(std::string{norm.to_string()}); +static void dump_pixels_trans_only_unsorted(File& f, std::string_view normalization, bool join) { + const auto& chromosomes = f.chromosomes(); + for (std::uint32_t chrom1_id = 0; chrom1_id < chromosomes.size(); ++chrom1_id) { + const auto& chrom1 = chromosomes[chrom1_id]; + if (chrom1.is_all()) { + continue; } - return norms; - } - - const auto norms_ = File{std::string{uri}, resolution}.avail_normalizations(); - std::transform(norms_.begin(), norms_.end(), std::inserter(norms, norms.begin()), - [](const auto& n) { return std::string{n.to_string()}; }); - - return norms; -} + for (std::uint32_t chrom2_id = chrom1_id + 1; chrom2_id < chromosomes.size(); ++chrom2_id) { + const auto& chrom2 = chromosomes[chrom2_id]; + if (chrom2.is_all()) { + continue; + } -static int dump_normalizations(std::string_view uri, std::string_view format, - std::uint32_t resolution) { - phmap::btree_set norms{}; - std::vector resolutions{}; - if (format == "mcool") { - resolutions = cooler::MultiResFile{uri}.resolutions(); - if (resolutions.empty()) { - return 0; - } - } else if (format == "hic" && resolution == 0) { - resolutions = hic::utils::list_resolutions(std::string{uri}); - if (resolutions.empty()) { - return 0; + dump_pixels(f, chrom1.name(), chrom2.name(), normalization, join, false); } } - - if (resolutions.empty()) { - norms = get_normalizations(uri, format, resolution); - } else { - format = format == "hic" ? "hic" : "cool"; - std::for_each(resolutions.begin(), resolutions.end(), - [&](const auto res) { norms.merge(get_normalizations(uri, format, res)); }); - } - - if (!norms.empty()) { - fmt::print(FMT_STRING("{}\n"), fmt::join(norms, "\n")); - } - return 0; } -static int dump_resolutions(std::string_view uri, std::string_view format, - std::uint32_t resolution) { - std::vector resolutions{}; - - if (format == "hic") { - resolutions = hic::utils::list_resolutions(uri); - if (resolution != 0) { - const auto res_found = - std::find(resolutions.begin(), resolutions.end(), resolution) != resolutions.end(); - resolutions.clear(); - if (res_found) { - resolutions.push_back(resolution); - } - } - } else if (format == "mcool") { - resolutions = cooler::MultiResFile{uri}.resolutions(); - } else if (format == "scool") { - resolutions.push_back(cooler::SingleCellFile{uri}.bin_size()); - } else { - assert(format == "cool"); - resolutions.push_back(cooler::File{uri}.bin_size()); +static void process_query(File& f, std::string_view table, std::string_view range1, + std::string_view range2, std::string_view normalization, bool join, + bool sorted) { + if (table == "bins") { + return dump_bins(f, range1); } - if (!resolutions.empty()) { - fmt::print(FMT_STRING("{}\n"), fmt::join(resolutions, "\n")); - } - return 0; + assert(table == "pixels"); + dump_pixels(f, range1, range2, normalization, join, sorted); } -static int dump_cells(std::string_view uri, std::string_view format) { - if (format != "scool") { - throw std::runtime_error(fmt::format(FMT_STRING("\"{}\" is not a .scool file"), uri)); - } - const auto cells = cooler::SingleCellFile{uri}.cells(); - if (!cells.empty()) { - fmt::print(FMT_STRING("{}\n"), fmt::join(cells, "\n")); +static void process_query_trans_only(File& f, std::string_view normalization, bool join, + bool sorted) { + if (sorted) { + dump_pixels_trans_only_sorted(f, normalization, join); + return; } - return 0; + dump_pixels_trans_only_unsorted(f, normalization, join); } -static int dump_tables(const DumpConfig& c) { +static void dump_tables(const DumpConfig& c) { hictk::File f{c.uri, c.resolution, c.matrix_type, c.matrix_unit}; if (c.query_file.empty() && !c.cis_only && !c.trans_only) { process_query(f, c.table, c.range1, c.range2, c.normalization, c.join, c.sorted); - return 0; + return; } if (c.cis_only) { assert(c.table == "pixels"); process_query_cis_only(f, c.normalization, c.join, c.sorted); - return 0; + return; } if (c.trans_only) { assert(c.table == "pixels"); process_query_trans_only(f, c.normalization, c.join, c.sorted); - return 0; + return; } const auto read_from_stdin = c.query_file == "-"; @@ -407,29 +203,32 @@ static int dump_tables(const DumpConfig& c) { const auto [range1, range2] = parse_bedpe(line); process_query(f, c.table, range1, range2, c.normalization, c.join, c.sorted); } - - return 0; } int dump_subcmd(const DumpConfig& c) { if (c.table == "bins" || c.table == "pixels") { - return dump_tables(c); + dump_tables(c); + return 0; } if (c.table == "chroms") { - return dump_chroms(c.uri, c.format, c.resolution); + dump_chroms(c.uri, c.format, c.resolution); + return 0; } if (c.table == "resolutions") { - return dump_resolutions(c.uri, c.format, c.resolution); + dump_resolutions(c.uri, c.format, c.resolution); + return 0; } if (c.table == "normalizations") { - return dump_normalizations(c.uri, c.format, c.resolution); + dump_normalizations(c.uri, c.format, c.resolution); + return 0; } assert(c.table == "cells"); - return dump_cells(c.uri, c.format); + dump_cells(c.uri, c.format); + return 0; } } // namespace hictk::tools diff --git a/src/hictk/dump/dump.hpp b/src/hictk/dump/dump.hpp new file mode 100644 index 00000000..1ca191f3 --- /dev/null +++ b/src/hictk/dump/dump.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include + +#include "hictk/file.hpp" +#include "hictk/pixel.hpp" + +namespace hictk::tools { + +void print(const Pixel& pixel); +void print(const ThinPixel& pixel); + +void dump_bins(const File& f, std::string_view range); +void dump_cells(std::string_view uri, std::string_view format); +void dump_chroms(std::string_view uri, std::string_view format, std::uint32_t resolution); +void dump_normalizations(std::string_view uri, std::string_view format, std::uint32_t resolution); +void dump_resolutions(std::string_view uri, std::string_view format, std::uint32_t resolution); + +[[nodiscard]] std::pair parse_bedpe(std::string_view line); + +template +inline void print_pixels(PixelIt first, PixelIt last) { + std::for_each(first, last, [&](const auto& pixel) { print(pixel); }); +} + +} // namespace hictk::tools diff --git a/src/hictk/dump/dump_common.cpp b/src/hictk/dump/dump_common.cpp new file mode 100644 index 00000000..bafa1f30 --- /dev/null +++ b/src/hictk/dump/dump_common.cpp @@ -0,0 +1,176 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "./dump.hpp" +#include "hictk/cooler.hpp" +#include "hictk/file.hpp" +#include "hictk/hic.hpp" +#include "hictk/pixel.hpp" + +namespace hictk::tools { +void print(const Pixel& pixel) { + fmt::print(FMT_COMPILE("{:bg2}\t{:.16g}\n"), pixel.coords, pixel.count); +} +void print(const ThinPixel& pixel) { + fmt::print(FMT_COMPILE("{:d}\t{:d}\t{:.16g}\n"), pixel.bin1_id, pixel.bin2_id, pixel.count); +} + +void dump_bins(const File& f, std::string_view range) { + if (range == "all") { + for (const auto& bin : f.bins()) { + fmt::print(FMT_COMPILE("{:s}\t{:d}\t{:d}\n"), bin.chrom().name(), bin.start(), bin.end()); + } + return; + } + + const auto coords = GenomicInterval::parse_ucsc(f.chromosomes(), std::string{range}); + auto [first_bin, last_bin] = f.bins().find_overlap(coords); + std::for_each(first_bin, last_bin, [](const Bin& bin) { + fmt::print(FMT_COMPILE("{:s}\t{:d}\t{:d}\n"), bin.chrom().name(), bin.start(), bin.end()); + }); +} + +void dump_cells(std::string_view uri, std::string_view format) { + if (format != "scool") { + throw std::runtime_error(fmt::format(FMT_STRING("\"{}\" is not a .scool file"), uri)); + } + const auto cells = cooler::SingleCellFile{uri}.cells(); + std::for_each(cells.begin(), cells.end(), + [&](const auto& cell) { fmt::print(FMT_COMPILE("{}\n"), cell); }); +} + +void dump_chroms(std::string_view uri, std::string_view format, std::uint32_t resolution) { + Reference ref{}; + + if (format == "mcool") { + ref = cooler::MultiResFile{std::string{uri}}.chromosomes(); + } else if (format == "scool") { + ref = cooler::SingleCellFile{std::string{uri}}.chromosomes(); + } else { + ref = File{std::string{uri}, resolution}.chromosomes(); + } + + for (const Chromosome& chrom : ref) { + if (!chrom.is_all()) { + fmt::print(FMT_COMPILE("{:s}\t{:d}\n"), chrom.name(), chrom.size()); + } + } +} + +static phmap::btree_set get_normalizations(std::string_view uri, + std::string_view format, + std::uint32_t resolution) { + assert(format != "mcool"); + assert(format != "hic" || resolution != 0); + if (format == "scool") { + const auto cell_ids = cooler::SingleCellFile{uri}.cells(); + if (cell_ids.empty()) { + return {}; + } + + const auto scool_uri = fmt::format(FMT_STRING("{}::/cells/{}"), uri, *cell_ids.begin()); + return get_normalizations(scool_uri, "cool", 0); + } + + phmap::btree_set norms{}; + if (uri == "hic" && resolution == 0) { + const hic::File hf{std::string{uri}, resolution}; + + for (const auto& norm : hf.avail_normalizations()) { + norms.emplace(std::string{norm.to_string()}); + } + return norms; + } + + const auto norms_ = File{std::string{uri}, resolution}.avail_normalizations(); + std::transform(norms_.begin(), norms_.end(), std::inserter(norms, norms.begin()), + [](const auto& n) { return std::string{n.to_string()}; }); + + return norms; +} + +void dump_normalizations(std::string_view uri, std::string_view format, std::uint32_t resolution) { + phmap::btree_set norms{}; + std::vector resolutions{}; + if (format == "mcool") { + resolutions = cooler::MultiResFile{uri}.resolutions(); + if (resolutions.empty()) { + return; + } + } else if (format == "hic" && resolution == 0) { + resolutions = hic::utils::list_resolutions(std::string{uri}); + if (resolutions.empty()) { + return; + } + } + + if (resolutions.empty()) { + norms = get_normalizations(uri, format, resolution); + } else { + format = format == "hic" ? "hic" : "cool"; + std::for_each(resolutions.begin(), resolutions.end(), + [&](const auto res) { norms.merge(get_normalizations(uri, format, res)); }); + } + + if (!norms.empty()) { + fmt::print(FMT_STRING("{}\n"), fmt::join(norms, "\n")); + } +} + +void dump_resolutions(std::string_view uri, std::string_view format, std::uint32_t resolution) { + std::vector resolutions{}; + + if (format == "hic") { + resolutions = hic::utils::list_resolutions(uri); + if (resolution != 0) { + const auto res_found = + std::find(resolutions.begin(), resolutions.end(), resolution) != resolutions.end(); + resolutions.clear(); + if (res_found) { + resolutions.push_back(resolution); + } + } + } else if (format == "mcool") { + resolutions = cooler::MultiResFile{uri}.resolutions(); + } else if (format == "scool") { + resolutions.push_back(cooler::SingleCellFile{uri}.bin_size()); + } else { + assert(format == "cool"); + resolutions.push_back(cooler::File{uri}.bin_size()); + } + + if (!resolutions.empty()) { + fmt::print(FMT_STRING("{}\n"), fmt::join(resolutions, "\n")); + } +} + +std::pair parse_bedpe(std::string_view line) { + auto next_token = [&]() { + assert(!line.empty()); + const auto pos1 = line.find('\t'); + const auto pos2 = line.find('\t', pos1 + 1); + const auto pos3 = line.find('\t', pos2 + 1); + + auto tok = std::string{line.substr(0, pos3)}; + tok[pos1] = ':'; + tok[pos2] = '-'; + line.remove_prefix(pos3 + 1); + return tok; + }; + const auto range1 = next_token(); + const auto range2 = next_token(); + + return std::make_pair(range1, range2); +} +} // namespace hictk::tools diff --git a/src/hictk/dump/dump_multiple.cpp b/src/hictk/dump/dump_multiple.cpp new file mode 100644 index 00000000..5b5e08cc --- /dev/null +++ b/src/hictk/dump/dump_multiple.cpp @@ -0,0 +1,3 @@ +// +// Created by roby on 12/1/23. +// diff --git a/src/hictk/dump/dump_single.cpp b/src/hictk/dump/dump_single.cpp new file mode 100644 index 00000000..5b5e08cc --- /dev/null +++ b/src/hictk/dump/dump_single.cpp @@ -0,0 +1,3 @@ +// +// Created by roby on 12/1/23. +//