Skip to content

Commit

Permalink
Update hictk dump to support dumping resolutions, cells and normalizations
Browse files Browse the repository at this point in the history
  • Loading branch information
robomics committed Sep 17, 2023
1 parent d4eb1bc commit 46af0f7
Show file tree
Hide file tree
Showing 12 changed files with 234 additions and 50 deletions.
50 changes: 29 additions & 21 deletions src/hictk/cli/cli_dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ void Cli::make_dump_subcommand() {
"uri",
c.uri,
"Path to a .hic, .cool or .mcool file (Cooler URI syntax supported).")
->check(IsValidHiCFile | IsValidCoolerFile)
->check(IsValidHiCFile |
IsValidCoolerFile |
IsValidMultiresCoolerFile |
IsValidSingleCellCoolerFile)
->required();

sc.add_option(
Expand All @@ -57,7 +60,8 @@ void Cli::make_dump_subcommand() {
"-t,--table",
c.table,
"Name of the table to dump.\n")
->check(CLI::IsMember({"chroms", "bins", "pixels"}))
->check(CLI::IsMember({"chroms", "bins", "pixels", "normalizations",
"resolutions", "cells"}))
->capture_default_str();

sc.add_option(
Expand Down Expand Up @@ -106,6 +110,7 @@ void Cli::make_dump_subcommand() {

// clang-format on

sc.get_option("--range2")->needs(sc.get_option("--range"));
sc.get_option("--query-file")->excludes(sc.get_option("--range"));
sc.get_option("--query-file")->excludes(sc.get_option("--range2"));

Expand All @@ -119,38 +124,40 @@ void Cli::validate_dump_subcommand() const {
std::vector<std::string> errors;
const auto& c = std::get<DumpConfig>(_config);

if (!errors.empty()) {
throw std::runtime_error(
fmt::format(FMT_STRING("the following error(s) where encountered while validating CLI "
"arguments and input file(s):\n - {}"),
fmt::join(errors, "\n - ")));
}
const auto& subcmd = *_cli.get_subcommand("dump");

const auto is_hic = hic::utils::is_hic_file(c.uri);
const auto is_cooler = cooler::utils::is_cooler(c.uri);
const auto is_mcooler = cooler::utils::is_multires_file(c.uri);
const auto is_scool = cooler::utils::is_scool_file(c.uri);

if (is_hic && c.resolution == 0 && c.table != "chroms") {
errors.emplace_back("--resolution is mandatory when file is in .hic format.");
if ((is_hic || is_mcooler) && c.resolution == 0 && (c.table == "pixels" || c.table == "bins")) {
errors.emplace_back("--resolution is mandatory when file is in .hic or .mcool format.");
}

const auto resolution_parsed = !_cli.get_subcommand("dump")->get_option("--resolution")->empty();
const auto resolution_parsed = !subcmd.get_option("--resolution")->empty();

if ((is_cooler || is_mcooler) && resolution_parsed) {
warnings.emplace_back("--resolution is ignored when file is in .cool or .mcool format.");
if ((is_cooler || is_scool) && resolution_parsed) {
warnings.emplace_back("--resolution is ignored when file is in .[s]cool format.");
}

const auto weight_type_parsed =
!_cli.get_subcommand("dump")->get_option("--weight-type")->empty();
const auto weight_type_parsed = !subcmd.get_option("--weight-type")->empty();

if (is_hic && weight_type_parsed) {
warnings.emplace_back("--weight-type is ignored when file is in .hic format.");
}

const auto matrix_type_parsed =
!_cli.get_subcommand("dump")->get_option("--matrix-type")->empty();
const auto matrix_unit_parsed =
!_cli.get_subcommand("dump")->get_option("--matrix-unit")->empty();
const auto range_parsed = !subcmd.get_option("--range")->empty();
if (range_parsed && c.table != "bins" && c.table != "pixels") {
warnings.emplace_back("--range and --range2 are ignore when --table is not bins or pixels");
}
const auto query_file_parsed = !subcmd.get_option("--query-file")->empty();
if (query_file_parsed && c.table != "bins" && c.table != "pixels") {
warnings.emplace_back("--query-file is ignored when --table is not bins or pixels");
}

const auto matrix_type_parsed = !subcmd.get_option("--matrix-type")->empty();
const auto matrix_unit_parsed = !subcmd.get_option("--matrix-unit")->empty();

if (!is_hic && (matrix_type_parsed || matrix_unit_parsed)) {
warnings.emplace_back(
Expand Down Expand Up @@ -181,9 +188,10 @@ void Cli::transform_args_dump_subcommand() {
c.verbosity = static_cast<std::uint8_t>(spdlog::level::critical) - c.verbosity;

c.format = infer_input_format(c.uri);
if (c.format == "hic" && c.resolution == 0) {
assert(c.table == "chroms");
if (c.format == "hic" && c.resolution == 0 && c.table == "chroms") {
c.resolution = hic::utils::list_resolutions(c.uri).back();
} else if (c.format == "mcool" && c.resolution == 0 && c.table == "chroms") {
c.resolution = cooler::utils::list_resolutions(c.uri).back();
}

if (_cli.get_subcommand("dump")->get_option("--range2")->empty()) {
Expand Down
4 changes: 2 additions & 2 deletions src/hictk/cli/cli_validate.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (C) 2023 Roberto Rossini <roberros@uio.no>
//
// Created by roby on 7/13/23.
//
// SPDX-License-Identifier: MIT

#include <fmt/format.h>
#include <fmt/std.h>
Expand Down
166 changes: 145 additions & 21 deletions src/hictk/dump/dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <variant>

#include "hictk/balancing/methods.hpp"
#include "hictk/cooler.hpp"
#include "hictk/file.hpp"
#include "hictk/tools/config.hpp"
#include "hictk/transformers.hpp"
Expand All @@ -18,23 +19,6 @@ static void print(const ThinPixel<double>& pixel) {
fmt::print(FMT_COMPILE("{:d}\t{:d}\t{:.16g}\n"), pixel.bin1_id, pixel.bin2_id, pixel.count);
}

static void dump_chroms(const File& f, std::string_view range) {
if (range == "all") {
for (const Chromosome& chrom : f.chromosomes()) {
if (!chrom.is_all()) {
fmt::print(FMT_COMPILE("{:s}\t{:d}\n"), chrom.name(), chrom.size());
}
}
return;
}

const auto coords = GenomicInterval::parse_ucsc(f.chromosomes(), std::string{range});
auto it = f.chromosomes().find(coords.chrom());
if (it != f.chromosomes().end()) {
fmt::print(FMT_COMPILE("{:s}\t{:d}\n"), it->name(), it->size());
}
}

template <typename File>
static void dump_bins(const File& f, std::string_view range) {
if (range == "all") {
Expand Down Expand Up @@ -127,9 +111,6 @@ static void dump_pixels(hic::File& f, std::string_view range1, std::string_view
static void process_query(File& f, std::string_view table, std::string_view range1,
std::string_view range2, std::string_view normalization, bool join,
bool sorted) {
if (table == "chroms") {
return dump_chroms(f, range1);
}
if (table == "bins") {
return dump_bins(f, range1);
}
Expand All @@ -139,7 +120,128 @@ static void process_query(File& f, std::string_view table, std::string_view rang
f.get());
}

int dump_subcmd(const DumpConfig& c) {
static int dump_chroms(std::string_view uri, std::string_view format, std::uint32_t resolution) {
Reference ref{};

if (format == "mcool") {
ref = cooler::MultiResFile{std::string{uri}}.chromosomes();
} else if (format == "scool") {
ref = cooler::SingleCellFile{std::string{uri}}.chromosomes();
} else {
ref = File{std::string{uri}, resolution}.chromosomes();
}

for (const Chromosome& chrom : ref) {
if (!chrom.is_all()) {
fmt::print(FMT_COMPILE("{:s}\t{:d}\n"), chrom.name(), chrom.size());
}
}
return 0;
}

// Collect the names of the normalizations available in the file at `uri`.
//
// uri:        path/URI of the file to inspect.
// format:     file format. "mcool" is rejected by an assertion; "scool" is handled by inspecting
//             a single cell; every other value is opened as a regular file.
// resolution: resolution used to open the file. Must be non-zero when format is "hic" (asserted).
//
// Returns the normalization names as a set of strings. For "scool" files only the first cell
// returned by cells() is inspected, and an empty set is returned when the file has no cells.
static phmap::btree_set<std::string> get_normalizations(std::string_view uri,
                                                        std::string_view format,
                                                        std::uint32_t resolution) {
  assert(format != "mcool");
  assert(format != "hic" || resolution != 0);
  if (format == "scool") {
    const auto cell_ids = cooler::SingleCellFile{uri}.cells();
    if (cell_ids.empty()) {
      return {};
    }

    // Re-enter with the URI of the first cell, which is then read as a plain cooler.
    const auto scool_uri = fmt::format(FMT_STRING("{}::/cells/{}"), uri, *cell_ids.begin());
    return get_normalizations(scool_uri, "cool", 0);
  }

  phmap::btree_set<std::string> norms{};
  // The format name (not the URI) decides whether the file is opened as .hic. The previous check
  // compared `uri` against "hic", so this branch was never selected for real file paths.
  // NOTE: given the assertion above, this branch is only reachable when assertions are disabled.
  if (format == "hic" && resolution == 0) {
    const hic::File hf{std::string{uri}, resolution};

    for (const auto& norm : hf.avail_normalizations()) {
      norms.emplace(std::string{norm.to_string()});
    }
    return norms;
  }

  // Generic path: open the file at the requested resolution and stringify its normalizations.
  const auto norms_ = File{std::string{uri}, resolution}.avail_normalizations();
  std::transform(norms_.begin(), norms_.end(), std::inserter(norms, norms.begin()),
                 [](const auto& n) { return std::string{n.to_string()}; });

  return norms;
}

// Print the names of the normalizations available in the file at `uri`, one per line.
//
// uri:        path/URI of the file to inspect.
// format:     file format ("mcool" and "hic" receive special handling, see below).
// resolution: resolution to inspect; 0 for a "hic" file means "all resolutions".
//
// For "mcool" files, and for "hic" files when resolution is 0, the normalizations found at every
// resolution are merged into a single set. Nothing is printed when that set is empty or when the
// file lists no resolutions. Always returns 0.
static int dump_normalizations(std::string_view uri, std::string_view format,
                               std::uint32_t resolution) {
  const bool is_mcool = format == "mcool";
  const bool is_hic_all_resolutions = !is_mcool && format == "hic" && resolution == 0;

  std::vector<std::uint32_t> resolutions{};
  if (is_mcool) {
    resolutions = cooler::MultiResFile{uri}.resolutions();
  } else if (is_hic_all_resolutions) {
    resolutions = hic::utils::list_resolutions(std::string{uri});
  }

  phmap::btree_set<std::string> norms{};
  if (is_mcool || is_hic_all_resolutions) {
    if (resolutions.empty()) {
      return 0;
    }

    // Each resolution of a .mcool is queried as a plain cooler; .hic files keep their format.
    format = is_mcool ? "cool" : "hic";
    for (const auto res : resolutions) {
      norms.merge(get_normalizations(uri, format, res));
    }
  } else {
    norms = get_normalizations(uri, format, resolution);
  }

  if (norms.empty()) {
    return 0;
  }

  fmt::print(FMT_STRING("{}\n"), fmt::join(norms, "\n"));
  return 0;
}

// Print the resolution(s) available in the file at `uri`, one per line.
//
// uri:        path/URI of the file to inspect.
// format:     one of "hic", "mcool", "scool" or "cool" (the latter is asserted).
// resolution: only consulted for "hic" files: when non-zero, the output is restricted to that
//             resolution, and nothing is printed if the file does not list it.
//
// Always returns 0.
static int dump_resolutions(std::string_view uri, std::string_view format,
                            std::uint32_t resolution) {
  std::vector<std::uint32_t> resolutions{};

  if (format == "mcool") {
    resolutions = cooler::MultiResFile{uri}.resolutions();
  } else if (format == "scool") {
    resolutions.push_back(cooler::SingleCellFile{uri}.bin_size());
  } else if (format == "hic") {
    resolutions = hic::utils::list_resolutions(uri);
    if (resolution != 0) {
      // Keep only the requested resolution, provided the file actually has it.
      const bool available =
          std::count(resolutions.begin(), resolutions.end(), resolution) != 0;
      resolutions.clear();
      if (available) {
        resolutions.push_back(resolution);
      }
    }
  } else {
    assert(format == "cool");
    resolutions.push_back(cooler::File{uri}.bin_size());
  }

  if (resolutions.empty()) {
    return 0;
  }

  fmt::print(FMT_STRING("{}\n"), fmt::join(resolutions, "\n"));
  return 0;
}

// Print the cells stored in the single-cell cooler at `uri`, one per line.
//
// uri:    path/URI of the file to inspect.
// format: must be "scool".
//
// Throws std::runtime_error when format is not "scool". Nothing is printed when the file has no
// cells. Always returns 0 otherwise.
static int dump_cells(std::string_view uri, std::string_view format) {
  const bool is_scool = format == "scool";
  if (!is_scool) {
    throw std::runtime_error(fmt::format(FMT_STRING("\"{}\" is not a .scool file"), uri));
  }

  const auto cell_names = cooler::SingleCellFile{uri}.cells();
  if (cell_names.empty()) {
    return 0;
  }

  fmt::print(FMT_STRING("{}\n"), fmt::join(cell_names, "\n"));
  return 0;
}

static int dump_tables(const DumpConfig& c) {
hictk::File f{c.uri, c.resolution, c.matrix_type, c.matrix_unit};

if (c.query_file.empty()) {
Expand All @@ -164,4 +266,26 @@ int dump_subcmd(const DumpConfig& c) {

return 0;
}

// Entry point of the dump subcommand: dispatch on the requested table and return the exit code
// produced by the selected dumper.
//
// "bins" and "pixels" are handled by dump_tables(); "chroms", "resolutions" and "normalizations"
// have dedicated dumpers; any other value is expected to be "cells" (asserted).
int dump_subcmd(const DumpConfig& c) {
  const auto& table = c.table;

  const bool wants_matrix_table = table == "pixels" || table == "bins";
  if (wants_matrix_table) {
    return dump_tables(c);
  }

  if (table == "normalizations") {
    return dump_normalizations(c.uri, c.format, c.resolution);
  }

  if (table == "resolutions") {
    return dump_resolutions(c.uri, c.format, c.resolution);
  }

  if (table == "chroms") {
    return dump_chroms(c.uri, c.format, c.resolution);
  }

  // Only one table name is left at this point.
  assert(table == "cells");

  return dump_cells(c.uri, c.format);
}
} // namespace hictk::tools
33 changes: 30 additions & 3 deletions src/hictk/include/hictk/tools/cli.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <string_view>

#include "config.hpp"
#include "hictk/cooler.hpp"
#include "hictk/cooler/utils.hpp"
#include "hictk/hic/utils.hpp"

Expand All @@ -23,6 +24,9 @@ class CoolerFileValidator : public CLI::Validator {
if (hictk::cooler::utils::is_multires_file(uri)) {
return "URI points to a .mcool file: " + uri;
}
if (hictk::cooler::utils::is_scool_file(uri)) {
return "URI points to a .scool file: " + uri;
}
const auto path = cooler::parse_cooler_uri(uri).file_path;
if (!std::filesystem::exists(path)) {
return "No such file: " + path;
Expand Down Expand Up @@ -50,6 +54,22 @@ class MultiresCoolerFileValidator : public CLI::Validator {
}
};

// CLI validator accepting URIs that point to single-cell cooler (.scool) files.
//
// The validation function returns an empty string on success and a human-readable error message
// otherwise: first when the file path extracted from the URI does not exist, then when
// is_scool_file() rejects the URI.
class SingleCellCoolerFileValidator : public CLI::Validator {
 public:
  inline SingleCellCoolerFileValidator() : Validator("Single-cell-cooler") {
    func_ = [](std::string& uri) -> std::string {
      // Strip any Cooler URI suffix so that the existence check runs on the file itself.
      const auto file_path = cooler::parse_cooler_uri(uri).file_path;
      const bool file_found = std::filesystem::exists(file_path);
      if (!file_found) {
        return "No such file: " + file_path;
      }

      const bool is_scool = hictk::cooler::utils::is_scool_file(uri);
      return is_scool ? std::string{} : "Not a valid single-cell cooler: " + uri;
    };
  }
};

class HiCFileValidator : public CLI::Validator {
public:
inline HiCFileValidator() : Validator("HiC") {
Expand Down Expand Up @@ -145,9 +165,10 @@ class Formatter : public CLI::Formatter {
};

// clang-format off
inline const auto IsValidCoolerFile = CoolerFileValidator(); // NOLINT(cert-err58-cpp)
inline const auto IsValidMultiresCoolerFile = MultiresCoolerFileValidator(); // NOLINT(cert-err58-cpp)
inline const auto IsValidHiCFile = HiCFileValidator(); // NOLINT(cert-err58-cpp)
inline const auto IsValidCoolerFile = CoolerFileValidator(); // NOLINT(cert-err58-cpp)
inline const auto IsValidMultiresCoolerFile = MultiresCoolerFileValidator(); // NOLINT(cert-err58-cpp)
inline const auto IsValidSingleCellCoolerFile = SingleCellCoolerFileValidator(); // NOLINT(cert-err58-cpp)
inline const auto IsValidHiCFile = HiCFileValidator(); // NOLINT(cert-err58-cpp)
// clang-format on

// clang-format off
Expand Down Expand Up @@ -225,6 +246,9 @@ class Cli {
if (cooler::utils::is_multires_file(p.string())) {
return "mcool";
}
if (cooler::utils::is_scool_file(p.string())) {
return "scool";
}
assert(hic::utils::is_hic_file(p));
return "hic";
}
Expand All @@ -250,6 +274,9 @@ class Cli {
if (format == "cool") {
return {cooler::File(p.string()).bin_size()};
}
if (format == "scool") {
return {cooler::SingleCellFile{p.string()}.bin_size()};
}
if (format == "mcool") {
return cooler::utils::list_resolutions(p, true);
}
Expand Down
1 change: 1 addition & 0 deletions src/libhictk/cooler/include/hictk/cooler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@

#include "hictk/cooler/cooler.hpp"
#include "hictk/cooler/multires_cooler.hpp"
#include "hictk/cooler/singlecell_cooler.hpp"
3 changes: 2 additions & 1 deletion src/libhictk/cooler/include/hictk/cooler/cooler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ class File {
balancing::Weights::Type type,
bool rescale = false) const;

bool has_weights(const balancing::Method &normalization) const;
[[nodiscard]] std::vector<balancing::Method> avail_normalizations() const;
[[nodiscard]] bool has_normalization(const balancing::Method &normalization) const;
std::shared_ptr<const balancing::Weights> read_weights(const balancing::Method &normalization,
bool rescale = false) const;
std::shared_ptr<const balancing::Weights> read_weights(const balancing::Method &normalization,
Expand Down
Loading

0 comments on commit 46af0f7

Please sign in to comment.