Skip to content

Commit

Permalink
Initial implementation of hictk rename-chromosomes
Browse files Browse the repository at this point in the history
  • Loading branch information
robomics committed Nov 21, 2023
1 parent 7109d59 commit e4da38b
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/hictk/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ target_sources(
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_fix_mcool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_load.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_merge.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_rename_chromosomes.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_validate.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_zoomify.cpp
${CMAKE_CURRENT_SOURCE_DIR}/balance/balance.cpp
Expand All @@ -32,6 +33,7 @@ target_sources(
${CMAKE_CURRENT_SOURCE_DIR}/fix_mcool/fix_mcool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/load/load.cpp
${CMAKE_CURRENT_SOURCE_DIR}/merge/merge.cpp
${CMAKE_CURRENT_SOURCE_DIR}/rename_chromosomes/rename_chromosomes.cpp
${CMAKE_CURRENT_SOURCE_DIR}/validate/validate.cpp
${CMAKE_CURRENT_SOURCE_DIR}/zoomify/zoomify.cpp)

Expand Down
10 changes: 10 additions & 0 deletions src/hictk/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ auto Cli::parse_arguments() -> Config {
_subcommand = subcommand::load;
} else if (_cli.get_subcommand("merge")->parsed()) {
_subcommand = subcommand::merge;
} else if (_cli.get_subcommand("rename-chromosomes")->parsed()) {
_subcommand = subcommand::rename_chromosomes;

Check warning on line 41 in src/hictk/cli/cli.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli.cpp#L41

Added line #L41 was not covered by tests
} else if (_cli.get_subcommand("validate")->parsed()) {
_subcommand = subcommand::validate;
} else if (_cli.get_subcommand("zoomify")->parsed()) {
Expand Down Expand Up @@ -85,6 +87,8 @@ std::string_view Cli::subcommand_to_str(subcommand s) noexcept {
return "load";
case merge:
return "merge";
case rename_chromosomes:
return "rename-chromosomes";

Check warning on line 91 in src/hictk/cli/cli.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli.cpp#L90-L91

Added lines #L90 - L91 were not covered by tests
case validate:
return "validate";
case zoomify:
Expand All @@ -107,6 +111,7 @@ void Cli::make_cli() {
make_fix_mcool_subcommand();
make_load_subcommand();
make_merge_subcommand();
make_rename_chromosomes_subcommand();
make_validate_subcommand();
make_zoomify_subcommand();
}
Expand All @@ -131,6 +136,9 @@ void Cli::validate_args() const {
case merge:
validate_merge_subcommand();
break;
case rename_chromosomes:
validate_rename_chromosomes_subcommand();
break;

Check warning on line 141 in src/hictk/cli/cli.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli.cpp#L139-L141

Added lines #L139 - L141 were not covered by tests
case validate:
break;
case zoomify:
Expand Down Expand Up @@ -161,6 +169,8 @@ void Cli::transform_args() {
case merge:
transform_args_merge_subcommand();
break;
case rename_chromosomes:
break;

Check warning on line 173 in src/hictk/cli/cli.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli.cpp#L172-L173

Added lines #L172 - L173 were not covered by tests
case validate:
break;
case zoomify:
Expand Down
99 changes: 99 additions & 0 deletions src/hictk/cli/cli_rename_chromosomes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (C) 2023 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#include <fmt/std.h>

#include <CLI/CLI.hpp>
#include <cassert>
#include <string>

#include "hictk/tools/cli.hpp"
#include "hictk/tools/config.hpp"

namespace hictk::tools {
// Registers the "rename-chromosomes" subcommand and its options with the CLI parser.
//
// Options are bound to the fields of a RenameChromosomesConfig stored inside _config.
// --name-mappings, --add-chr-prefix and --remove-chr-prefix are declared mutually exclusive.
void Cli::make_rename_chromosomes_subcommand() {
  auto& sc =
      *_cli.add_subcommand("rename-chromosomes", "Rename chromosomes found in a Cooler file.")
           ->fallthrough()
           ->preparse_callback([this]([[maybe_unused]] std::size_t i) {
             // No other subcommand may have populated the config at this point.
             assert(_config.index() == 0);
             _config = RenameChromosomesConfig{};
           });

  // Temporarily store a RenameChromosomesConfig in _config so that the options below can be
  // bound to its fields.
  _config = RenameChromosomesConfig{};
  auto& c = std::get<RenameChromosomesConfig>(_config);

  // clang-format off
  sc.add_option(
      "uri",
      c.uri,
      "Path to a .cool or .[ms]cool file (Cooler URI syntax supported).")
      ->required();

  sc.add_option(
      "--name-mappings",
      c.path_to_name_mappings,
      "Path to a two column TSV with pairs of chromosomes to be renamed.\n"
      "The first column should contain the original chromosome name,\n"
      "while the second column should contain the destination name to use when renaming."
      );

  sc.add_flag(
      "--add-chr-prefix",
      c.add_chr_prefix,
      "Prefix chromosome names with \"chr\".")
      ->capture_default_str();

  sc.add_flag(
      "--remove-chr-prefix",
      c.remove_chr_prefix,
      "Remove prefix \"chr\" from chromosome names.")
      ->capture_default_str();
  // clang-format on

  // At most one renaming strategy can be given on the command line.
  sc.get_option("--name-mappings")->excludes(sc.get_option("--add-chr-prefix"));
  sc.get_option("--name-mappings")->excludes(sc.get_option("--remove-chr-prefix"));
  sc.get_option("--add-chr-prefix")->excludes(sc.get_option("--remove-chr-prefix"));

  // Reset the config: the preparse callback re-creates it when the subcommand is selected.
  // NOTE(review): the options above keep referring to the fields bound through `c`; this
  // presumably relies on the variant re-creating the config at the same address - confirm.
  _config = std::monostate{};
}

void Cli::validate_rename_chromosomes_subcommand() const {
assert(_cli.get_subcommand("rename-chromosomes")->parsed());

Check warning on line 63 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L62-L63

Added lines #L62 - L63 were not covered by tests

const auto& c = std::get<RenameChromosomesConfig>(_config);

Check warning on line 65 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L65

Added line #L65 was not covered by tests

std::vector<std::string> errors;

Check warning on line 67 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L67

Added line #L67 was not covered by tests

if (!cooler::utils::is_cooler(c.uri) && !cooler::utils::is_multires_file(c.uri) &&
!cooler::utils::is_scool_file(c.uri)) {
errors.emplace_back(
fmt::format(FMT_STRING("File \"{}\" does not appear to be a Cooler file."), c.uri));

Check warning on line 72 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L69-L72

Added lines #L69 - L72 were not covered by tests
}

const auto& sc = *_cli.get_subcommand("rename-chromosomes");
if (sc.get_option("--name-mappings")->empty() && sc.get_option("--add-chr-prefix")->empty() &&
sc.get_option("--remove-chr-prefix")->empty()) {
errors.emplace_back(

Check warning on line 78 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L75-L78

Added lines #L75 - L78 were not covered by tests
"please specify exactly one of --name-mappings, --add-chr-prefix, --remove-chr-prefix");
}

if (!errors.empty()) {
throw std::runtime_error(
fmt::format(FMT_STRING("the following error(s) where encountered while validating CLI "

Check warning on line 84 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L82-L84

Added lines #L82 - L84 were not covered by tests
"arguments and input file(s):\n - {}\n"),
fmt::join(errors, "\n - ")));

Check warning on line 86 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L86

Added line #L86 was not covered by tests
}
}

void Cli::transform_args_rename_chromosomes_subcommand() {
assert(_cli.get_subcommand("rename-chromosomes")->parsed());
auto& c = std::get<RenameChromosomesConfig>(_config);

Check warning on line 92 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L90-L92

Added lines #L90 - L92 were not covered by tests

// in spdlog, high numbers correspond to low log levels
assert(c.verbosity > 0 && c.verbosity < 5);
c.verbosity = static_cast<std::uint8_t>(spdlog::level::critical) - c.verbosity;

Check warning on line 96 in src/hictk/cli/cli_rename_chromosomes.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/cli/cli_rename_chromosomes.cpp#L95-L96

Added lines #L95 - L96 were not covered by tests
}

} // namespace hictk::tools
4 changes: 4 additions & 0 deletions src/hictk/include/hictk/tools/cli.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ class Cli {
fix_mcool,
load,
merge,
rename_chromosomes,
validate,
zoomify,
};
Expand All @@ -224,6 +225,7 @@ class Cli {
void make_fix_mcool_subcommand();
void make_load_subcommand();
void make_merge_subcommand();
void make_rename_chromosomes_subcommand();
void make_validate_subcommand();
void make_zoomify_subcommand();
void make_cli();
Expand All @@ -234,6 +236,7 @@ class Cli {
void validate_fix_mcool_subcommand() const;
void validate_load_subcommand() const;
void validate_merge_subcommand() const;
void validate_rename_chromosomes_subcommand() const;
void validate_zoomify_subcommand() const;
void validate_args() const;

Expand All @@ -243,6 +246,7 @@ class Cli {
void transform_args_fix_mcool_subcommand();
void transform_args_load_subcommand();
void transform_args_merge_subcommand();
void transform_args_rename_chromosomes_subcommand();
void transform_args_zoomify_subcommand();
void transform_args();
};
Expand Down
9 changes: 9 additions & 0 deletions src/hictk/include/hictk/tools/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ struct MergeConfig {
std::uint8_t verbosity{4};
};

// Settings for the rename-chromosomes subcommand.
struct RenameChromosomesConfig {
  // URI of the Cooler file whose chromosomes should be renamed
  std::string uri;
  // Path to the file with the name mappings (empty when not given)
  std::filesystem::path path_to_name_mappings;
  // Prefix chromosome names with "chr"
  bool add_chr_prefix = false;
  // Remove the "chr" prefix from chromosome names
  bool remove_chr_prefix = false;
  // Verbosity level (defaults to 4)
  std::uint8_t verbosity = 4;
};

struct ValidateConfig {
std::string uri{};
bool validate_index{false};
Expand Down Expand Up @@ -153,6 +161,7 @@ using Config = std::variant<std::monostate,
FixMcoolConfig,
LoadConfig,
MergeConfig,
RenameChromosomesConfig,
ValidateConfig,
ZoomifyConfig>;
// clang-format on
Expand Down
1 change: 1 addition & 0 deletions src/hictk/include/hictk/tools/tools.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace hictk::tools {
[[nodiscard]] int fix_mcool_subcmd(const FixMcoolConfig& c);
[[nodiscard]] int load_subcmd(const LoadConfig& c);
[[nodiscard]] int merge_subcmd(const MergeConfig& c);
[[nodiscard]] int rename_chromosomes_subcmd(const RenameChromosomesConfig& c);
[[nodiscard]] int validate_subcmd(const ValidateConfig& c);
[[nodiscard]] int zoomify_subcmd(const ZoomifyConfig& c);

Expand Down
2 changes: 2 additions & 0 deletions src/hictk/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ int main(int argc, char** argv) noexcept {
return load_subcmd(std::get<LoadConfig>(config));
case sc::merge:
return merge_subcmd(std::get<MergeConfig>(config));
case sc::rename_chromosomes:
return rename_chromosomes_subcmd(std::get<RenameChromosomesConfig>(config));

Check warning on line 115 in src/hictk/main.cpp

View check run for this annotation

Codecov / codecov/patch

src/hictk/main.cpp#L114-L115

Added lines #L114 - L115 were not covered by tests
case sc::validate:
return validate_subcmd(std::get<ValidateConfig>(config));
case sc::zoomify:
Expand Down
114 changes: 114 additions & 0 deletions src/hictk/rename_chromosomes/rename_chromosomes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (C) 2023 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#include <fmt/format.h>

#include "hictk/cooler/multires_cooler.hpp"
#include "hictk/cooler/singlecell_cooler.hpp"
#include "hictk/cooler/utils.hpp"
#include "hictk/hic/utils.hpp"
#include "hictk/tools/config.hpp"

namespace hictk::tools {

// Builds the old name -> new name mappings used to add the "chr" prefix to every chromosome
// found in the Cooler at the given URI.
// The prefix is added unconditionally, even when a name already starts with "chr".
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string>
generate_mappings_add_chr_prefix_prefix(std::string_view uri) {
  const auto chroms = cooler::File{uri}.chromosomes();

  phmap::flat_hash_map<std::string, std::string> name_mappings(chroms.size());
  for (const auto& chrom : chroms) {
    std::string old_name{chrom.name()};
    std::string new_name = "chr" + old_name;
    name_mappings.emplace(std::move(old_name), std::move(new_name));
  }

  return name_mappings;
}

// Builds the old name -> new name mappings used to strip the "chr" prefix from the chromosomes
// found in the Cooler at the given URI.
// Chromosomes whose name does not start with "chr" are left out of the mappings.
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string>
generate_mappings_remove_chr_prefix_prefix(std::string_view uri) {
  const auto chroms = cooler::File{uri}.chromosomes();

  phmap::flat_hash_map<std::string, std::string> name_mappings(chroms.size());
  for (const auto& chrom : chroms) {
    const auto& name = chrom.name();
    if (name.find("chr") != 0) {
      continue;  // no prefix to remove
    }
    // 3 is the length of the "chr" prefix
    name_mappings.emplace(std::string{name}, std::string{name.substr(3)});
  }

  return name_mappings;
}

// Reads chromosome name mappings from a two-column, tab-separated file.
//
// Each non-empty line must have the form "<old name>\t<new name>".
// Everything following the first tab is taken as the new name.
// When the same old name occurs more than once, the first occurrence wins.
//
// Returns an empty map when path is empty.
// Throws std::runtime_error when the file cannot be opened or when a line is malformed
// (no tab separator, or an empty old/new name).
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string> read_mappings_from_file(
    const std::filesystem::path& path) {
  if (path.empty()) {
    return {};
  }

  std::ifstream ifs;
  ifs.exceptions(std::ios::badbit);
  ifs.open(path);
  // A failed open() only sets failbit, which does not raise with the exception mask used above:
  // without this check a missing/unreadable file would be silently treated as an empty file.
  if (!ifs.is_open()) {
    throw std::runtime_error(fmt::format(FMT_STRING("Unable to open file {} for reading"), path));
  }

  phmap::flat_hash_map<std::string, std::string> mappings{};
  std::string buff{};

  // Line numbers are 1-based so that error messages match what text editors display
  for (std::size_t line_num = 1; std::getline(ifs, buff); ++line_num) {
    // Tolerate files with Windows (CRLF) line endings
    if (!buff.empty() && buff.back() == '\r') {
      buff.pop_back();
    }
    if (buff.empty()) {
      continue;
    }

    const auto sep_pos = buff.find('\t');
    const auto record_is_valid = sep_pos != std::string::npos &&  // separator found
                                 sep_pos != 0 &&                  // old name is not empty
                                 sep_pos + 1 != buff.size();      // new name is not empty
    if (!record_is_valid) {
      throw std::runtime_error(fmt::format(
          FMT_STRING("Found invalid record \"{}\" in file {} at line {}"), buff, path, line_num));
    }

    mappings.emplace(buff.substr(0, sep_pos), buff.substr(sep_pos + 1));
  }

  return mappings;
}

// Generates the old name -> new name mappings using the strategy selected by the caller.
// A non-empty name_mappings_path takes precedence, followed by add_chr_prefix.
// When neither is given, remove_chr_prefix is expected to be true.
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string> generate_name_mappings(
    std::string_view uri, const std::filesystem::path& name_mappings_path, bool add_chr_prefix,
    bool remove_chr_prefix) {
  const auto read_from_file = !name_mappings_path.empty();

  if (!read_from_file && !add_chr_prefix) {
    assert(remove_chr_prefix);
    return generate_mappings_remove_chr_prefix_prefix(uri);
  }

  return read_from_file ? read_mappings_from_file(name_mappings_path)
                        : generate_mappings_add_chr_prefix_prefix(uri);
}

// Entry point for the rename-chromosomes subcommand.
//
// Renames the chromosomes of the file at c.uri, which can be a single-resolution,
// multi-resolution or single-cell Cooler:
//  - single-resolution: chromosomes are renamed directly
//  - multi-resolution: mappings are generated from the first resolution, then applied to every
//    resolution
//  - single-cell: mappings are generated from, and applied to, the first cell only
//
// Returns 0 on success. The value is used by main() as the process exit code.
int rename_chromosomes_subcmd(const RenameChromosomesConfig& c) {
  if (cooler::utils::is_cooler(c.uri)) {
    const auto mappings = generate_name_mappings(c.uri, c.path_to_name_mappings, c.add_chr_prefix,
                                                 c.remove_chr_prefix);
    cooler::utils::rename_chromosomes(c.uri, mappings);
  } else if (cooler::utils::is_multires_file(c.uri)) {
    const auto resolutions = cooler::MultiResFile(c.uri).resolutions();
    // NOTE(review): assumes all resolutions share the same chromosomes - confirm
    const auto mappings = generate_name_mappings(
        fmt::format(FMT_STRING("{}::/resolutions/{}"), c.uri, resolutions.front()),
        c.path_to_name_mappings, c.add_chr_prefix, c.remove_chr_prefix);
    for (const auto& res : resolutions) {
      cooler::utils::rename_chromosomes(fmt::format(FMT_STRING("{}::/resolutions/{}"), c.uri, res),
                                        mappings);
    }
  } else {
    assert(cooler::utils::is_scool_file(c.uri));
    // NOTE(review): only the first cell is processed; presumably all cells of a .scool file
    // share the same chromosome datasets, so renaming through one cell is enough - confirm
    const auto cell_id = *cooler::SingleCellFile(c.uri).cells().begin();
    const auto uri = fmt::format(FMT_STRING("{}::/cells/{}"), c.uri, cell_id);
    const auto mappings =
        generate_name_mappings(uri, c.path_to_name_mappings, c.add_chr_prefix, c.remove_chr_prefix);
    cooler::utils::rename_chromosomes(uri, mappings);
  }

  // Exit code 0 signals success (the subcommand used to return 1 unconditionally)
  return 0;
}

} // namespace hictk::tools

0 comments on commit e4da38b

Please sign in to comment.