Skip to content

Commit

Permalink
Merge pull request #77 from paulsengroup/impl-rename-chroms
Browse files Browse the repository at this point in the history
Initial implementation of utilities to rename chromosomes in cooler files
  • Loading branch information
robomics authored Dec 16, 2023
2 parents a6c1b48 + b87c75a commit 69c7017
Show file tree
Hide file tree
Showing 24 changed files with 630 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ jobs:
test/scripts/hictk_merge.sh build/src/hictk/hictk
test/scripts/hictk_rename_chromosomes.sh build/src/hictk/hictk
test/scripts/hictk_validate.sh build/src/hictk/hictk
test/scripts/hictk_zoomify.sh build/src/hictk/hictk
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/macos-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,10 @@ jobs:
run: |
test/scripts/hictk_merge.sh bin/hictk
- name: Test hictk rename-chroms
run: |
test/scripts/hictk_rename_chromosomes.sh bin/hictk
- name: Test hictk validate
run: |
test/scripts/hictk_validate.sh bin/hictk
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/ubuntu-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,10 @@ jobs:
run: |
test/scripts/hictk_merge.sh bin/hictk
- name: Test hictk rename-chroms
run: |
test/scripts/hictk_rename_chromosomes.sh bin/hictk
- name: Test hictk validate
run: |
test/scripts/hictk_validate.sh bin/hictk
Expand Down
2 changes: 2 additions & 0 deletions src/hictk/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ target_sources(
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_fix_mcool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_load.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_merge.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_rename_chromosomes.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_validate.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cli/cli_zoomify.cpp
${CMAKE_CURRENT_SOURCE_DIR}/balance/balance.cpp
Expand All @@ -35,6 +36,7 @@ target_sources(
${CMAKE_CURRENT_SOURCE_DIR}/fix_mcool/fix_mcool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/load/load.cpp
${CMAKE_CURRENT_SOURCE_DIR}/merge/merge.cpp
${CMAKE_CURRENT_SOURCE_DIR}/rename_chromosomes/rename_chromosomes.cpp
${CMAKE_CURRENT_SOURCE_DIR}/validate/validate.cpp
${CMAKE_CURRENT_SOURCE_DIR}/zoomify/zoomify.cpp)

Expand Down
11 changes: 11 additions & 0 deletions src/hictk/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ auto Cli::parse_arguments() -> Config {
_subcommand = subcommand::load;
} else if (_cli.get_subcommand("merge")->parsed()) {
_subcommand = subcommand::merge;
} else if (_cli.get_subcommand("rename-chromosomes")->parsed()) {
_subcommand = subcommand::rename_chromosomes;
} else if (_cli.get_subcommand("validate")->parsed()) {
_subcommand = subcommand::validate;
} else if (_cli.get_subcommand("zoomify")->parsed()) {
Expand Down Expand Up @@ -85,6 +87,8 @@ std::string_view Cli::subcommand_to_str(subcommand s) noexcept {
return "load";
case merge:
return "merge";
case rename_chromosomes:
return "rename-chromosomes";
case validate:
return "validate";
case zoomify:
Expand All @@ -107,6 +111,7 @@ void Cli::make_cli() {
make_fix_mcool_subcommand();
make_load_subcommand();
make_merge_subcommand();
make_rename_chromosomes_subcommand();
make_validate_subcommand();
make_zoomify_subcommand();
}
Expand All @@ -131,6 +136,9 @@ void Cli::validate_args() const {
case merge:
validate_merge_subcommand();
break;
case rename_chromosomes:
validate_rename_chromosomes_subcommand();
break;
case validate:
break;
case zoomify:
Expand Down Expand Up @@ -161,6 +169,9 @@ void Cli::transform_args() {
case merge:
transform_args_merge_subcommand();
break;
case rename_chromosomes:
transform_args_rename_chromosomes_subcommand();
break;
case validate:
break;
case zoomify:
Expand Down
107 changes: 107 additions & 0 deletions src/hictk/cli/cli_rename_chromosomes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright (C) 2023 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#include <fmt/std.h>

#include <CLI/CLI.hpp>
#include <cassert>
#include <string>

#include "hictk/tools/cli.hpp"
#include "hictk/tools/config.hpp"

namespace hictk::tools {
/// Register the "rename-chromosomes" subcommand (alias: "rename-chroms") together with its
/// positional argument, options and flags.
///
/// The three renaming modes (--name-mappings, --add-chr-prefix, --remove-chr-prefix) are declared
/// mutually exclusive here; the check that at least one of them was given is performed by
/// validate_rename_chromosomes_subcommand().
void Cli::make_rename_chromosomes_subcommand() {
  auto& sc =
      *_cli.add_subcommand("rename-chromosomes", "Rename chromosomes found in a Cooler file.")
           ->fallthrough()
           ->preparse_callback([this]([[maybe_unused]] std::size_t i) {
             // No other subcommand may have populated the config before this one is parsed
             assert(_config.index() == 0);
             _config = RenameChromosomesConfig{};
           });

  // Temporarily store a RenameChromosomesConfig inside _config so that the options declared below
  // can be bound to its members
  _config = RenameChromosomesConfig{};
  auto& c = std::get<RenameChromosomesConfig>(_config);

  // clang-format off
  sc.add_option(
      "uri",
      c.uri,
      "Path to a .cool or .[ms]cool file (Cooler URI syntax supported).")
      ->required();

  sc.add_option(
      "--name-mappings",
      c.path_to_name_mappings,
      "Path to a two column TSV with pairs of chromosomes to be renamed.\n"
      "The first column should contain the original chromosome name,\n"
      "while the second column should contain the destination name to use when renaming."
  );

  sc.add_flag(
      "--add-chr-prefix",
      c.add_chr_prefix,
      "Prefix chromosome names with \"chr\".")
      ->capture_default_str();

  sc.add_flag(
      "--remove-chr-prefix",
      c.remove_chr_prefix,
      "Remove prefix \"chr\" from chromosome names.")
      ->capture_default_str();

  sc.add_option(
      "-v,--verbosity",
      c.verbosity,
      "Set verbosity of output to the console.")
      ->check(CLI::Range(1, 4))
      ->capture_default_str();
  // clang-format on

  // At most one renaming mode can be requested per invocation
  sc.get_option("--name-mappings")->excludes(sc.get_option("--add-chr-prefix"));
  sc.get_option("--name-mappings")->excludes(sc.get_option("--remove-chr-prefix"));
  sc.get_option("--add-chr-prefix")->excludes(sc.get_option("--remove-chr-prefix"));
  sc.alias("rename-chroms");

  // Restore the empty state: the preparse callback re-creates the config once the subcommand is
  // actually selected on the command line.
  // NOTE(review): the options above stay bound to members of the config object destroyed here;
  // this presumably relies on the variant re-creating the alternative at the same address - confirm
  _config = std::monostate{};
}

void Cli::validate_rename_chromosomes_subcommand() const {
assert(_cli.get_subcommand("rename-chromosomes")->parsed());

const auto& c = std::get<RenameChromosomesConfig>(_config);

std::vector<std::string> errors;

if (!cooler::utils::is_cooler(c.uri) && !cooler::utils::is_multires_file(c.uri) &&
!cooler::utils::is_scool_file(c.uri)) {
errors.emplace_back(
fmt::format(FMT_STRING("File \"{}\" does not appear to be a Cooler file."), c.uri));
}

const auto& sc = *_cli.get_subcommand("rename-chromosomes");
if (sc.get_option("--name-mappings")->empty() && sc.get_option("--add-chr-prefix")->empty() &&
sc.get_option("--remove-chr-prefix")->empty()) {
errors.emplace_back(
"please specify exactly one of --name-mappings, --add-chr-prefix, --remove-chr-prefix");
}

if (!errors.empty()) {
throw std::runtime_error(
fmt::format(FMT_STRING("the following error(s) where encountered while validating CLI "
"arguments and input file(s):\n - {}\n"),
fmt::join(errors, "\n - ")));
}
}

/// Convert the verbosity given on the command line into the value stored in the config.
///
/// The CLI value (expected in [1, 4]) is subtracted from spdlog::level::critical and the result
/// is written back to the config.
void Cli::transform_args_rename_chromosomes_subcommand() {
  assert(_cli.get_subcommand("rename-chromosomes")->parsed());
  auto& cfg = std::get<RenameChromosomesConfig>(_config);

  assert(cfg.verbosity > 0 && cfg.verbosity < 5);

  // in spdlog, high numbers correspond to low log levels
  const auto critical_level = static_cast<std::uint8_t>(spdlog::level::critical);
  cfg.verbosity = static_cast<std::uint8_t>(critical_level - cfg.verbosity);
}

} // namespace hictk::tools
4 changes: 4 additions & 0 deletions src/hictk/include/hictk/tools/cli.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ class Cli {
fix_mcool,
load,
merge,
rename_chromosomes,
validate,
zoomify,
};
Expand All @@ -224,6 +225,7 @@ class Cli {
void make_fix_mcool_subcommand();
void make_load_subcommand();
void make_merge_subcommand();
void make_rename_chromosomes_subcommand();
void make_validate_subcommand();
void make_zoomify_subcommand();
void make_cli();
Expand All @@ -234,6 +236,7 @@ class Cli {
void validate_fix_mcool_subcommand() const;
void validate_load_subcommand() const;
void validate_merge_subcommand() const;
void validate_rename_chromosomes_subcommand() const;
void validate_zoomify_subcommand() const;
void validate_args() const;

Expand All @@ -243,6 +246,7 @@ class Cli {
void transform_args_fix_mcool_subcommand();
void transform_args_load_subcommand();
void transform_args_merge_subcommand();
void transform_args_rename_chromosomes_subcommand();
void transform_args_zoomify_subcommand();
void transform_args();
};
Expand Down
9 changes: 9 additions & 0 deletions src/hictk/include/hictk/tools/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,14 @@ struct MergeConfig {
std::uint8_t verbosity{4};
};

// Settings for the rename-chromosomes subcommand.
// Populated by the CLI parser (see Cli::make_rename_chromosomes_subcommand()).
struct RenameChromosomesConfig {
// Path or Cooler URI of the file whose chromosomes should be renamed (required positional)
std::string uri{};
// Path to a two column TSV mapping original chromosome names to the new names (--name-mappings)
std::filesystem::path path_to_name_mappings{};
// When true, chromosome names are prefixed with "chr" (--add-chr-prefix)
bool add_chr_prefix{false};
// When true, the "chr" prefix is removed from chromosome names (--remove-chr-prefix)
bool remove_chr_prefix{false};
// Console verbosity: the CLI accepts values between 1 and 4. After argument transformation this
// holds spdlog::level::critical minus the value given on the CLI
std::uint8_t verbosity{4};
};

struct ValidateConfig {
std::string uri{};
bool validate_index{false};
Expand Down Expand Up @@ -160,6 +168,7 @@ using Config = std::variant<std::monostate,
FixMcoolConfig,
LoadConfig,
MergeConfig,
RenameChromosomesConfig,
ValidateConfig,
ZoomifyConfig>;
// clang-format on
Expand Down
1 change: 1 addition & 0 deletions src/hictk/include/hictk/tools/tools.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace hictk::tools {
[[nodiscard]] int fix_mcool_subcmd(const FixMcoolConfig& c);
[[nodiscard]] int load_subcmd(const LoadConfig& c);
[[nodiscard]] int merge_subcmd(const MergeConfig& c);
[[nodiscard]] int rename_chromosomes_subcmd(const RenameChromosomesConfig& c);
[[nodiscard]] int validate_subcmd(const ValidateConfig& c);
[[nodiscard]] int zoomify_subcmd(const ZoomifyConfig& c);

Expand Down
2 changes: 2 additions & 0 deletions src/hictk/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ int main(int argc, char** argv) noexcept {
return load_subcmd(std::get<LoadConfig>(config));
case sc::merge:
return merge_subcmd(std::get<MergeConfig>(config));
case sc::rename_chromosomes:
return rename_chromosomes_subcmd(std::get<RenameChromosomesConfig>(config));
case sc::validate:
return validate_subcmd(std::get<ValidateConfig>(config));
case sc::zoomify:
Expand Down
Loading

0 comments on commit 69c7017

Please sign in to comment.