-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial implementation of hictk rename-chromosomes
- Loading branch information
Showing
8 changed files
with
241 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
// Copyright (C) 2023 Roberto Rossini <roberros@uio.no> | ||
// | ||
// SPDX-License-Identifier: MIT | ||
|
||
#include <fmt/std.h> | ||
|
||
#include <CLI/CLI.hpp> | ||
#include <cassert> | ||
#include <string> | ||
|
||
#include "hictk/tools/cli.hpp" | ||
#include "hictk/tools/config.hpp" | ||
|
||
namespace hictk::tools { | ||
void Cli::make_rename_chromosomes_subcommand() { | ||
auto& sc = | ||
*_cli.add_subcommand("rename-chromosomes", "Rename chromosomes found in a Cooler file.") | ||
->fallthrough() | ||
->preparse_callback([this]([[maybe_unused]] std::size_t i) { | ||
assert(_config.index() == 0); | ||
_config = RenameChromosomesConfig{}; | ||
}); | ||
|
||
_config = RenameChromosomesConfig{}; | ||
auto& c = std::get<RenameChromosomesConfig>(_config); | ||
|
||
// clang-format off | ||
sc.add_option( | ||
"uri", | ||
c.uri, | ||
"Path to a or .[ms]cool file (Cooler URI syntax supported).") | ||
->required(); | ||
|
||
sc.add_option( | ||
"--name-mappings", | ||
c.path_to_name_mappings, | ||
"Path to a two column TSV with pairs of chromosomes to be renamed.\n" | ||
"The first column should contain the original chromosome name,\n" | ||
"while the second column should contain the destination name to use when renaming." | ||
); | ||
|
||
sc.add_flag( | ||
"--add-chr-prefix", | ||
c.add_chr_prefix, | ||
"Prefix chromosome names with \"chr\".") | ||
->capture_default_str(); | ||
|
||
sc.add_flag( | ||
"--remove-chr-prefix", | ||
c.remove_chr_prefix, | ||
"Remove prefix \"chr\" from chromosome names.") | ||
->capture_default_str(); | ||
// clang-format on | ||
|
||
sc.get_option("--name-mappings")->excludes(sc.get_option("--add-chr-prefix")); | ||
sc.get_option("--name-mappings")->excludes(sc.get_option("--remove-chr-prefix")); | ||
sc.get_option("--add-chr-prefix")->excludes(sc.get_option("--remove-chr-prefix")); | ||
|
||
_config = std::monostate{}; | ||
} | ||
|
||
void Cli::validate_rename_chromosomes_subcommand() const { | ||
assert(_cli.get_subcommand("rename-chromosomes")->parsed()); | ||
|
||
const auto& c = std::get<RenameChromosomesConfig>(_config); | ||
|
||
std::vector<std::string> errors; | ||
|
||
if (!cooler::utils::is_cooler(c.uri) && !cooler::utils::is_multires_file(c.uri) && | ||
!cooler::utils::is_scool_file(c.uri)) { | ||
errors.emplace_back( | ||
fmt::format(FMT_STRING("File \"{}\" does not appear to be a Cooler file."), c.uri)); | ||
} | ||
|
||
const auto& sc = *_cli.get_subcommand("rename-chromosomes"); | ||
if (sc.get_option("--name-mappings")->empty() && sc.get_option("--add-chr-prefix")->empty() && | ||
sc.get_option("--remove-chr-prefix")->empty()) { | ||
errors.emplace_back( | ||
"please specify exactly one of --name-mappings, --add-chr-prefix, --remove-chr-prefix"); | ||
} | ||
|
||
if (!errors.empty()) { | ||
throw std::runtime_error( | ||
fmt::format(FMT_STRING("the following error(s) where encountered while validating CLI " | ||
"arguments and input file(s):\n - {}\n"), | ||
fmt::join(errors, "\n - "))); | ||
} | ||
} | ||
|
||
void Cli::transform_args_rename_chromosomes_subcommand() { | ||
assert(_cli.get_subcommand("rename-chromosomes")->parsed()); | ||
auto& c = std::get<RenameChromosomesConfig>(_config); | ||
|
||
// in spdlog, high numbers correspond to low log levels | ||
assert(c.verbosity > 0 && c.verbosity < 5); | ||
c.verbosity = static_cast<std::uint8_t>(spdlog::level::critical) - c.verbosity; | ||
} | ||
|
||
} // namespace hictk::tools |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
// Copyright (C) 2023 Roberto Rossini <roberros@uio.no> | ||
// | ||
// SPDX-License-Identifier: MIT | ||
|
||
#include <fmt/format.h> | ||
|
||
#include "hictk/cooler/multires_cooler.hpp" | ||
#include "hictk/cooler/singlecell_cooler.hpp" | ||
#include "hictk/cooler/utils.hpp" | ||
#include "hictk/hic/utils.hpp" | ||
#include "hictk/tools/config.hpp" | ||
|
||
namespace hictk::tools { | ||
|
||
// Opens the Cooler at the given URI and returns a mapping from each chromosome name to the same
// name prefixed with "chr".
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string>
generate_mappings_add_chr_prefix_prefix(std::string_view uri) {
  const auto chromosomes = cooler::File{uri}.chromosomes();

  phmap::flat_hash_map<std::string, std::string> name_map(chromosomes.size());
  for (const auto& chromosome : chromosomes) {
    std::string current_name{chromosome.name()};
    std::string prefixed_name = "chr" + current_name;
    name_map.emplace(std::move(current_name), std::move(prefixed_name));
  }

  return name_map;
}
|
||
// Opens the Cooler at the given URI and returns a mapping from each chromosome name starting with
// "chr" to the same name with that prefix removed.
// Chromosomes whose name does not start with "chr" are left out of the mapping.
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string>
generate_mappings_remove_chr_prefix_prefix(std::string_view uri) {
  const auto chromosomes = cooler::File{uri}.chromosomes();

  phmap::flat_hash_map<std::string, std::string> name_map(chromosomes.size());
  for (const auto& chromosome : chromosomes) {
    const auto& name = chromosome.name();
    // rfind() anchored at position 0 can only match at the very beginning of the name
    if (name.rfind("chr", 0) != 0) {
      continue;
    }
    name_map.emplace(std::string{name}, std::string{name.substr(3)});
  }

  return name_map;
}
|
||
// Reads chromosome name mappings from a two-column, tab-separated text file.
//
// Each non-empty line is split at the first tab character: the text before the tab is the
// original chromosome name, the text after it is the new name.
// Empty lines are skipped and a trailing carriage return (CRLF line endings) is ignored.
// When the same original name occurs more than once, the first occurrence wins.
//
// Returns an empty mapping when the given path is empty.
// Throws std::runtime_error when the file cannot be opened, or when a line has no tab separator
// or has an empty name on either side of it. Line numbers in error messages are 1-based.
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string> read_mappings_from_file(
    const std::filesystem::path& path) {
  if (path.empty()) {
    return {};
  }

  std::ifstream ifs;
  ifs.exceptions(std::ios::badbit);
  ifs.open(path);
  // A failed open() only sets failbit, which the exception mask above does not cover: without
  // this check a missing or unreadable file would silently produce an empty mapping.
  if (!ifs.is_open()) {
    throw std::runtime_error(
        fmt::format(FMT_STRING("Unable to open file {} for reading"), path));
  }

  phmap::flat_hash_map<std::string, std::string> mappings{};
  std::string buff{};

  for (std::size_t line_num = 1; std::getline(ifs, buff); ++line_num) {
    // Do not let the '\r' of Windows-style line endings leak into the new chromosome name
    if (!buff.empty() && buff.back() == '\r') {
      buff.pop_back();
    }
    if (buff.empty()) {
      continue;
    }

    const auto sep_pos = buff.find('\t');
    const auto record_is_valid =
        sep_pos != std::string::npos && sep_pos != 0 && sep_pos + 1 != buff.size();
    if (!record_is_valid) {
      throw std::runtime_error(
          fmt::format(FMT_STRING("Found invalid record \"{}\" in file {} at line {}"), buff, path,
                      line_num));
    }

    mappings.emplace(buff.substr(0, sep_pos), buff.substr(sep_pos + 1));
  }

  return mappings;
}
|
||
// Selects the renaming strategy and returns the corresponding old name -> new name mapping.
//
// Precedence: a non-empty name_mappings_path is read from disk; otherwise add_chr_prefix selects
// the "add prefix" mapping; otherwise the "remove prefix" mapping is generated (in which case
// remove_chr_prefix is expected to be true).
[[nodiscard]] static phmap::flat_hash_map<std::string, std::string> generate_name_mappings(
    std::string_view uri, const std::filesystem::path& name_mappings_path, bool add_chr_prefix,
    bool remove_chr_prefix) {
  const bool mappings_file_given = !name_mappings_path.empty();
  if (mappings_file_given) {
    return read_mappings_from_file(name_mappings_path);
  }

  if (!add_chr_prefix) {
    // Last remaining strategy
    assert(remove_chr_prefix);
    return generate_mappings_remove_chr_prefix_prefix(uri);
  }

  return generate_mappings_add_chr_prefix_prefix(uri);
}
|
||
int rename_chromosomes_subcmd(const RenameChromosomesConfig& c) { | ||
if (cooler::utils::is_cooler(c.uri)) { | ||
const auto mappings = generate_name_mappings(c.uri, c.path_to_name_mappings, c.add_chr_prefix, | ||
c.remove_chr_prefix); | ||
cooler::utils::rename_chromosomes(c.uri, mappings); | ||
} else if (cooler::utils::is_multires_file(c.uri)) { | ||
const auto resolutions = cooler::MultiResFile(c.uri).resolutions(); | ||
const auto mappings = generate_name_mappings( | ||
fmt::format(FMT_STRING("{}::/resolutions/{}"), c.uri, resolutions.front()), | ||
c.path_to_name_mappings, c.add_chr_prefix, c.remove_chr_prefix); | ||
for (const auto& res : resolutions) { | ||
cooler::utils::rename_chromosomes(fmt::format(FMT_STRING("{}::/resolutions/{}"), c.uri, res), | ||
mappings); | ||
} | ||
} else { | ||
assert(cooler::utils::is_scool_file(c.uri)); | ||
const auto cell_id = *cooler::SingleCellFile(c.uri).cells().begin(); | ||
const auto uri = fmt::format(FMT_STRING("{}::/cells/{}"), c.uri, cell_id); | ||
const auto mappings = | ||
generate_name_mappings(uri, c.path_to_name_mappings, c.add_chr_prefix, c.remove_chr_prefix); | ||
cooler::utils::rename_chromosomes(uri, mappings); | ||
} | ||
|
||
return 1; | ||
} | ||
|
||
} // namespace hictk::tools |