Skip to content

Commit

Permalink
Refactor
Browse files (browse the repository at this point in the history)
  • Loading branch information
robomics committed Jan 21, 2024
1 parent b5282a9 commit ddbaab1
Show file tree
Hide file tree
Showing 20 changed files with 218 additions and 158 deletions.
2 changes: 1 addition & 1 deletion src/hictk/balance/balance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ static void write_weights_hic(const hic::File& hf, const BalanceConfig& c,
}
}

auto jt = run_juicer_tools_add_norm(c.juicer_tools_jar, tmpfile, hf.url(), c.juicer_tools_xmx);
auto jt = run_juicer_tools_add_norm(c.juicer_tools_jar, tmpfile, hf.path(), c.juicer_tools_xmx);
jt->wait();
if (jt->exit_code() != 0) {
throw std::runtime_error(
Expand Down
2 changes: 1 addition & 1 deletion src/hictk/validate/validate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ static void validate_hic(const hic::File& hf, const Chromosome& chrom1, const Ch
fmt::format(FMT_STRING("### FAILURE: \"{}\" is not a valid .hic file:\n"
"Validation failed for {}:{} map at {} resolution:\n"
"{}"),
hf.url(), chrom1.name(), chrom2.name(), hf.resolution(), e.what()));
hf.path(), chrom1.name(), chrom2.name(), hf.resolution(), e.what()));
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/libhictk/file/include/hictk/impl/file_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ inline std::string File::uri() const {
[&](auto& fp) {
using T = std::decay_t<decltype(fp)>;
if constexpr (std::is_same_v<hic::File, T>) {
return fp.url();
return fp.path();
} else {
return fp.uri();
}
Expand All @@ -234,7 +234,7 @@ inline std::string File::path() const {
[&](auto& fp) {
using T = std::decay_t<decltype(fp)>;
if constexpr (std::is_same_v<hic::File, T>) {
return fp.url();
return fp.path();
} else {
return fp.path();
}
Expand Down
2 changes: 1 addition & 1 deletion src/libhictk/hic/include/hictk/hic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class File {
MatrixUnit unit_ = MatrixUnit::BP, std::uint64_t block_cache_capacity = 0);
[[nodiscard]] bool has_resolution(std::uint32_t resolution) const;

[[nodiscard]] const std::string &url() const noexcept;
[[nodiscard]] const std::string &path() const noexcept;
[[nodiscard]] const std::string &name() const noexcept;
[[nodiscard]] std::int32_t version() const noexcept;
[[nodiscard]] const Reference &chromosomes() const noexcept;
Expand Down
2 changes: 1 addition & 1 deletion src/libhictk/hic/include/hictk/hic/file_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class HiCFileReader {
public:
HiCFileReader() = default;
explicit HiCFileReader(std::string url);
[[nodiscard]] inline const std::string &url() const noexcept;
[[nodiscard]] inline const std::string &path() const noexcept;
[[nodiscard]] const HiCHeader &header() const noexcept;

[[nodiscard]] std::int32_t version() const noexcept;
Expand Down
8 changes: 5 additions & 3 deletions src/libhictk/hic/include/hictk/hic/file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,14 @@ class HiCFileWriter {
std::uint64_t nnz{};
};

HiCHeader _header{};
filestream::FileStream _fs{};
std::filesystem::path _tmpdir{};

using BinTables = phmap::flat_hash_map<std::uint32_t, std::shared_ptr<const BinTable>>;
using BlockIndex = phmap::btree_map<BlockIndexKey, phmap::btree_set<MatrixBlockMetadata>>;
using BlockMappers = phmap::flat_hash_map<std::uint32_t, HiCInteractionToBlockMapper>;

HiCHeader _header{};
BinTables _bin_tables{};
BlockIndex _block_index{};
BlockMappers _block_mappers{};
Expand Down Expand Up @@ -149,7 +149,7 @@ class HiCFileWriter {
const std::filesystem::path& tmpdir = std::filesystem::temp_directory_path(),
std::uint32_t compression_lvl = 12, std::size_t buffer_size = 32'000'000);

[[nodiscard]] std::string_view url() const noexcept;
[[nodiscard]] std::string_view path() const noexcept;
[[nodiscard]] const Reference& chromosomes() const noexcept;
[[nodiscard]] const BinTable& bins(std::uint32_t resolution) const;
[[nodiscard]] const std::vector<std::uint32_t>& resolutions() const noexcept;
Expand Down Expand Up @@ -202,7 +202,7 @@ class HiCFileWriter {
void finalize();

private:
[[nodiscard]] static HiCHeader read_header(std::string_view path);
[[nodiscard]] static HiCHeader read_header(filestream::FileStream& fs);
[[nodiscard]] static HiCHeader init_header(std::string_view path, Reference chromosomes,
std::vector<std::uint32_t> resolutions,
std::string_view assembly);
Expand Down Expand Up @@ -234,6 +234,8 @@ class HiCFileWriter {
void read_norm_vectors();
[[nodiscard]] std::vector<float> read_norm_vector(const NormalizationVectorIndexBlock& blk);

void read_offsets();

// Methods to be called from worker threads
auto merge_and_compress_blocks_thr(
HiCInteractionToBlockMapper& mapper, std::mutex& mapper_mtx,
Expand Down
1 change: 0 additions & 1 deletion src/libhictk/hic/include/hictk/hic/filestream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ class FileStream {
static FileStream create(std::string path);

[[nodiscard]] const std::string &path() const noexcept;
[[nodiscard]] const std::string &url() const noexcept;
[[nodiscard]] std::size_t size() const;

void seekg(std::streamoff offset, std::ios::seekdir way = std::ios::beg);
Expand Down
2 changes: 1 addition & 1 deletion src/libhictk/hic/include/hictk/hic/footer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class HiCFooter {
[[nodiscard]] constexpr HiCFooterMetadata &metadata() noexcept;
[[nodiscard]] const Index &index() const noexcept;

[[nodiscard]] constexpr const std::string &url() const noexcept;
[[nodiscard]] constexpr const std::string &path() const noexcept;
[[nodiscard]] constexpr MatrixType matrix_type() const noexcept;
[[nodiscard]] balancing::Method normalization() const noexcept;
[[nodiscard]] constexpr MatrixUnit unit() const noexcept;
Expand Down
9 changes: 7 additions & 2 deletions src/libhictk/hic/include/hictk/hic/header.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <string>
#include <vector>

#include "hictk/hic/binary_buffer.hpp"
#include "hictk/hic/filestream.hpp"
#include "hictk/reference.hpp"

namespace hictk::hic::internal {
Expand All @@ -28,8 +30,11 @@ struct HiCHeader {
phmap::flat_hash_map<std::string, std::string> attributes{};

constexpr explicit operator bool() const noexcept;
bool operator==(const HiCHeader &other) const noexcept;
bool operator!=(const HiCHeader &other) const noexcept;
bool operator==(const HiCHeader& other) const noexcept;
bool operator!=(const HiCHeader& other) const noexcept;

[[nodiscard]] std::string serialize(BinaryBuffer& buffer, bool clear = true) const;
[[nodiscard]] static HiCHeader deserialize(filestream::FileStream& fs);
};

} // namespace hictk::hic::internal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ inline void BinaryBuffer::read(T &buff) {

template <typename T, typename std::enable_if<std::is_fundamental<T>::value>::type *>
inline void BinaryBuffer::read(std::vector<T> &buff) {
read(reinterpret_cast<char*>(buff.data()), sizeof(T) * buff.size());
read(reinterpret_cast<char *>(buff.data()), sizeof(T) * buff.size());
}

inline void BinaryBuffer::read(std::string &buff, std::size_t n) {
Expand Down
77 changes: 3 additions & 74 deletions src/libhictk/hic/include/hictk/hic/impl/file_reader_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ inline filestream::FileStream HiCFileReader::openStream(std::string url) {
}
}

inline const std::string &HiCFileReader::url() const noexcept { return _fs->url(); }
inline const std::string &HiCFileReader::path() const noexcept { return _fs->path(); }
inline const HiCHeader &HiCFileReader::header() const noexcept { return *_header; }

inline std::int32_t HiCFileReader::version() const noexcept {
Expand Down Expand Up @@ -242,79 +242,8 @@ inline Index HiCFileReader::read_index(std::int64_t fileOffset, const Chromosome

inline bool HiCFileReader::checkMagicString() { return checkMagicString(*_fs); }

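// Reads and validates the .hic header (magic string, version, master index offset, genome ID,
// normalization vector index position/length, attributes, chromosomes and resolutions) and
// returns it as a HiCHeader; the master index offset is stored in header.footerPosition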
inline HiCHeader HiCFileReader::readHeader(filestream::FileStream &fs) {
if (!checkMagicString(fs)) {
throw std::runtime_error(fmt::format(
FMT_STRING("Hi-C magic string is missing. {} does not appear to be a hic file"), fs.url()));
}

HiCHeader header{fs.url()};

fs.read(header.version);
if (header.version < 6) {
throw std::runtime_error(fmt::format(
FMT_STRING(".hic version 5 and older are no longer supported. Found version {}"),
header.version));
}
fs.read(header.footerPosition);
if (header.footerPosition < 0 || header.footerPosition >= static_cast<std::int64_t>(fs.size())) {
throw std::runtime_error(
fmt::format(FMT_STRING("file appears to be corrupted: expected master index offset to "
"be between 0 and {}, found {}"),
fs.size(), header.footerPosition));
}

fs.getline(header.genomeID, '\0');
if (header.genomeID.empty()) {
header.genomeID = "unknown";
}

if (header.version > 8) {
fs.read(header.normVectorIndexPosition);
fs.read(header.normVectorIndexLength);
}

const auto nAttributes = fs.read<std::int32_t>();

// reading attribute-value dictionary
for (std::int32_t i = 0; i < nAttributes; i++) {
auto key = fs.getline('\0'); // key
auto value = fs.getline('\0'); // value
header.attributes.emplace(std::move(key), std::move(value));
}

// Read chromosomes
auto numChromosomes = static_cast<std::uint32_t>(fs.read<std::int32_t>());
std::vector<std::string> chrom_names(numChromosomes);
std::vector<std::uint32_t> chrom_sizes(numChromosomes);
for (std::size_t i = 0; i < chrom_names.size(); ++i) {
fs.getline(chrom_names[i], '\0');
chrom_sizes[i] = static_cast<std::uint32_t>(
header.version > 8 ? fs.read<std::int64_t>()
: static_cast<std::int64_t>(fs.read<std::int32_t>()));
}

if (chrom_names.empty()) {
throw std::runtime_error("unable to read chromosomes");
}

header.chromosomes = Reference(chrom_names.begin(), chrom_names.end(), chrom_sizes.begin());

// Read resolutions
const auto numResolutions = static_cast<std::size_t>(fs.read<std::int32_t>());
if (numResolutions == 0) {
throw std::runtime_error("unable to read the list of available resolutions");
}
header.resolutions.resize(numResolutions);
std::generate(header.resolutions.begin(), header.resolutions.end(), [&]() {
const auto res = fs.read<std::int32_t>();
assert(res > 0);
return static_cast<std::uint32_t>(res);
});

return header;
return HiCHeader::deserialize(fs);
}

inline void HiCFileReader::readAndInflate(const BlockIndex &idx, std::string &plainTextBuffer) {
Expand Down Expand Up @@ -521,7 +450,7 @@ inline HiCFooter HiCFileReader::read_footer(std::uint32_t chrom1_id, std::uint32

// clang-format off
HiCFooterMetadata metadata{
_fs->url(),
_fs->path(),
matrix_type,
wanted_norm,
wanted_unit,
Expand Down
Loading

0 comments on commit ddbaab1

Please sign in to comment.