Skip to content

Commit

Permalink
Merge pull request #78 from paulsengroup/fix-pixel-merger
Browse files Browse the repository at this point in the history
Refactor internal::PixelMerger
  • Loading branch information
robomics authored Dec 5, 2023
2 parents 69dde0c + 9debaee commit d9727c0
Show file tree
Hide file tree
Showing 10 changed files with 413 additions and 236 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "hictk/cooler/cooler.hpp"
#include "hictk/pixel.hpp"
#include "hictk/transformers/pixel_merger.hpp"
#include "hictk/type_traits.hpp"

namespace hictk::balancing {
Expand Down Expand Up @@ -265,7 +266,7 @@ template <typename File>
tails.emplace_back(sel.template end<double>());
}

internal::PixelMerger<PixelIt> merger{heads, tails};
transformers::PixelMerger<PixelIt> merger{heads, tails};

SparseMatrix m{};
std::for_each(merger.begin(), merger.end(), [&](const ThinPixel<double>& p) {
Expand Down Expand Up @@ -384,7 +385,7 @@ inline auto ICE::construct_sparse_matrix_chunked_trans(const File& f, std::size_
tails.emplace_back(sel.template end<double>());
}

internal::PixelMerger<PixelIt> merger{heads, tails};
transformers::PixelMerger<PixelIt> merger{heads, tails};

SparseMatrixChunked m(tmpfile, chunk_size);
std::for_each(merger.begin(), merger.end(), [&](const ThinPixel<double>& p) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <vector>

#include "hictk/cooler/cooler.hpp"
#include "hictk/transformers/pixel_merger.hpp"

namespace hictk::cooler::utils {

Expand Down Expand Up @@ -112,19 +113,19 @@ inline void merge(const std::vector<PixelIt>& heads, const std::vector<PixelIt>&
bool overwrite_if_exists, std::size_t chunk_size, std::size_t update_frequency) {
using N = remove_cvref_t<decltype(heads.front()->count)>;

hictk::internal::PixelMerger merger{heads, tails};
hictk::transformers::PixelMerger merger{heads, tails};
std::vector<ThinPixel<N>> buffer(chunk_size);
buffer.clear();

auto dest = File::create<N>(dest_uri, chromosomes, bin_size, overwrite_if_exists);

std::size_t pixels_processed{};
auto t0 = std::chrono::steady_clock::now();
for (std::size_t i = 0; true; ++i) {
auto pixel = merger.next();
if (!pixel) {
break;
}
auto first = merger.begin();
auto last = merger.end();
for (std::size_t i = 0; first != last; ++i) {
const auto pixel = *first;
std::ignore = ++first;

if (i == update_frequency) {
const auto bin1 = dest.bins().at(pixel.bin1_id);
Expand Down
126 changes: 0 additions & 126 deletions src/libhictk/pixel/include/hictk/impl/pixel_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,130 +357,4 @@ inline auto Pixel<N>::from_4dn_pairs(const hictk::BinTable &bins, std::string_vi
}
}

namespace internal {
template <typename PixelIt>
inline bool PixelMerger<PixelIt>::Node::operator<(const Node &other) const noexcept {
assert(!!pixel);
assert(!!other.pixel);
if (pixel.bin1_id != other.pixel.bin1_id) {
return pixel.bin1_id < other.pixel.bin1_id;
}
return pixel.bin2_id < other.pixel.bin2_id;
}

template <typename PixelIt>
inline bool PixelMerger<PixelIt>::Node::operator>(const Node &other) const noexcept {
assert(!!pixel);
assert(!!other.pixel);
if (pixel.bin1_id != other.pixel.bin1_id) {
return pixel.bin1_id > other.pixel.bin1_id;
}
return pixel.bin2_id > other.pixel.bin2_id;
}

template <typename PixelIt>
inline bool PixelMerger<PixelIt>::Node::operator==(const Node &other) const noexcept {
return pixel.bin1_id == other.pixel.bin1_id && pixel.bin2_id == other.pixel.bin2_id;
}

template <typename PixelIt>
inline bool PixelMerger<PixelIt>::Node::operator!=(const Node &other) const noexcept {
return !(*this == other);
}

template <typename PixelIt>
inline PixelMerger<PixelIt>::PixelMerger(std::vector<PixelIt> heads, std::vector<PixelIt> tails) {
assert(heads.size() == tails.size());
for (std::size_t i = 0; i < heads.size(); ++i) {
auto &first = heads[i];
auto &last = tails[i];
if (first != last) {
_heads.emplace_back(std::move(first));
_tails.emplace_back(std::move(last));
_pqueue.emplace(Node{std::move(*_heads.back()++), _pqueue.size()});
}
}
}
template <typename PixelIt>
inline auto PixelMerger<PixelIt>::begin() -> iterator {
return iterator{*this};
}

template <typename PixelIt>
inline auto PixelMerger<PixelIt>::end() const noexcept -> iterator {
return {};
}

template <typename PixelIt>
inline void PixelMerger<PixelIt>::replace_top_node(std::size_t i) {
assert(_pqueue.top().i == i);
_pqueue.pop();
if (auto &it = _heads[i]; it != _tails[i]) {
_pqueue.emplace(Node{*it, i});
++it;
}
}

template <typename PixelIt>
inline auto PixelMerger<PixelIt>::next() -> ThinPixel<N> {
if (_pqueue.empty()) {
return {};
}

auto current_node = _pqueue.top();
replace_top_node(current_node.i);

while (!_pqueue.empty()) {
const auto next_node = _pqueue.top();
if (next_node != current_node) {
break;
}
current_node.pixel.count += next_node.pixel.count;
replace_top_node(next_node.i);
}
++_i;
return current_node.pixel;
}

template <typename PixelIt>
inline PixelMerger<PixelIt>::iterator::iterator(PixelMerger<PixelIt> &merger)
: _merger(&merger), _value(merger.next()) {}

template <typename PixelIt>
inline bool PixelMerger<PixelIt>::iterator::operator==(const iterator &other) const noexcept {
if (!_merger || !other._merger) {
return _merger == other._merger;
}

return _merger == other._merger && _merger->_i == other._merger->_i;
}

template <typename PixelIt>
inline bool PixelMerger<PixelIt>::iterator::operator!=(const iterator &other) const noexcept {
return !(*this == other);
}

template <typename PixelIt>
inline auto PixelMerger<PixelIt>::iterator::operator*() const noexcept -> const ThinPixel<N> & {
return _value;
}

template <typename PixelIt>
inline auto PixelMerger<PixelIt>::iterator::operator->() const noexcept -> const ThinPixel<N> * {
return &(**this);
}

template <typename PixelIt>
inline auto PixelMerger<PixelIt>::iterator::operator++() -> iterator & {
assert(!!_merger);
_value = _merger->next();
if (!_value) {
_merger = nullptr;
}

return *this;
}

} // namespace internal

} // namespace hictk
59 changes: 0 additions & 59 deletions src/libhictk/pixel/include/hictk/pixel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@

#include <cstdint>
#include <limits>
#include <queue>
#include <string_view>
#include <type_traits>

#include "hictk/bin_table.hpp"

Expand Down Expand Up @@ -91,63 +89,6 @@ struct Pixel {
static auto from_4dn_pairs(const BinTable &bins, std::string_view line) -> Pixel;
};

namespace internal {

/// This class is basically a wrapper around a priority queue of objects of type Pair
/// Pair consist of a pixel and an index. The index represent from which iterator the
/// pixel was read. This allows us to know from which iterator we should read the next pixel (i.e.
/// the same iterator from which the top pixel originated)
template <typename PixelIt>
class PixelMerger {
using N = decltype(std::declval<PixelIt>()->count);
struct Node {
ThinPixel<N> pixel{}; // NOLINT
std::size_t i{}; // NOLINT

bool operator<(const Node &other) const noexcept;
bool operator>(const Node &other) const noexcept;
bool operator==(const Node &other) const noexcept;
bool operator!=(const Node &other) const noexcept;
};

std::priority_queue<Node, std::vector<Node>, std::greater<>> _pqueue{};

std::vector<PixelIt> _heads{};
std::vector<PixelIt> _tails{};
std::size_t _i{};

public:
class iterator;

PixelMerger() = delete;
PixelMerger(std::vector<PixelIt> head, std::vector<PixelIt> tail);

auto begin() -> iterator;
auto end() const noexcept -> iterator;
[[nodiscard]] auto next() -> ThinPixel<N>;

class iterator {
PixelMerger *_merger{};
ThinPixel<N> _value{};

public:
iterator() = default;
explicit iterator(PixelMerger &merger);

[[nodiscard]] bool operator==(const iterator &other) const noexcept;
[[nodiscard]] bool operator!=(const iterator &other) const noexcept;

auto operator*() const noexcept -> const ThinPixel<N> &;
auto operator->() const noexcept -> const ThinPixel<N> *;

[[nodiscard]] auto operator++() -> iterator &;
};

private:
void replace_top_node(std::size_t i);
};
} // namespace internal

} // namespace hictk

#include "./impl/pixel_impl.hpp"
1 change: 1 addition & 0 deletions src/libhictk/transformers/include/hictk/transformers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@

#include "hictk/transformers/coarsen.hpp"
#include "hictk/transformers/join_genomic_coords.hpp"
#include "hictk/transformers/pixel_merger.hpp"
#include "hictk/transformers/stats.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "hictk/bin_table.hpp"
#include "hictk/pixel.hpp"
#include "hictk/transformers/pixel_merger.hpp"

namespace hictk::transformers {

Expand Down Expand Up @@ -201,19 +202,18 @@ inline void CoarsenPixels<PixelIt>::iterator::coarsen_chunk_pass2(const ColumnMe
}
}

using PixelMerger = internal::PixelMerger<RowIt>;
using PixelMerger = transformers::PixelMerger<RowIt>;
PixelMerger merger(heads, tails);
auto first = merger.begin();
auto last = merger.end();

if (_buffer.use_count() != 1) {
_buffer = std::make_shared<BufferT>();
}
_buffer->clear();
_buffer->emplace_back(merger.next());
while (!!_buffer->back()) {
const auto value = merger.next();
if (!value) {
break; // all pixels have been processed
}
_buffer->emplace_back(*first);
while (!!_buffer->back() && ++first != last) {
const auto value = *first;

// Insert or merge pixels
auto &last_pixel = _buffer->back();
Expand Down
Loading

0 comments on commit d9727c0

Please sign in to comment.