From 2855a0ae2848778be207766195535a7e444bb5c2 Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Sat, 23 Dec 2023 17:12:18 +0100 Subject: [PATCH 1/2] Fix performance regression in hictk::balance The regression is caused by calling operator()() of MargsVector inside a loop. This operator now involves a non-negligible amount of computation when processing high-resolution matrices. --- .../balancing/include/hictk/balancing/impl/ice_impl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libhictk/balancing/include/hictk/balancing/impl/ice_impl.hpp b/src/libhictk/balancing/include/hictk/balancing/impl/ice_impl.hpp index 68bde4ed..e7a5e14f 100644 --- a/src/libhictk/balancing/include/hictk/balancing/impl/ice_impl.hpp +++ b/src/libhictk/balancing/include/hictk/balancing/impl/ice_impl.hpp @@ -414,8 +414,9 @@ inline void ICE::min_nnz_filtering(MargsVector& marg, const MatrixT& matrix, nonstd::span biases, std::size_t min_nnz, BS::thread_pool* tpool) { matrix.marginalize_nnz(marg, tpool); + const auto marg_ = marg(); for (std::size_t i = 0; i < biases.size(); ++i) { - if (marg()[i] < static_cast(min_nnz)) { + if (marg_[i] < static_cast(min_nnz)) { biases[i] = 0; } } From c8f2e06c11dbb58249ff7498e52a9e74dd19fec3 Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Sat, 23 Dec 2023 20:47:19 +0100 Subject: [PATCH 2/2] Reduce memory usage when balancing chunked matrices --- .../balancing/impl/sparse_matrix_impl.hpp | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/src/libhictk/balancing/include/hictk/balancing/impl/sparse_matrix_impl.hpp b/src/libhictk/balancing/include/hictk/balancing/impl/sparse_matrix_impl.hpp index ca77c1b4..aa9d16ef 100644 --- a/src/libhictk/balancing/include/hictk/balancing/impl/sparse_matrix_impl.hpp +++ b/src/libhictk/balancing/include/hictk/balancing/impl/sparse_matrix_impl.hpp @@ -370,17 +370,10 @@ inline
void SparseMatrixChunked::marginalize(MargsVector& marg, BS::thread_pool* fs.exceptions(_fs.exceptions()); fs.open(_path, std::ios::in | std::ios::binary); auto matrix = _matrix; - MargsVector marg_local(marg.size()); for (const auto offset : nonstd::span(_index).subspan(istart, iend - istart)) { fs.seekg(offset); matrix.deserialize(fs, *zstd_dctx); - matrix.marginalize(marg_local, nullptr, false); - } - - for (std::size_t i = 0; i < marg_local.size(); ++i) { - if (marg_local[i] != 0) { - marg.add(i, marg_local[i]); - } + matrix.marginalize(marg, nullptr, false); } }; @@ -413,17 +406,11 @@ inline void SparseMatrixChunked::marginalize_nnz(MargsVector& marg, BS::thread_p fs.exceptions(_fs.exceptions()); fs.open(_path, std::ios::in | std::ios::binary); auto matrix = _matrix; - MargsVector marg_local(marg.size()); for (const auto offset : nonstd::span(_index).subspan(istart, iend - istart)) { fs.seekg(offset); matrix.deserialize(fs, *zstd_dctx); matrix.marginalize_nnz(marg, nullptr, false); } - for (std::size_t i = 0; i < marg_local.size(); ++i) { - if (marg_local[i] != 0) { - marg.add(i, marg_local[i]); - } - } }; assert(!marg.empty()); @@ -457,16 +444,10 @@ inline void SparseMatrixChunked::times_outer_product_marg(MargsVector& marg, fs.exceptions(_fs.exceptions()); fs.open(_path, std::ios::in | std::ios::binary); auto matrix = _matrix; - MargsVector marg_local(marg.size()); for (const auto offset : nonstd::span(_index).subspan(istart, iend - istart)) { fs.seekg(offset); matrix.deserialize(fs, *zstd_dctx); - matrix.times_outer_product_marg(marg_local, biases, weights, nullptr, false); - } - for (std::size_t i = 0; i < marg.size(); ++i) { - if (marg_local[i] != 0) { - marg.add(i, marg_local[i]); - } + matrix.times_outer_product_marg(marg, biases, weights, nullptr, false); } };