From 77b7e1f9b0b529eb8ddf3dacae68c6c28d6ef122 Mon Sep 17 00:00:00 2001 From: Christopher Woods Date: Thu, 27 Jul 2023 18:13:17 +0100 Subject: [PATCH] Backported all of the changes --- corelib/src/libs/SireIO/filetrajectory.cpp | 41 ++++++++++---- corelib/src/libs/SireIO/filetrajectory.h | 4 ++ corelib/src/libs/SireIO/pdb2.cpp | 63 +++++++++++++++++++--- corelib/src/libs/SireMol/selectorm.hpp | 16 +++++- doc/source/changelog.rst | 15 +++++- src/sire/mol/_trajectory.py | 2 +- 6 files changed, 120 insertions(+), 21 deletions(-) diff --git a/corelib/src/libs/SireIO/filetrajectory.cpp b/corelib/src/libs/SireIO/filetrajectory.cpp index a3157647c..66b2b1608 100644 --- a/corelib/src/libs/SireIO/filetrajectory.cpp +++ b/corelib/src/libs/SireIO/filetrajectory.cpp @@ -98,15 +98,20 @@ SIREIO_EXPORT QDataStream &operator>>(QDataStream &ds, FileTrajectory &file) return ds; } -FileTrajectory::FileTrajectory() : TrajectoryData() +FileTrajectory::FileTrajectory() + : TrajectoryData(), last_loaded_frame_index(-1) { } -FileTrajectory::FileTrajectory(const MoleculeParser &p) : TrajectoryData(), parser(p) +FileTrajectory::FileTrajectory(const MoleculeParser &p) + : TrajectoryData(), parser(p), last_loaded_frame_index(-1) { } -FileTrajectory::FileTrajectory(const FileTrajectory &other) : TrajectoryData(other), parser(other.parser) +FileTrajectory::FileTrajectory(const FileTrajectory &other) + : TrajectoryData(other), parser(other.parser), + last_loaded_frame(other.last_loaded_frame), + last_loaded_frame_index(other.last_loaded_frame_index) { } @@ -178,25 +183,43 @@ Frame FileTrajectory::getFrame(int i) const { i = SireID::Index(i).map(this->nFrames()); + // It is often the case that we will repeatedly load the same frame + // Check for this here + QMutexLocker lkr(const_cast(&(this->frame_mutex))); + FileTrajectory *nonconst_this = const_cast(this); + + if (i == last_loaded_frame_index) + { + return this->last_loaded_frame; + } + + // Ok - we are loading a different frame - see if this has been cached auto cached_data = CentralCache::get(QString("FileTrajectory::%1::%2").arg(qint64(this)).arg(i)); - QMutexLocker lkr(cached_data->mutex()); + QMutexLocker lkr2(cached_data->mutex()); if (not cached_data->isEmpty()) { try { - return cached_data->data().value(); + nonconst_this->last_loaded_frame = cached_data->data().value(); + lkr2.unlock(); + nonconst_this->last_loaded_frame_index = i; + return this->last_loaded_frame; } catch (...) { } } - // something went wrong - read the frame again - auto frame = parser.read().getFrame(i); - cached_data->setData(QVariant::fromValue(frame), frame.numBytes()); - return frame; + // something went wrong - either the frame was corrupt or we + // haven't loaded it yet + nonconst_this->last_loaded_frame = parser.read().getFrame(i); + nonconst_this->last_loaded_frame_index = i; + cached_data->setData(QVariant::fromValue(this->last_loaded_frame), + this->last_loaded_frame.numBytes()); + lkr2.unlock(); + return this->last_loaded_frame; } bool FileTrajectory::isEditable() const diff --git a/corelib/src/libs/SireIO/filetrajectory.h b/corelib/src/libs/SireIO/filetrajectory.h index eba8cad32..9dbcc71fe 100644 --- a/corelib/src/libs/SireIO/filetrajectory.h +++ b/corelib/src/libs/SireIO/filetrajectory.h @@ -83,6 +83,10 @@ namespace SireIO private: MoleculeParserPtr parser; + + QMutex frame_mutex; + SireMol::Frame last_loaded_frame; + int last_loaded_frame_index; }; } // namespace SireIO diff --git a/corelib/src/libs/SireIO/pdb2.cpp b/corelib/src/libs/SireIO/pdb2.cpp index 62aa6ebe6..b5724dfc4 100644 --- a/corelib/src/libs/SireIO/pdb2.cpp +++ b/corelib/src/libs/SireIO/pdb2.cpp @@ -2263,9 +2263,12 @@ int PDB2::parseWholeMolecule(const SireMol::Molecule &molecule, QVector PDBAtom atom(atoms(i), is_ter[i], map, errors); last_atom = atom; + // Set the serial number. + atom.setSerial(1 + iline++); + // Generate a PDB atom data record. auto record = atom.toPDBRecord(); - record.replace(6, 5, QString::number(1 + iline++).rightJustified(5, ' ')); + record.replace(6, 5, QString::number(atom.getNumber()).rightJustified(5, ' ')); last_record = record; // If this record follows a TER, yet belongs to the same chain, then @@ -2280,11 +2283,18 @@ int PDB2::parseWholeMolecule(const SireMol::Molecule &molecule, QVector atom_lines.append(record); prev_ter = false; + // Work out the serial number for the TER record. + auto ter_serial = 1 + atom.getNumber(); + + // Cap serial number to formatting width. + if (ter_serial > 99999) + ter_serial = 99999; + // Add a TER record for this atom. if (is_ter[i]) { atom_lines.append(QString("TER %1 %2 %3\%4\%5") - .arg(iline + 1, 5) + .arg(ter_serial, 5) .arg(record.mid(17, 3)) .arg(record.at(21)) .arg(record.mid(22, 4)) @@ -2292,13 +2302,23 @@ int PDB2::parseWholeMolecule(const SireMol::Molecule &molecule, QVector prev_ter = true; prev_chain = atom.getChainID(); + iline++; } } } // Now append all of the post-TER records. for (auto &line : post_ter_lines) - atom_lines.append(line.replace(6, 5, QString::number(1 + iline++).rightJustified(5, ' '))); + { + // Work out the serial number. + auto serial = 1 + iline++; + + // Cap serial number to formatting width. + if (serial > 99999) + serial = 99999; + + atom_lines.append(line.replace(6, 5, QString::number(serial).rightJustified(5, ' '))); + } return iline; } @@ -2440,9 +2460,12 @@ int PDB2::parseMolecule(const SireMol::MoleculeView &sire_mol, QVector PDBAtom atom(molecule.atom(atomidx), is_ter[i], map, errors); last_atom = atom; + // Set the serial number. + atom.setSerial(1 + iline++); + // Generate a PDB atom data record. auto record = atom.toPDBRecord(); - record.replace(6, 5, QString::number(1 + iline++).rightJustified(5, ' ')); + record.replace(6, 5, QString::number(atom.getNumber()).rightJustified(5, ' ')); last_record = record; // If this record follows a TER, yet belongs to the same chain, then @@ -2457,11 +2480,18 @@ int PDB2::parseMolecule(const SireMol::MoleculeView &sire_mol, QVector atom_lines.append(record); prev_ter = false; + // Work out the serial number for the TER record. + auto ter_serial = 1 + atom.getNumber(); + + // Cap serial number to formatting width. + if (ter_serial > 99999) + ter_serial = 99999; + // Add a TER record for this atom. if (is_ter[i]) { atom_lines.append(QString("TER %1 %2 %3\%4\%5") - .arg(iline + 1, 5) + .arg(ter_serial, 5) .arg(record.mid(17, 3)) .arg(record.at(21)) .arg(record.mid(22, 4)) @@ -2469,6 +2499,7 @@ int PDB2::parseMolecule(const SireMol::MoleculeView &sire_mol, QVector prev_ter = true; prev_chain = atom.getChainID(); + iline++; } } } @@ -2479,12 +2510,21 @@ int PDB2::parseMolecule(const SireMol::MoleculeView &sire_mol, QVector if (last_record.isEmpty()) atom_lines.append("TER"); else + { + // Work out the serial number for the TER record. + auto ter_serial = 1 + iline++; + + // Cap serial number to formatting width. + if (ter_serial > 99999) + ter_serial = 99999; + atom_lines.append(QString("TER %1 %2 %3\%4\%5") - .arg(iline + 1, 5) + .arg(ter_serial, 5) .arg(last_record.mid(17, 3)) .arg(last_record.at(21)) .arg(last_record.mid(22, 4)) .arg(last_record.at(26))); + } prev_ter = true; prev_chain = last_atom.getChainID(); @@ -2493,7 +2533,16 @@ int PDB2::parseMolecule(const SireMol::MoleculeView &sire_mol, QVector // Now append all of the post-TER records. for (auto &line : post_ter_lines) - atom_lines.append(line.replace(6, 5, QString::number(1 + iline++).rightJustified(5, ' '))); + { + // Work out the serial number. + auto serial = 1 + iline++; + + // Cap serial number to formatting width. + if (serial > 99999) + serial = 99999; + + atom_lines.append(line.replace(6, 5, QString::number(serial).rightJustified(5, ' '))); + } return iline; } diff --git a/corelib/src/libs/SireMol/selectorm.hpp b/corelib/src/libs/SireMol/selectorm.hpp index d956209c8..d7ca39e0c 100644 --- a/corelib/src/libs/SireMol/selectorm.hpp +++ b/corelib/src/libs/SireMol/selectorm.hpp @@ -376,9 +376,21 @@ namespace SireMol vws.detach(); const int n = vws.count(); + // make sure that the frame has been loaded into the cache + // before we loop in parallel - this will stop contention + // from threads that are copying data from the thread that + // wants to load the frame + if (n == 0) + return; + + vws[0].loadFrame(frame, map); + + if (n == 1) + return; + if (_usesParallel(vws, map)) { - tbb::parallel_for(tbb::blocked_range(0, n), [&](tbb::blocked_range r) + tbb::parallel_for(tbb::blocked_range(1, n), [&](tbb::blocked_range r) { for (int i = r.begin(); i < r.end(); ++i) { @@ -387,7 +399,7 @@ namespace SireMol } else { - for (int i = 0; i < n; ++i) + for (int i = 1; i < n; ++i) { vws[i].loadFrame(frame, map); } diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index f9cfca00d..8b21b603d 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -15,6 +15,12 @@ organisation on `GitHub `__. `2023.3.1 `__ - July 2023 ----------------------------------------------------------------------------------------- +* Fixed a bug in ``analyse_freenrg`` which produced incorrect TI results + when not all lambda windows were run for equal lengths of time. + +* Make sure atom serial number in PDB files are capped when renumbering when + TER records are present. + * Fixed a bug in the AmberRst parser where velocities were written with the wrong unit (A ps-1 instead of AKMA time). Also added the correct labels to the AmberRst file. @@ -24,8 +30,13 @@ organisation on `GitHub `__. * Fixed a bug in the writing of DCD headers, meaning that the files couldn't be read by other DCD reader software (written non-compliant header) -* Fixed a bug in ``analyse_freenrg`` which produced incorrect TI results - when not all lambda windows were run for equal lengths of time. +* Fixed a bug in the trajectory measure code, where the ProgressBar class was + not being properly imported (`fix_88 `__). + +* Fixed a deadlock in the file trajectory loading code. This was because multiple threads + trying to read the same frame lead to starvation of the thread that had progressed to + read the frame. Now a single thread loads the frame, with subsequent threads using + this cached load (`fix_88 `__). * Optimised the speed of viewing large molecules in NGLView, plus of searching for water molecules. Added a new ``is_water`` function. Optimised the diff --git a/src/sire/mol/_trajectory.py b/src/sire/mol/_trajectory.py index c302a87fb..2bb769e91 100644 --- a/src/sire/mol/_trajectory.py +++ b/src/sire/mol/_trajectory.py @@ -677,7 +677,7 @@ def _custom_measures(self, func, to_pandas): colnames.append(key) columns.append(np.zeros(nframes, dtype=float)) - from ..utils import Console + from ..base import ProgressBar with ProgressBar( total=nframes, text="Looping through frames"