Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RMSD with multicore and timeout functionality #113

Merged
merged 30 commits into from
Jun 23, 2024
Merged
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8ecd2c2
First implementation of rmsd function with timeout and multicore func…
Jnelen Mar 11, 2024
4ecdbae
Merge branch 'develop' into timeout
RMeli Mar 11, 2024
56a1eae
Merge branch 'develop' into timeout
Jnelen Mar 12, 2024
52e1e21
Apply suggestions from code review
Jnelen Mar 12, 2024
1353fb5
Fix Flake8 problems
Jnelen Mar 12, 2024
0fe91a7
Slight improvements to _rmsd_timeout
Jnelen Mar 12, 2024
56a215b
Verbose implementation of improved functionality:
Jnelen Mar 12, 2024
8dcc22b
Transition from using a Queue to a shared Value
Jnelen Mar 13, 2024
1fe4f9a
Finish transition to multiprocessing Values
Jnelen Mar 13, 2024
7e0c311
Suppress MyPy errors at line 494 and 538
Jnelen Mar 14, 2024
713b833
Add basic tests for prmsdwrapper
Jnelen Mar 17, 2024
9491130
Apply suggestions from code review
Jnelen Mar 17, 2024
28c41a3
Initial version of the multicore rmsdwrapper with timeout feature usi…
Jnelen Apr 7, 2024
fa25924
Revert rmsd.py and test_rmsd.py
Jnelen Apr 8, 2024
0ff8b65
Merge latest develop commit
Jnelen Apr 9, 2024
dc1b0db
Add importskip if Pebble isn't installed
Jnelen Apr 9, 2024
1258956
Include Pebble installation for the RDKit CI tests
Jnelen Apr 10, 2024
54e28a1
Adress the a lot of the review comments
Jnelen Apr 11, 2024
ead245b
add chunksize check when timeout is set with prmsdwrapper
Jnelen Apr 14, 2024
cef2b0d
Add more tests for prmsdwrapper
Jnelen Apr 22, 2024
1a0527b
Simplify num_worker determination
Jnelen May 16, 2024
c0b0fa0
Merge branch 'develop' into timeout
RMeli May 28, 2024
20d1965
Incorporate suggested changes from review
Jnelen May 30, 2024
bca1889
Merge branch 'timeout' of https://github.com/Jnelen/spyrmsd into timeout
Jnelen May 30, 2024
5729f6b
Improve documentation regarding chunksize enforcement
Jnelen May 31, 2024
abe6353
Merge branch 'RMeli:develop' into timeout
Jnelen Jun 21, 2024
dbade96
Update the Changelog
Jnelen Jun 21, 2024
70765d5
Update tests/test_parallel.py
Jnelen Jun 21, 2024
73ed019
Fix formatting
Jnelen Jun 21, 2024
10a47be
More extensive testing
Jnelen Jun 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 179 additions & 1 deletion spyrmsd/rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def rmsdwrapper(
cache: bool = True,
) -> Any:
"""
Compute RMSD between two molecule.
Compute RMSD between two molecules.

Parameters
----------
Expand Down Expand Up @@ -373,3 +373,181 @@ def rmsdwrapper(
)

return RMSDlist


import os
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from multiprocessing import Process, Queue
Jnelen marked this conversation as resolved.
Show resolved Hide resolved


def _rmsd_queue(
molref: molecule.Molecule,
mols: Union[molecule.Molecule, List[molecule.Molecule]],
queue: Queue,
symmetry: bool = True,
center: bool = False,
minimize: bool = False,
strip: bool = True,
cache: bool = True,
) -> Any:
"""
Compute RMSD between two molecules and put it in a queue.
RMeli marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
molref: molecule.Molecule
Reference molecule
mols: Union[molecule.Molecule, List[molecule.Molecule]]
Molecules to compare to reference molecule
queue: Queue
The queue where to put the result
symmetry: bool, optional
Symmetry-corrected RMSD (using graph isomorphism)
center: bool, optional
Center molecules at origin
minimize: bool, optional
Minimised RMSD (using the quaternion polynomial method)
strip: bool, optional
Strip hydrogen atoms

Returns
-------
None
The RMSD result is put in the queue.
"""
queue.put(
rmsdwrapper(
molref=molref,
mols=mols,
symmetry=symmetry,
center=center,
minimize=minimize,
strip=strip,
cache=cache,
)
)


def rmsd_timeout(
molref: molecule.Molecule,
mols: Union[molecule.Molecule, List[molecule.Molecule]],
symmetry: bool = True,
center: bool = False,
minimize: bool = False,
strip: bool = True,
cache: bool = True,
timeout: Optional[float] = 5, ## Do you think this is a good default timeout value?
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
) -> Any:
"""
Compute RMSD between two molecules with a timeout.

Parameters
----------
molref: molecule.Molecule
Reference molecule
mols: Union[molecule.Molecule, List[molecule.Molecule]]
Molecules to compare to reference molecule
symmetry: bool, optional
Symmetry-corrected RMSD (using graph isomorphism)
center: bool, optional
Center molecules at origin
minimize: bool, optional
Minimised RMSD (using the quaternion polynomial method)
strip: bool, optional
Strip hydrogen atoms
timeout: float, optional
After how many seconds to stop the RMSD calculations

Returns
-------
List[float]
RMSDs
"""

if not isinstance(mols, list):
mols = [mols]

## Inspired by https://superfastpython.com/task-with-timeout-child-process/
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
queue = Queue()
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
process = Process(
target=_rmsd_queue,
args=(molref, mols, queue, symmetry, center, minimize, strip, cache),
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
)

process.start()
process.join(timeout=timeout)

## Check if the process finished running successfully
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
if not process.exitcode == 0:
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
## Actually terminate the process
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
process.terminate()

## Match the length of the mols list (Maybe this should be handled differently?)
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
return [None] * len(mols)
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
else:
## Retrieve the result from the finished job
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
return queue.get()
Jnelen marked this conversation as resolved.
Show resolved Hide resolved


def rmsd_parallel(
molrefs: List[molecule.Molecule],
mols: List[molecule.Molecule],
RMeli marked this conversation as resolved.
Show resolved Hide resolved
num_workers: int = 1,
RMeli marked this conversation as resolved.
Show resolved Hide resolved
symmetry: bool = True,
center: bool = False,
minimize: bool = False,
strip: bool = True,
cache: bool = True,
timeout: Optional[float] = None,
) -> Any:
"""
Compute RMSD between two molecules with a timeout.

Parameters
----------
molrefs: molecule.Molecule
Reference molecule
mols: Union[molecule.Molecule, List[molecule.Molecule]]
Molecules to compare to reference molecule
num_workers: int
Amount of processor to use for the parallel calculations
symmetry: bool, optional
Symmetry-corrected RMSD (using graph isomorphism)
center: bool, optional
Center molecules at origin
minimize: bool, optional
Minimised RMSD (using the quaternion polynomial method)
strip: bool, optional
Strip hydrogen atoms
timeout: float, optional
After how many seconds to stop the RMSD calculations

Returns
-------
List[float]
RMSDs
"""

## Ensure the num_workers is less or equal than the max number of CPUs
if num_workers > os.cpu_count():
## Maybe we should raise some kind of a warning here?
num_workers = os.cpu_count()
Jnelen marked this conversation as resolved.
Show resolved Hide resolved

## Ensure molrefs and mols have the same len
Jnelen marked this conversation as resolved.
Show resolved Hide resolved
if not len(molrefs) == len(mols):
raise ValueError("The input mol lists have different lengths")

with ProcessPoolExecutor(max_workers=num_workers) as executor:
RMeli marked this conversation as resolved.
Show resolved Hide resolved
rsmd_partial = partial(
rmsd_timeout,
symmetry=symmetry,
center=center,
minimize=minimize,
strip=strip,
cache=cache,
timeout=timeout,
)
results = executor.map(rsmd_partial, molrefs, mols)

return list(results)
Loading