RMeli · RMeli · Jun 23, 2024 · Mar 11, 2024 · Mar 11, 2024 · Mar 12, 2024
diff --git a/spyrmsd/rmsd.py b/spyrmsd/rmsd.py
@@ -1,3 +1,8 @@
+import os
+from concurrent.futures import ProcessPoolExecutor
+from functools import partial
+from multiprocessing import Process, Value
+from multiprocessing.sharedctypes import Synchronized
 from typing import Any, List, Optional, Tuple, Union
 
 import numpy as np
@@ -307,7 +312,7 @@ def rmsdwrapper(
     cache: bool = True,
 ) -> Any:
     """
-    Compute RMSD between two molecule.
+    Compute RMSD between two molecules.
 
     Parameters
     ----------
@@ -373,3 +378,189 @@ def rmsdwrapper(
             )
 
     return RMSDlist
+
+
+def _rmsd_process(
+    molref: molecule.Molecule,
+    mols: Union[molecule.Molecule, List[molecule.Molecule]],
+    result: Synchronized,
+    symmetry: bool = True,
+    center: bool = False,
+    minimize: bool = False,
+    strip: bool = True,
+    cache: bool = True,
+) -> Any:
+    """
+    Run ``rmsdwrapper`` and publish the first RMSD through a shared value.
+
+    This is the function executed in the child process started by
+    ``_rmsd_timeout``; the parent reads the outcome from ``result``.
+
+    Parameters
+    ----------
+    molref: molecule.Molecule
+        Reference molecule
+    mols: Union[molecule.Molecule, List[molecule.Molecule]]
+        Molecules to compare to reference molecule
+    result: Synchronized
+        Shared object whose ``value`` attribute receives the RMSD
+    symmetry: bool, optional
+        Symmetry-corrected RMSD (using graph isomorphism)
+    center: bool, optional
+        Center molecules at origin
+    minimize: bool, optional
+        Minimised RMSD (using the quaternion polynomial method)
+    strip: bool, optional
+        Strip hydrogen atoms
+    cache: bool, optional
+        Passed through unchanged to ``rmsdwrapper``
+
+    Returns
+    -------
+    None
+        Nothing is returned; the RMSD is written to ``result.value``.
+
+    Notes
+    -----
+    Only the first element of the list returned by ``rmsdwrapper`` is stored.
+    """
+    options = dict(
+        symmetry=symmetry,
+        center=center,
+        minimize=minimize,
+        strip=strip,
+        cache=cache,
+    )
+    rmsds = rmsdwrapper(molref=molref, mols=mols, **options)
+
+    # Hand the first RMSD back to the parent process
+    result.value = rmsds[0]
+
+
+def _rmsd_timeout(
+    molref: molecule.Molecule,
+    mols: Union[molecule.Molecule, List[molecule.Molecule]],
+    symmetry: bool = True,
+    center: bool = False,
+    minimize: bool = False,
+    strip: bool = True,
+    cache: bool = True,
+    timeout: Optional[float] = None,
+) -> Any:
+    """
+    Compute RMSD between two molecules with a timeout.
+
+    The calculation runs in a child process so that it can be abandoned
+    once ``timeout`` seconds have elapsed.
+
+    Parameters
+    ----------
+    molref: molecule.Molecule
+        Reference molecule
+    mols: Union[molecule.Molecule, List[molecule.Molecule]]
+        Molecules to compare to reference molecule
+    symmetry: bool, optional
+        Symmetry-corrected RMSD (using graph isomorphism)
+    center: bool, optional
+        Center molecules at origin
+    minimize: bool, optional
+        Minimised RMSD (using the quaternion polynomial method)
+    strip: bool, optional
+        Strip hydrogen atoms
+    cache: bool, optional
+        Passed through unchanged to the child process
+    timeout: float, optional
+        After how many seconds to stop the RMSD calculations
+        (``None`` waits until the child process exits)
+
+    Returns
+    -------
+    float
+        RMSD stored by the child process, when it exited with code 0
+    List[float]
+        ``len(mols)`` NaN values, when the child process timed out or
+        did not exit with code 0
+
+    Notes
+    -----
+    Timeout implementation inspired by https://superfastpython.com/task-with-timeout-child-process/
+    """
+
+    if not isinstance(mols, list):
+        mols = [mols]
+
+    # RMSD is computed by the child process
+    # The results need to be shared with the parent process
+    # https://docs.python.org/3/library/multiprocessing.html#sharing-state-between-processes
+    # "d" is a C double; "f" (C float) would round the RMSD to single precision
+    result = Value("d")
+    process = Process(
+        target=_rmsd_process,
+        args=(molref, mols, result, symmetry, center, minimize, strip, cache),
+    )
+
+    process.start()
+    process.join(timeout=timeout)
+
+    # exitcode is None while the child is still running (timeout reached)
+    # and non-zero when the child failed
+    if process.exitcode != 0:
+        # Actually terminate the process if it is still running
+        if process.is_alive():
+            process.terminate()
+            # Wait for the terminated child so it is not left un-joined
+            process.join()
+
+        return [np.nan] * len(mols)
+
+    # Retrieve the result from the finished job.
+    # Currently MyPy gives an error, it's being worked on: https://github.com/python/typeshed/issues/8799
+    return result.value  # type: ignore[attr-defined]
+
+
+def prmsdwrapper(
+    molrefs: Union[molecule.Molecule, List[molecule.Molecule]],
+    mols: Union[molecule.Molecule, List[molecule.Molecule]],
+    num_workers: Union[int, None] = 1,
+    symmetry: bool = True,
+    center: bool = False,
+    minimize: bool = False,
+    strip: bool = True,
+    cache: bool = True,
+    timeout: Optional[float] = None,
+) -> Any:
+    """
+    Compute RMSDs for pairs of molecules in parallel, with a per-pair timeout.
+
+    Parameters
+    ----------
+    molrefs: Union[molecule.Molecule, List[molecule.Molecule]]
+        Reference molecule(s); a single reference is reused for every
+        molecule in ``mols``
+    mols: Union[molecule.Molecule, List[molecule.Molecule]]
+        Molecules to compare to the reference molecule(s)
+    num_workers: int or None
+        Number of worker processes to use for the parallel calculations,
+        capped at the number of CPUs; ``None`` uses all CPUs
+    symmetry: bool, optional
+        Symmetry-corrected RMSD (using graph isomorphism)
+    center: bool, optional
+        Center molecules at origin
+    minimize: bool, optional
+        Minimised RMSD (using the quaternion polynomial method)
+    strip: bool, optional
+        Strip hydrogen atoms
+    cache: bool, optional
+        Passed through unchanged to the RMSD calculation
+    timeout: float, optional
+        After how many seconds to stop each RMSD calculation
+
+    Returns
+    -------
+    List[float]
+        One RMSD per (reference, molecule) pair; NaN where the calculation
+        timed out or failed
+
+    Raises
+    ------
+    ValueError
+        If ``molrefs`` and ``mols`` have different lengths (after a single
+        reference has been repeated)
+    """
+
+    # Ensure num_workers is less or equal than the max number of CPUs.
+    # os.cpu_count() can return None, in which case a single worker is used.
+    cpu_count = os.cpu_count()
+    if cpu_count is None:
+        num_workers = 1
+    elif num_workers is None:
+        # None is allowed by the signature: use every available CPU
+        num_workers = cpu_count
+    else:
+        num_workers = min(num_workers, cpu_count)
+
+    # Cast the molecules to lists if they aren't already
+    if not isinstance(molrefs, list):
+        molrefs = [molrefs]
+    if not isinstance(mols, list):
+        mols = [mols]
+
+    # Repeat a single reference so that it is paired with every molecule
+    if len(molrefs) == 1 and len(mols) > 1:
+        molrefs = molrefs * len(mols)
+
+    # Ensure molrefs and mols have the same len
+    if len(molrefs) != len(mols):
+        raise ValueError("The input mol lists have different lengths")
+
+    rmsd_partial = partial(
+        _rmsd_timeout,
+        symmetry=symmetry,
+        center=center,
+        minimize=minimize,
+        strip=strip,
+        cache=cache,
+        timeout=timeout,
+    )
+
+    with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        results = list(executor.map(rmsd_partial, molrefs, mols))
+
+    # _rmsd_timeout gives a bare float on success but a list of NaN on
+    # timeout/failure; each call handles one pair, so keep one float per pair
+    return [
+        outcome[0] if isinstance(outcome, list) else outcome for outcome in results
+    ]