Skip to content

Commit

Permalink
Merge pull request #22 from TieuLongPhan/dev
Browse files Browse the repository at this point in the history
Fix performance issues
  • Loading branch information
klausweinbauer authored May 14, 2024
2 parents ce618bb + e6b589d commit 2147d6f
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 52 deletions.
5 changes: 5 additions & 0 deletions Test/SynUtils/test_chem_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,8 @@ def test_remove_stereochemistry(self):
smiles = "C[C@H][C@@H]"
result = normalize_smiles(smiles)
self.assertEqual("CCC", result)

def test_edge_case_1(self):
    """A multi-character chirality tag (``@OH12``) must be stripped whole.

    Only the bare element symbol should remain inside the bracket atom
    after normalization.
    """
    expected = "F[Sb](F)(F)(F)(F)F"
    self.assertEqual(expected, normalize_smiles("F[Sb@OH12](F)(F)(F)(F)F"))
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "synrbl"
version = "0.0.11"
version = "0.0.12"
authors = [
{name="Tieu Long Phan", email="long.tieu_phan@uni-leipzig.de"},
{name="Klaus Weinbauer", email="klaus@bioinf.uni-leipzig.de"}
Expand Down
50 changes: 1 addition & 49 deletions synrbl/SynMCSImputer/MissingGraph/find_graph_dict.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pandas as pd

from rdkit import Chem
from rdkit.rdBase import BlockLogs
from joblib import Parallel, delayed
from typing import List
import multiprocessing
Expand Down Expand Up @@ -88,55 +87,9 @@ def find_single_graph_parallel(mcs_mol_list, sorted_reactants_mol_list, n_jobs=4
- 'issue' (str): Any issues encountered during processing.
"""

# def process_single_pair(reactant_mol, mcs_mol):
# try:
# block = BlockLogs()
# (
# mols,
# boundary_atoms_products,
# nearest_neighbor_products,
# ) = FindMissingGraphs.find_missing_parts_pairs(reactant_mol, mcs_mol)
# del block
# return {
# "smiles": [
# Chem.MolToSmiles(mol) if mol is not None else None for mol in mols
# ],
# "boundary_atoms_products": boundary_atoms_products,
# "nearest_neighbor_products": nearest_neighbor_products,
# "issue": "",
# }
# except Exception as e:
# return {
# "smiles": [],
# "boundary_atoms_products": [],
# "nearest_neighbor_products": [],
# "issue": str(e),
# }

# def process_single_pair_safe(reactant_mol, mcs_mol, job_timeout=5):
# pool = multiprocessing.Pool(1)
# async_result = pool.apply_async(
# process_single_pair,
# (
# reactant_mol,
# mcs_mol,
# ),
# )
# try:
# return async_result.get(job_timeout)
# except multiprocessing.TimeoutError:
# return {
# "smiles": [],
# "boundary_atoms_products": [],
# "nearest_neighbor_products": [],
# "issue": "Find Missing Graph terminated by timeout.",
# }
# finally:
# pool.terminate() # Terminate the pool to release resources
def process_single_pair(reactant_mol, mcs_mol, job_timeout=2):
try:
block = BlockLogs()
pool = multiprocessing.Pool(1)
pool = multiprocessing.pool.ThreadPool(1)
async_result = pool.apply_async(
FindMissingGraphs.find_missing_parts_pairs,
(
Expand All @@ -146,7 +99,6 @@ def process_single_pair(reactant_mol, mcs_mol, job_timeout=2):
)
result = async_result.get(job_timeout)
pool.terminate() # Terminate the pool to release resources
del block
return {
"smiles": [
Chem.MolToSmiles(mol) if mol is not None else None
Expand Down
3 changes: 3 additions & 0 deletions synrbl/SynMCSImputer/MissingGraph/find_missing_graphs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import copy

from rdkit import Chem
from rdkit.rdBase import BlockLogs
from synrbl.SynMCSImputer.SubStructure.substructure_analyzer import SubstructureAnalyzer
from synrbl.SynMCSImputer.MissingGraph.molcurator import MoleculeCurator
from typing import List, Dict, Optional, Tuple
Expand Down Expand Up @@ -74,6 +75,7 @@ def find_missing_parts_pairs(
- list of lists: Each sublist contains the nearest neighbors of the
corresponding molecule.
"""
block = BlockLogs()
missing_parts_list = []
boundary_atoms_lists = []
nearest_neighbor_lists = []
Expand Down Expand Up @@ -198,6 +200,7 @@ def find_missing_parts_pairs(
boundary_atoms_lists.append(None)
nearest_neighbor_lists.append(None)

del block
return missing_parts_list, boundary_atoms_lists, nearest_neighbor_lists

@staticmethod
Expand Down
6 changes: 6 additions & 0 deletions synrbl/SynMCSImputer/SubStructure/mcs_graph_detector.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import gc

from rdkit import Chem
from rdkit.Chem import rdFMCS
from rdkit.Chem import rdRascalMCES
Expand Down Expand Up @@ -79,6 +81,7 @@ def IterativeMCSReactionPairs(
"""

# Sort reactants based on the specified method
mcs_results = []
if sort == "MCIS":
if params is None:
params = rdFMCS.MCSParameters()
Expand Down Expand Up @@ -116,6 +119,9 @@ def IterativeMCSReactionPairs(
else:
raise ValueError("Invalid sort method. Choose 'MCS' or 'Fragments'.")

del mcs_results
gc.collect()

mcs_list = []
current_product = product_mol
for reactant, _ in sorted_reactants:
Expand Down
9 changes: 7 additions & 2 deletions synrbl/SynUtils/chem_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,14 @@ def remove_atom_mapping(smiles: str) -> str:
return smiles


def remove_stereo_chemistry(smiles: str) -> str:
    """Strip atom-centred chirality tags from a SMILES string.

    Removes the plain ``@`` / ``@@`` markers as well as the chirality-class
    forms ``@TH<n>``, ``@AL<n>``, ``@SP<n>``, ``@TB<n>`` and ``@OH<n>``.
    Everything else inside a bracket atom (element symbol, hydrogen count,
    charge, atom-map number) is left untouched, so ``[C@H]`` becomes
    ``[CH]`` and ``[Sb@OH12]`` becomes ``[Sb]``.

    Args:
        smiles: A SMILES (or reaction SMILES) string.

    Returns:
        The input string with every chirality tag removed. Strings without
        an ``@`` are returned unchanged.
    """
    # Match the tag itself rather than "everything after the @": the optional
    # group consumes either a class designator with its 1-2 digit index or a
    # second "@". A following hydrogen count ("H"), charge ("+") or atom map
    # (":1") is not part of the tag and must survive.
    pattern = re.compile(r"@(?:(?:TH|AL|SP|TB|OH)\d{1,2}|@)?")
    return pattern.sub("", smiles)


def normalize_smiles(smiles: str) -> str:
smiles = smiles.replace("@", "")
smiles = remove_stereo_chemistry(smiles)
if ">>" in smiles:
return ">>".join([normalize_smiles(t) for t in smiles.split(">>")])
elif "." in smiles:
Expand Down Expand Up @@ -250,7 +256,6 @@ def count_radical_atoms(smiles: str, atomic_num: int) -> int:

# Iterate over all atoms in the molecule
for atom in mol.GetAtoms():

if atom.GetAtomicNum() == atomic_num and atom.GetNumRadicalElectrons() > 0:
# Further check if the atom is isolated (has no neighbors)
if len(atom.GetNeighbors()) == 0:
Expand Down

0 comments on commit 2147d6f

Please sign in to comment.