Skip to content

Commit

Permalink
add reduction template
Browse files Browse the repository at this point in the history
  • Loading branch information
TieuLongPhan committed Apr 16, 2024
1 parent fd4b122 commit c9ba148
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 0 deletions.
9 changes: 9 additions & 0 deletions synrbl/SynChemImputer/compounds_template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"aldehyde": ["template_1", "template_2", "template_3", "template_4"],
"ketone": ["template_1", "template_2", "template_3", "template_4"],
"ester": ["template_2", "template_3", "template_4"],
"acyl_chloride": ["template_2", "template_3", "template_4"],
"amide": ["template_4"],
"carboxylic_acid": ["template_4"],
"other": ["template_1"]
}
63 changes: 63 additions & 0 deletions synrbl/SynChemImputer/curate_reduction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import re
import copy
from typing import List, Dict, Any
from joblib import Parallel, delayed
from synrbl.SynChemImputer.reduction_template import ReductionTemplate
import rdkit.RDLogger as RDLogger

RDLogger.DisableLog("rdApp.*")


class CurateReduction:
    """Curate reaction records by applying reduction templates.

    Each record is a dictionary holding a reaction SMILES under a
    configurable key.  Curation delegates to
    ``ReductionTemplate.reduction_template`` and stores the outcome, plus a
    flag telling whether an isolated ``[H]`` token is still present.
    """

    @staticmethod
    def check_for_isolated_hydrogen(smiles: str) -> bool:
        """Return True if ``smiles`` contains a bare ``[H]`` token.

        The pattern matches the literal text ``[H]`` unless it is followed by
        a ``]`` that appears before any new ``[``.
        """
        pattern = r"\[H\](?![^[]*\])"
        return bool(re.search(pattern, smiles))

    @staticmethod
    def _first_smiles(curated) -> str:
        """Return the single SMILES string to inspect from a curation result.

        ``ReductionTemplate.reduction_template`` may return a plain string, a
        list of strings, or None.  Indexing a plain string with ``[0]`` would
        yield only its first character, so each shape is handled explicitly.
        """
        if not curated:
            return ""
        if isinstance(curated, str):
            return curated
        return curated[0]

    @staticmethod
    def curate(
        reaction_dict: Dict[str, Any],
        reaction_column: str = "reactions",
        compound_template: Dict[str, Any] = None,
        all_templates: Dict = None,
        return_all: bool = False,
    ) -> Dict[str, Any]:
        """Curate one reaction record.

        Args:
            reaction_dict: Record containing the reaction SMILES.
            reaction_column: Key under which the reaction SMILES is stored.
            compound_template: Mapping of functional-group name to the
                template names applicable to it.
            all_templates: Mapping of template name to template definition.
            return_all: Forwarded to ``ReductionTemplate.reduction_template``.

        Returns:
            The original ``reaction_dict`` object, untouched, when the
            reaction entry is missing or empty.  Otherwise a deep copy with
            two extra keys: ``"curated_reaction"`` (the template result) and
            ``"radical"`` (whether the first curated reaction still contains
            an isolated ``[H]``).
        """
        reactions = reaction_dict.get(reaction_column, [])
        if not reactions:
            return reaction_dict  # Early return if no reactions are found

        # Copy only once we know the record will actually be modified.
        new_reaction_dict = copy.deepcopy(reaction_dict)

        # The value under ``reaction_column`` is passed as-is as the reaction
        # SMILES to the template engine.
        curate_reaction = ReductionTemplate.reduction_template(
            reactions, compound_template, all_templates, return_all
        )
        new_reaction_dict["curated_reaction"] = curate_reaction
        new_reaction_dict["radical"] = CurateReduction.check_for_isolated_hydrogen(
            CurateReduction._first_smiles(curate_reaction)
        )

        return new_reaction_dict

    @classmethod
    def parallel_curate(
        cls,
        reaction_list: List[Dict[str, Any]],
        reaction_column: str = "reactions",
        compound_template: Dict[str, Any] = None,
        all_templates: Dict = None,
        return_all: bool = False,
        n_jobs: int = 4,
        verbose: int = 1,
    ) -> List[Dict[str, Any]]:
        """Run :meth:`curate` on every record via ``joblib.Parallel``.

        Args:
            reaction_list: Records to curate.
            reaction_column: Key under which each reaction SMILES is stored.
            compound_template: Forwarded to :meth:`curate`.
            all_templates: Forwarded to :meth:`curate`.
            return_all: Forwarded to :meth:`curate`.
            n_jobs: Passed to ``joblib.Parallel``.
            verbose: Passed to ``joblib.Parallel``.

        Returns:
            The curated records produced by :meth:`curate`, one per input.
        """
        return Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(cls.curate)(
                reaction, reaction_column, compound_template, all_templates, return_all
            )
            for reaction in reaction_list
        )
102 changes: 102 additions & 0 deletions synrbl/SynChemImputer/reduction_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from typing import List, Dict, Union
from rdkit import Chem
from fgutils import FGQuery
import rdkit.RDLogger as RDLogger

RDLogger.DisableLog("rdApp.*")


class ReductionTemplate:
    """Replace isolated radical hydrogens in a reaction with reagent templates.

    A reaction SMILES whose reactant side carries an even number of isolated
    ``[H]`` radicals is rewritten so that every pair of hydrogens is replaced
    by the reactants/products listed in a named template.
    """

    @staticmethod
    def count_radical_isolated_hydrogens(smiles):
        """Count hydrogen atoms that have no neighbors and are radicals.

        Args:
            smiles: SMILES string to inspect.

        Returns:
            Number of atoms with atomic number 1, zero neighbors and at least
            one radical electron.  Returns 0 when RDKit cannot parse
            ``smiles`` (``Chem.MolFromSmiles`` yields None).
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return 0

        return sum(
            1
            for atom in mol.GetAtoms()
            if atom.GetAtomicNum() == 1
            and len(atom.GetNeighbors()) == 0
            and atom.GetNumRadicalElectrons() > 0
        )

    @staticmethod
    def find_reactive_functional_groups(reaction_smiles: str) -> List[str]:
        """List functional groups found in the reactants but not the products.

        Args:
            reaction_smiles: Reaction in ``reactants>>products`` form.

        Returns:
            The first element of each ``FGQuery.get`` hit on the reactant
            side whose value does not appear among the product-side hits.

        Raises:
            ValueError: If ``reaction_smiles`` does not split into exactly two
                parts on ``">>"``.
        """
        query = FGQuery(use_smiles=True)
        reactant, product = reaction_smiles.split(">>")
        fg_reactant = [value[0] for value in query.get(reactant)]
        fg_product = [value[0] for value in query.get(product)]
        return [fg for fg in fg_reactant if fg not in fg_product]

    @staticmethod
    def _select_template(all_templates, template=None):
        """Pick a template definition, defaulting to ``"template_1"``.

        A name-keyed dict has no key ``0``, so the default is looked up by
        name; positional containers still fall back to their first element.
        """
        if template is not None:
            return all_templates[template]
        if isinstance(all_templates, dict):
            return all_templates["template_1"]
        return all_templates[0]

    @staticmethod
    def _apply_reagents(reactants, products, reagents, repeats):
        """Drop ``[H]`` fragments and append ``reagents`` ``repeats`` times.

        ``reagents`` is a mapping with ``"reactants"`` and ``"products"``
        lists of SMILES fragments.
        """
        reactant_list = [frag for frag in reactants.split(".") if frag != "[H]"]
        product_list = products.split(".")
        for _ in range(repeats):
            reactant_list.extend(reagents["reactants"])
            product_list.extend(reagents["products"])
        return f"{'.'.join(reactant_list)}>>{'.'.join(product_list)}"

    @staticmethod
    def process_template(
        reaction_smiles: str,
        neutralize: bool = False,
        all_templates: Dict = None,
        template: str = None,
    ) -> str:
        """Rewrite a reaction by replacing ``[H]`` pairs with a template.

        Args:
            reaction_smiles: Reaction in ``reactants>>products`` form.
            neutralize: Use the template's ``"neutral"`` variant when True,
                otherwise its ``"ion"`` variant.
            all_templates: Mapping of template name to template definition.
            template: Name of the template to apply; ``"template_1"`` is used
                when omitted.

        Returns:
            The rewritten reaction SMILES.  The input is returned unchanged
            when the reactant side has no isolated radical hydrogens or an
            odd number of them, since only pairs are replaced.

        Raises:
            KeyError: If the requested template or variant is missing.
        """
        selected_template = ReductionTemplate._select_template(
            all_templates, template
        )
        reactants, products = reaction_smiles.split(">>")
        hydrogen_count = ReductionTemplate.count_radical_isolated_hydrogens(
            reactants
        )
        # Nothing to balance (0) or an unpaired hydrogen (odd): leave as-is.
        if hydrogen_count == 0 or hydrogen_count % 2 != 0:
            return reaction_smiles

        template_type = "neutral" if neutralize else "ion"
        return ReductionTemplate._apply_reagents(
            reactants,
            products,
            selected_template[template_type],
            hydrogen_count // 2,
        )

    @classmethod
    def reduction_template(
        cls,
        reaction_smiles: str,
        compound_template: Dict,
        all_templates: Dict = None,
        return_all: bool = False,
    ) -> Union[str, List[str]]:
        """Apply every template registered for the reacting functional groups.

        Args:
            reaction_smiles: Reaction in ``reactants>>products`` form.
            compound_template: Mapping of functional-group name to the list
                of template names to try for it.
            all_templates: Mapping of template name to template definition.
            return_all: Return every rewritten reaction instead of the first.

        Returns:
            With ``return_all`` a list of rewritten reactions; otherwise the
            first rewritten reaction, or None when no group matched.  When
            the reactants show no reactive group, the ``"other"`` entry of
            ``compound_template`` is used.  If anything raises, the error is
            printed and ``[reaction_smiles]`` (a one-element list, whatever
            ``return_all`` is) is returned so callers indexing the fallback
            keep working.
        """
        try:
            fg_reactive = cls.find_reactive_functional_groups(reaction_smiles)
            if not fg_reactive:
                fg_reactive = ["other"]

            processed_smiles = []
            for group, templates in compound_template.items():
                if group not in fg_reactive:
                    continue
                processed_smiles.extend(
                    cls.process_template(
                        reaction_smiles,
                        neutralize=False,
                        all_templates=all_templates,
                        template=tpl,
                    )
                    for tpl in templates
                )

            if return_all:
                return processed_smiles
            return processed_smiles[0] if processed_smiles else None
        except Exception as e:
            # Best-effort curation: report and hand back the input reaction.
            print(e)
            return [reaction_smiles]
42 changes: 42 additions & 0 deletions synrbl/SynChemImputer/reduction_templates.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"template_1": {
"neutral": {
"reactants": ["[HH]"],
"products": []
},
"ion": {
"reactants": ["[HH]"],
"products": []
}
},
"template_2": {
"neutral": {
"reactants": ["[BH4-]", "[Na+]", "Cl"],
"products": ["[BH3]", "[Na][Cl]"]
},
"ion": {
"reactants": ["[BH4-]", "[Na+]", "[H+]"],
"products": ["[BH3]", "[Na+]"]
}
},
"template_3": {
"neutral": {
"reactants": ["[BH3-]C#N", "[Na+]", "Cl"],
"products": ["[BH2]C#N", "[Na][Cl]"]
},
"ion": {
"reactants": ["[BH3-]C#N", "[Na+]", "[H+]"],
"products": ["[BH2]C#N", "[Na+]"]
}
},
"template_4": {
"neutral": {
"reactants": ["[AlH4-]", "[Li+]", "Cl"],
"products": ["[AlH3]", "[Li][Cl]"]
},
"ion": {
"reactants": ["[AlH4-]", "[Li+]", "[H+]"],
"products": ["[AlH3]", "[Li+]"]
}
}
}

0 comments on commit c9ba148

Please sign in to comment.