-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fd4b122
commit c9ba148
Showing
4 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"aldehyde": ["template_1", "template_2", "template_3", "template_4"], | ||
"ketone": ["template_1", "template_2", "template_3", "template_4"], | ||
"ester": ["template_2", "template_3", "template_4"], | ||
"acyl_chloride": ["template_2", "template_3", "template_4"], | ||
"amide": ["template_4"], | ||
"carboxylic_acid": ["template_4"], | ||
"other": ["template_1"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import re | ||
import copy | ||
from typing import List, Dict, Any | ||
from joblib import Parallel, delayed | ||
from synrbl.SynChemImputer.reduction_template import ReductionTemplate | ||
import rdkit.RDLogger as RDLogger | ||
|
||
RDLogger.DisableLog("rdApp.*") | ||
|
||
|
||
class CurateReduction:
    """Curate reduction reactions and flag leftover isolated hydrogens.

    All methods are static/class methods; the class only serves as a
    namespace around ``ReductionTemplate.reduction_template``.
    """

    @staticmethod
    def check_for_isolated_hydrogen(smiles: str) -> bool:
        """Return True if *smiles* contains a stand-alone ``[H]`` token.

        The negative lookahead rejects a ``[H]`` that is followed by a
        closing bracket with no opening bracket in between.
        """
        pattern = r"\[H\](?![^[]*\])"
        return bool(re.search(pattern, smiles))

    @staticmethod
    def _first_smiles(curated) -> str:
        """Return the first reaction SMILES held by *curated*, or ``""``.

        ``ReductionTemplate.reduction_template`` may return a single
        string, a list of strings, or ``None``.  Indexing a string with
        ``[0]`` would yield only its first character, so the string case
        must be handled before the list case.
        """
        if not curated:
            return ""
        if isinstance(curated, str):
            return curated
        first = curated[0]
        return first if isinstance(first, str) else ""

    @staticmethod
    def curate(
        reaction_dict: Dict[str, Any],
        reaction_column: str = "reactions",
        compound_template: Dict[str, Any] = None,
        all_templates: Dict = None,
        return_all: bool = False,
    ) -> Dict[str, Any]:
        """Curate one reaction record.

        Args:
            reaction_dict: Record holding the reaction under
                *reaction_column*.
            reaction_column: Key of the reaction entry in *reaction_dict*.
            compound_template: Mapping forwarded to
                ``ReductionTemplate.reduction_template``.
            all_templates: Template definitions forwarded to
                ``ReductionTemplate.reduction_template``.
            return_all: Forwarded to
                ``ReductionTemplate.reduction_template``.

        Returns:
            The input record itself when it has no (or an empty) reaction
            entry; otherwise a deep copy extended with the keys
            ``"curated_reaction"`` and ``"radical"``.
        """
        reactions = reaction_dict.get(reaction_column, [])
        if not reactions:
            return reaction_dict  # Early return if no reactions are found

        # Copy only once we know the record is going to be modified.
        new_reaction_dict = copy.deepcopy(reaction_dict)

        curate_reaction = ReductionTemplate.reduction_template(
            reactions, compound_template, all_templates, return_all
        )
        new_reaction_dict["curated_reaction"] = curate_reaction
        # Inspect the whole first SMILES, not just its first character.
        new_reaction_dict["radical"] = CurateReduction.check_for_isolated_hydrogen(
            CurateReduction._first_smiles(curate_reaction)
        )

        return new_reaction_dict

    @classmethod
    def parallel_curate(
        cls,
        reaction_list: List[Dict[str, Any]],
        reaction_column: str = "reactions",
        compound_template: Dict[str, Any] = None,
        all_templates: Dict = None,
        return_all: bool = False,
        n_jobs: int = 4,
        verbose: int = 1,
    ) -> List[Dict[str, Any]]:
        """Run :meth:`curate` over *reaction_list* with joblib.

        Args:
            reaction_list: Records to curate.
            reaction_column: See :meth:`curate`.
            compound_template: See :meth:`curate`.
            all_templates: See :meth:`curate`.
            return_all: See :meth:`curate`.
            n_jobs: Passed to ``joblib.Parallel``.
            verbose: Passed to ``joblib.Parallel``.

        Returns:
            One curated record per input record.
        """
        results = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(cls.curate)(
                reaction, reaction_column, compound_template, all_templates, return_all
            )
            for reaction in reaction_list
        )
        return results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from typing import List, Dict, Union | ||
from rdkit import Chem | ||
from fgutils import FGQuery | ||
import rdkit.RDLogger as RDLogger | ||
|
||
RDLogger.DisableLog("rdApp.*") | ||
|
||
|
||
class ReductionTemplate:
    """Replace isolated hydrogen radicals in a reaction by reagent templates."""

    @staticmethod
    def count_radical_isolated_hydrogens(smiles):
        """Count hydrogens with no neighbours and at least one radical electron.

        Args:
            smiles: SMILES string parsed with ``Chem.MolFromSmiles``.

        Returns:
            Number of matching hydrogen atoms.

        Raises:
            ValueError: If RDKit cannot parse *smiles*.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError(f"Could not parse SMILES: {smiles!r}")

        return sum(
            1
            for atom in mol.GetAtoms()
            if atom.GetAtomicNum() == 1
            and len(atom.GetNeighbors()) == 0
            and atom.GetNumRadicalElectrons() > 0
        )

    @staticmethod
    def find_reactive_functional_groups(reaction_smiles: str) -> List[str]:
        """Return functional groups found in the reactants but not the products.

        Each entry returned by ``FGQuery.get`` is reduced to its first
        element, which is the value compared between both sides.
        """
        query = FGQuery(use_smiles=True)
        reactant, product = reaction_smiles.split(">>")
        fg_reactant = [value[0] for value in query.get(reactant)]
        fg_product = [value[0] for value in query.get(product)]
        return [fg for fg in fg_reactant if fg not in fg_product]

    @staticmethod
    def _select_template(all_templates, template=None):
        """Pick the template entry to apply.

        Without an explicit *template* the entry ``"template_1"`` is used
        when *all_templates* is a dict containing it; otherwise the lookup
        falls back to index ``0``.
        """
        if template is not None:
            return all_templates[template]
        if isinstance(all_templates, dict) and "template_1" in all_templates:
            return all_templates["template_1"]
        return all_templates[0]

    @staticmethod
    def _apply_template(
        reaction_smiles, hydrogen_count, selected_template, neutralize=False
    ):
        """Swap ``[H]`` reactants for one template instance per hydrogen pair.

        An odd *hydrogen_count* cannot be split into pairs, so the reaction
        is returned unchanged in that case.
        """
        if hydrogen_count % 2 != 0:
            return reaction_smiles
        reactants, products = reaction_smiles.split(">>")
        reactant_list = [x for x in reactants.split(".") if x != "[H]"]
        product_list = products.split(".")
        template_type = "neutral" if neutralize else "ion"
        for _ in range(hydrogen_count // 2):
            reactant_list.extend(selected_template[template_type]["reactants"])
            product_list.extend(selected_template[template_type]["products"])
        updated_reactants = ".".join(reactant_list)
        updated_products = ".".join(product_list)
        return f"{updated_reactants}>>{updated_products}"

    @staticmethod
    def process_template(
        reaction_smiles: str,
        neutralize: bool = False,
        all_templates: Dict = None,
        template: str = None,
    ) -> str:
        """Apply one reagent template to *reaction_smiles*.

        Args:
            reaction_smiles: Reaction in ``reactants>>products`` form.
            neutralize: Use the ``"neutral"`` variant of the template
                instead of the ``"ion"`` variant.
            all_templates: Collection of template definitions.
            template: Key of the template to use; defaults to
                ``"template_1"`` when omitted.

        Returns:
            The rewritten reaction, or the input unchanged when the number
            of isolated radical hydrogens is odd.
        """
        selected_template = ReductionTemplate._select_template(
            all_templates, template
        )
        reactants = reaction_smiles.split(">>")[0]
        hydrogen_count = ReductionTemplate.count_radical_isolated_hydrogens(
            reactants
        )
        return ReductionTemplate._apply_template(
            reaction_smiles, hydrogen_count, selected_template, neutralize
        )

    @classmethod
    def reduction_template(
        cls,
        reaction_smiles: str,
        compound_template: Dict,
        all_templates: Dict = None,
        return_all: bool = False,
    ) -> Union[str, List[str]]:
        """Apply every template registered for the reacting functional groups.

        Args:
            reaction_smiles: Reaction in ``reactants>>products`` form.
            compound_template: Mapping from functional-group name to the
                template keys to apply for it.
            all_templates: Collection of template definitions.
            return_all: Return all rewritten reactions instead of the first.

        Returns:
            With ``return_all`` a list of rewritten reactions; otherwise the
            first one, or ``None`` when no group matched. If any step
            raises, the error is printed and ``[reaction_smiles]`` (a list,
            regardless of ``return_all``) is returned.
        """
        try:
            fg_reactive = cls.find_reactive_functional_groups(reaction_smiles)
            if len(fg_reactive) == 0:
                fg_reactive = ["other"]
            processed_smiles = []
            for group, templates in compound_template.items():
                if group in fg_reactive:
                    processed_smiles.extend(
                        [
                            cls.process_template(
                                reaction_smiles,
                                neutralize=False,
                                all_templates=all_templates,
                                template=tpl,
                            )
                            for tpl in templates
                        ]
                    )
            return (
                processed_smiles
                if return_all
                else (processed_smiles[0] if processed_smiles else None)
            )
        except Exception as e:
            # Best-effort: report the problem and hand back the input.
            print(e)
            return [reaction_smiles]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
{ | ||
"template_1": { | ||
"neutral": { | ||
"reactants": ["[HH]"], | ||
"products": [] | ||
}, | ||
"ion": { | ||
"reactants": ["[HH]"], | ||
"products": [] | ||
} | ||
}, | ||
"template_2": { | ||
"neutral": { | ||
"reactants": ["[BH4-]", "[Na+]", "Cl"], | ||
"products": ["[BH3]", "[Na][Cl]"] | ||
}, | ||
"ion": { | ||
"reactants": ["[BH4-]", "[Na+]", "[H+]"], | ||
"products": ["[BH3]", "[Na+]"] | ||
} | ||
}, | ||
"template_3": { | ||
"neutral": { | ||
"reactants": ["[BH3-]C#N", "[Na+]", "Cl"], | ||
"products": ["[BH2]C#N", "[Na][Cl]"] | ||
}, | ||
"ion": { | ||
"reactants": ["[BH3-]C#N", "[Na+]", "[H+]"], | ||
"products": ["[BH2]C#N", "[Na+]"] | ||
} | ||
}, | ||
"template_4": { | ||
"neutral": { | ||
"reactants": ["[AlH4-]", "[Li+]", "Cl"], | ||
"products": ["[AlH3]", "[Li][Cl]"] | ||
}, | ||
"ion": { | ||
"reactants": ["[AlH4-]", "[Na+]", "[H+]"], | ||
"products": ["[AlH3]", "[Na+]"] | ||
} | ||
} | ||
} |