This repository has been archived by the owner on Oct 19, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 111
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
102 changed files
with
1,973 additions
and
835 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Image for reinventcli: conda CI base image plus Lua 5.1.4.9 and Lmod 8.2
# built from source, with the project's conda environment installed into `base`.
FROM docker.io/continuumio/conda-ci-linux-64-python3.7:latest

USER root

# Build Lua, then Lmod. Every step is chained with `&&` (never `;`) so that a
# failing download, configure or make aborts the image build instead of being
# silently ignored and producing a broken layer.
RUN apt-get update && \
    apt-get -y install rsync procps && \
    wget https://sourceforge.net/projects/lmod/files/lua-5.1.4.9.tar.bz2 && \
    tar xf lua-5.1.4.9.tar.bz2 && \
    cd lua-5.1.4.9 && \
    ./configure --prefix=/opt/apps/lua/5.1.4.9 && \
    make && \
    make install && \
    cd /opt/apps/lua && \
    ln -s 5.1.4.9 lua && \
    ln -s /opt/apps/lua/lua/bin/lua /usr/local/bin && \
    ln -s /opt/apps/lua/lua/bin/luac /usr/local/bin && \
    cd && \
    wget https://sourceforge.net/projects/lmod/files/Lmod-8.2.tar.bz2 && \
    tar xf Lmod-8.2.tar.bz2 && \
    cd Lmod-8.2 && \
    ./configure --prefix=/opt/apps --with-fastTCLInterp=no && \
    make install && \
    ln -s /opt/apps/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh

# Lmod locations; BASH_ENV makes non-interactive bash shells source the Lmod init script.
ENV LMOD_ROOT=/opt/apps/lmod \
    LMOD_PKG=/opt/apps/lmod/lmod \
    LMOD_VERSION=8.2 \
    LMOD_CMD=/opt/apps/lmod/lmod/libexec/lmod \
    LMOD_DIR=/opt/apps/lmod/lmod/libexec \
    BASH_ENV=/opt/apps/lmod/lmod/init/bash

COPY . /reinventcli/

WORKDIR /reinventcli

# Install the dependencies listed in reinvent.yml into the base environment and
# make the copied sources readable/executable for every user.
RUN conda update -n base -c defaults conda && \
    conda env update --name=base --file=reinvent.yml && \
    chmod -R "a+rx" /reinventcli
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[Dolphin] | ||
Timestamp=2022,4,8,15,57,33 | ||
Version=3 |
4 changes: 4 additions & 0 deletions
4
running_modes/automated_curriculum_learning/actions/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from running_modes.automated_curriculum_learning.actions.base_action import BaseAction | ||
from running_modes.automated_curriculum_learning.actions.base_sample_action import BaseSampleAction | ||
from running_modes.automated_curriculum_learning.actions.lib_invent_sample_model import LibInventSampleModel | ||
from running_modes.automated_curriculum_learning.actions.link_invent_sample_model import LinkInventSampleModel |
11 changes: 11 additions & 0 deletions
11
running_modes/automated_curriculum_learning/actions/base_action.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import abc | ||
from running_modes.automated_curriculum_learning.logging.base_logger import BaseLogger | ||
|
||
|
||
class BaseAction(abc.ABC):
    """Common base class for actions; it only stores the (optional) logger."""

    def __init__(self, logger=None):
        """
        (Abstract) Initializes an action.
        :param logger: An optional logger instance. It is stored unchanged, so
                       the attribute is None when no logger is supplied.
        """
        # The annotation documents the expected logger type; the value may be None.
        self.logger: BaseLogger = logger
11 changes: 11 additions & 0 deletions
11
running_modes/automated_curriculum_learning/actions/base_sample_action.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import numpy as np | ||
from running_modes.automated_curriculum_learning.actions import BaseAction | ||
|
||
|
||
class BaseSampleAction(BaseAction):
    """Base class for sampling actions; provides a helper to de-duplicate SMILES."""

    def _get_indices_of_unique_smiles(self, smiles: [str]) -> np.ndarray:
        """
        Returns the index of the first occurrence of every distinct SMILES string.
        :param smiles: A list of SMILES strings, possibly containing duplicates.
        :return: An np.ndarray with the first-occurrence indices in ascending order.
        """
        # np.unique reports the first-occurrence indices in the order of the sorted
        # unique values; sorting the indices restores the order of the input list.
        _, first_occurrences = np.unique(smiles, return_index=True)
        return np.sort(first_occurrences)
68 changes: 68 additions & 0 deletions
68
running_modes/automated_curriculum_learning/actions/lib_invent_sample_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from typing import List | ||
|
||
import numpy as np | ||
from reinvent_chemistry import Conversions | ||
from reinvent_chemistry.library_design import BondMaker, AttachmentPoints | ||
from reinvent_models.lib_invent.models.dataset import Dataset | ||
from reinvent_models.model_factory.generative_model_base import GenerativeModelBase | ||
from torch.utils.data import DataLoader | ||
|
||
from running_modes.automated_curriculum_learning.actions import BaseSampleAction | ||
from running_modes.automated_curriculum_learning.dto.sampled_sequences_dto import SampledSequencesDTO | ||
|
||
|
||
class LibInventSampleModel(BaseSampleAction):
    """Sampling action that draws (scaffold, decoration, nll) samples from a Lib-INVENT model."""

    def __init__(self, model: GenerativeModelBase, batch_size: int, logger=None, randomize=False, sample_uniquely=True):
        """
        Creates an instance of LibInventSampleModel.
        :param model: A model instance (better in scaffold_decorating mode).
        :param batch_size: Number of times the model is sampled for the given scaffolds in each run.
        :param logger: An optional logger instance.
        :param randomize: If True, the scaffolds are randomized before sampling.
        :param sample_uniquely: If True, repeated scaffold/decoration combinations are dropped.
        """
        super().__init__(logger)
        self.model = model
        self._batch_size = batch_size
        self._bond_maker = BondMaker()
        self._attachment_points = AttachmentPoints()
        self._randomize = randomize
        self._conversions = Conversions()
        self._sample_uniquely = sample_uniquely

    def run(self, scaffold_list: List[str]) -> List[SampledSequencesDTO]:
        """
        Samples the model batch_size times for the given scaffolds.
        :param scaffold_list: A list of scaffold SMILES.
        :return: A list of SampledSequencesDTO; empty when no scaffolds are given.
        """
        # An empty input would turn into batch_size=0 for the DataLoader below.
        if len(scaffold_list) == 0:
            return []

        if self._randomize:
            scaffold_list = self._randomize_scaffolds(scaffold_list)
        clean_scaffolds = [self._attachment_points.remove_attachment_point_numbers(scaffold)
                           for scaffold in scaffold_list]
        dataset = Dataset(clean_scaffolds, self.model.get_vocabulary().scaffold_vocabulary,
                          self.model.get_vocabulary().scaffold_tokenizer)
        # batch_size=len(dataset): all scaffolds are delivered as one single batch.
        dataloader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, collate_fn=Dataset.collate_fn)

        # Bound before the loop so the name exists even if the loader yields nothing.
        sampled_sequences = []
        for scaffold_seqs, scaffold_seq_lengths in dataloader:
            for _ in range(self._batch_size):
                packed = self.model.sample(scaffold_seqs, scaffold_seq_lengths)
                sampled_sequences.extend(SampledSequencesDTO(scaffold, decoration, nll)
                                         for scaffold, decoration, nll in packed)

        if self._sample_uniquely:
            sampled_sequences = self._sample_unique_sequences(sampled_sequences)

        return sampled_sequences

    def _sample_unique_sequences(self, sampled_sequences: List[SampledSequencesDTO]) -> List[SampledSequencesDTO]:
        """
        Keeps the first occurrence of every distinct input/output combination.
        :param sampled_sequences: The sampled sequences, possibly with duplicates.
        :return: The de-duplicated sequences in their original order.
        """
        strings = ["".join([ss.input, ss.output]) for ss in sampled_sequences]
        unique_idxs = self._get_indices_of_unique_smiles(strings)
        # Index the list directly: no detour through a numpy array of DTO objects.
        return [sampled_sequences[idx] for idx in unique_idxs]

    def _randomize_scaffolds(self, scaffolds: List[str]):
        """
        Converts every scaffold SMILES to a molecule and randomizes it with the bond maker.
        :param scaffolds: A list of scaffold SMILES.
        :return: A list with one randomized scaffold per input scaffold.
        """
        scaffold_mols = [self._conversions.smile_to_mol(scaffold) for scaffold in scaffolds]
        return [self._bond_maker.randomize_scaffold(mol) for mol in scaffold_mols]
72 changes: 72 additions & 0 deletions
72
running_modes/automated_curriculum_learning/actions/link_invent_sample_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from typing import List | ||
|
||
import numpy as np | ||
from reinvent_chemistry import Conversions, TransformationTokens | ||
from reinvent_chemistry.library_design import BondMaker, AttachmentPoints | ||
from reinvent_models.link_invent.dataset.dataset import Dataset | ||
from reinvent_models.model_factory.generative_model_base import GenerativeModelBase | ||
from torch.utils.data import DataLoader | ||
|
||
from running_modes.automated_curriculum_learning.actions import BaseSampleAction | ||
from running_modes.automated_curriculum_learning.dto.sampled_sequences_dto import SampledSequencesDTO | ||
|
||
|
||
class LinkInventSampleModel(BaseSampleAction):
    """Sampling action that draws samples for warhead pairs from a Link-INVENT model."""

    def __init__(self, model: GenerativeModelBase, batch_size: int, logger=None, randomize=False, sample_uniquely=True):
        """
        Creates an instance of LinkInventSampleModel.
        :param model: A model instance.
        :param batch_size: Number of times the model is sampled for the given warheads in each run.
        :param logger: An optional logger instance.
        :param randomize: If True, the warhead SMILES are randomized before sampling.
        :param sample_uniquely: If True, repeated input/output combinations are dropped.
        """
        super().__init__(logger)
        self.model = model
        self._batch_size = batch_size
        self._bond_maker = BondMaker()
        self._randomize = randomize
        self._sample_uniquely = sample_uniquely

        self._conversions = Conversions()
        self._attachment_points = AttachmentPoints()
        self._tokens = TransformationTokens()

    def run(self, warheads_list: List[str]) -> List[SampledSequencesDTO]:
        """
        Samples the model batch_size times for the given warhead pairs.
        :param warheads_list: A list of warhead pair SMILES.
        :return: A list of SampledSequencesDTO; empty when no warheads are given.
        """
        # An empty input would turn into batch_size=0 for the DataLoader below.
        if len(warheads_list) == 0:
            return []

        if self._randomize:
            warheads_list = self._randomize_warheads(warheads_list)
        clean_warheads = [self._attachment_points.remove_attachment_point_numbers(warheads)
                          for warheads in warheads_list]
        dataset = Dataset(clean_warheads, self.model.get_vocabulary().input)
        # batch_size=len(dataset): all warhead pairs are delivered as one single batch.
        data_loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, collate_fn=dataset.collate_fn)

        # Bound before the loop so the name exists even if the loader yields nothing.
        sampled_sequences = []
        for batch in data_loader:
            for _ in range(self._batch_size):
                sampled_sequences.extend(self.model.sample(*batch))

        if self._sample_uniquely:
            sampled_sequences = self._sample_unique_sequences(sampled_sequences)

        return sampled_sequences

    def _sample_unique_sequences(self, sampled_sequences: List[SampledSequencesDTO]) -> List[SampledSequencesDTO]:
        """
        Keeps the first occurrence of every distinct input/output combination.
        :param sampled_sequences: The sampled sequences, possibly with duplicates.
        :return: The de-duplicated sequences in their original order.
        """
        # TODO could be part of a base sample action as it is the same for link and lib invent
        strings = ["".join([ss.input, ss.output]) for ss in sampled_sequences]
        unique_idxs = self._get_indices_of_unique_smiles(strings)
        # Index the list directly: no detour through a numpy array of DTO objects.
        return [sampled_sequences[idx] for idx in unique_idxs]

    def _randomize_warheads(self, warhead_pair_list: List[str]):
        """
        Replaces every warhead of every pair by a random SMILES of the same molecule.
        :param warhead_pair_list: A list of warhead pair SMILES joined by the attachment separator token.
        :return: A list of randomized warhead pair SMILES, joined by the same separator token.
        """
        separator = self._tokens.ATTACHMENT_SEPARATOR_TOKEN
        randomized_warhead_pair_list = []
        for warhead_pair in warhead_pair_list:
            warhead_mol_list = [self._conversions.smile_to_mol(warhead) for warhead in warhead_pair.split(separator)]
            # Note: do not use self._bond_maker.randomize_scaffold here, as it would add unwanted brackets
            # to the attachment points (which are not part of the warhead vocabulary)
            warhead_randomized_list = [self._conversions.mol_to_random_smiles(mol) for mol in warhead_mol_list]
            randomized_warhead_pair_list.append(separator.join(warhead_randomized_list))
        return randomized_warhead_pair_list
38 changes: 38 additions & 0 deletions
38
running_modes/automated_curriculum_learning/actions/reinvent_sample_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from typing import Tuple, Any | ||
|
||
import numpy as np | ||
from reinvent_chemistry import Conversions | ||
from reinvent_models.model_factory.generative_model_base import GenerativeModelBase | ||
|
||
from running_modes.automated_curriculum_learning.actions import BaseSampleAction | ||
from running_modes.automated_curriculum_learning.dto import SampledBatchDTO | ||
|
||
|
||
class ReinventSampleModel(BaseSampleAction):
    """Sampling action that draws one batch of unique SMILES from a generative model."""

    def __init__(self, model: GenerativeModelBase, batch_size: int, logger=None):
        """
        Creates an instance of ReinventSampleModel.
        :param model: A model instance.
        :param batch_size: Batch size to use.
        :param logger: An optional logger instance.
        """
        super().__init__(logger)
        self.model = model
        self._batch_size = batch_size

        self._conversions = Conversions()

    def run(self) -> SampledBatchDTO:
        """
        Samples the model once and wraps the de-duplicated result.
        :return: A SampledBatchDTO built from the unique sequences, SMILES and likelihoods.
        """
        seqs, smiles, agent_likelihood = self._sample_unique_sequences(self.model, self._batch_size)
        return SampledBatchDTO(seqs, smiles, agent_likelihood)

    def _sample_unique_sequences(self, agent: GenerativeModelBase, batch_size: int) -> Tuple[Any, Any, Any]:
        """
        Samples batch_size entries from the agent and keeps the first occurrence of every SMILES.
        :param agent: The model to sample from.
        :param batch_size: Number of entries requested from the agent.
        :return: A tuple (sequences, SMILES as np.ndarray, likelihoods) restricted to the unique SMILES.
        """
        seqs, smiles, agent_likelihood = agent.sample(batch_size)
        unique_idxs = self._get_indices_of_unique_smiles(smiles)
        # NOTE(review): seqs and agent_likelihood are indexed with a numpy index array,
        # so they are presumably tensors/arrays aligned with smiles - confirm against the model.
        smiles_unique = np.array(smiles)[unique_idxs]
        return seqs[unique_idxs], smiles_unique, agent_likelihood[unique_idxs]
Oops, something went wrong.