Skip to content

Commit

Permalink
LAB-1466 best of each design checkpoint (#960)
Browse files Browse the repository at this point in the history
  • Loading branch information
supraja-968 authored Apr 22, 2024
1 parent 3b13d37 commit fe82a38
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
4 changes: 2 additions & 2 deletions tools/colabdesign/colabdesign.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "colabdesign v1.4",
"name": "colabdesign v1.5",
"description": "RFdiffusion is a method for structure generation, with or without conditional information (a motif, target etc). It can perform a whole range of protein design challenges. This version also creates checkpoints for the model.",
"guide": "Note: steps marked (optional) and (advanced) give additional constraints for the designs, but it is not necessary to add/edit their input in order to submit an experimental run.\n\n1. Select a .pdb file containing the *target protein* you would like to design a binder against.\n2. (optional) Select a *target chain* from the pdb file (chain A is the default). \n3. (optional) Crop the target structure by specifying the *target start residue* and the *target end residue* on the target chain. \n4. (optional) Define *hotspot* residues on the target as the binding sites, stating the chain (e.g. A) followed by the number of the residue (e.g. A30). You can specify multiple hotspots at once.\n5. Specify the *binder length* (total number of residues) you wish to design, followed by the *number of binders* to design (max 8 per run). \n6. (advanced) Define a *contigs override* that constrains the binder design to a subset of the residues. The contig consists of information on the target and the binder, separated by a colon. Residues that are to be kept fixed are indicated by the chain label followed by the range of residue numbers. A number without a chain label indicates the number of residues to be designed.", "author": "sokrypton",
"github": "https://github.com/sokrypton/ColabDesign",
Expand All @@ -12,7 +12,7 @@
],
"arguments": [
],
"dockerPull": "quay.io/labdao/colabdesign@sha256:92ce1b8474fbcc398a5cf037be8cc21695183e087c991d3515789c44a9a0e542",
"dockerPull": "quay.io/labdao/colabdesign@sha256:570fa90433f2db9bb5cbc18d01a9c23110f898a7c1e1978e85db90959274db89",
"gpuBool": true,
"memoryGB": 12,
"cpu": 3,
Expand Down
8 changes: 6 additions & 2 deletions tools/colabdesign/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,12 @@ def add_deepest_keys_to_dataframe(deepest_keys_values, df_results):

def create_and_upload_checkpoints(df_results, result_csv_path, flow_uuid, job_uuid):
checkpoint_csv_path = os.path.dirname(result_csv_path)
for index, row in df_results.iterrows():

df_sorted = df_results.sort_values(by=['design', 'rmsd'], ascending=[True, True])
best_per_design = df_sorted.groupby('design').first()
best_per_design.reset_index(inplace=True)

for index, row in best_per_design.iterrows():
plddt_for_checkpoint = row['plddt'] *100
i_pae_for_checkpoint = row['i_pae']
design = row['design']
Expand All @@ -121,7 +126,6 @@ def create_and_upload_checkpoints(df_results, result_csv_path, flow_uuid, job_uu
new_df.to_csv(event_csv_filepath, index= False)

bucket_name = "app-checkpoint-bucket"
time.sleep(2)
object_name = f"checkpoints/{flow_uuid}/{job_uuid}/checkpoint_{index}"
upload_to_s3(event_csv_filepath, bucket_name, f"{object_name}/{event_csv_filename}")
upload_to_s3(pdb_path, bucket_name, f"{object_name}/{pdb_file_name}")
Expand Down

0 comments on commit fe82a38

Please sign in to comment.