From ce0435525bf4cbe2c35935a64a00957956350d53 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 26 Oct 2023 12:36:16 -0400 Subject: [PATCH 01/35] original docking_utils.py file --- api/utils/docking_utils.py | 497 +++++++++++++++++++++++++++++++++++++ 1 file changed, 497 insertions(+) create mode 100755 api/utils/docking_utils.py diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py new file mode 100755 index 0000000..755007f --- /dev/null +++ b/api/utils/docking_utils.py @@ -0,0 +1,497 @@ +from flask import Flask, flash, request, redirect, url_for, send_from_directory +from flask_restx import Api +from flask.templating import render_template +from werkzeug.utils import secure_filename +import re +import os +import math +import shutil +import json +import subprocess +import random +import sys +from datetime import date + + +def hex_docking(rec_lig,rec_lig2,receptor, ligand, date,docking_pdb_path): + +# Function to call Hex, including hard coded settings + + + code = """ open_receptor """ + docking_pdb_path + """results/receptor_to_dock/""" + receptor + """.pdb +open_ligand """ + docking_pdb_path +"""results/ligand_to_dock/""" + ligand + """.pdb +docking_correlation 1 +docking_score_threshold 0 +max_docking_solutions 50 +docking_receptor_stepsize 5.50 +docking_ligand_stepsize 5.50 +docking_alpha_stepsize 2.80 +docking_main_scan 16 +receptor_origin C-825:VAL-O +commit_edits +activate_docking +save_range 1 100 """ + docking_pdb_path + """results/%s_folder_%s/%s/result %s pdb""" % (rec_lig, date, rec_lig2, rec_lig) + subprocess.Popen('/home/yyu/hex/bin/hex', stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8'))) + + + + +def best_result(file_name, monomer, rec_lig, receptor, ligand): + + # Function to generate the "best docking results", being the result with the best score and with the residue with the best contact frequency + + file_name_dir = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results + file_name_path = str(file_name_dir + file_name[:-20] + '.pdb') #directory for the result, identifies as the best result + des1 = file_name_dir + 'best_docking_results_for_'+ file_name[:-24] + '.pdb' #destination directory for the best_docking_result file + shutil.copyfile(file_name_path,des1) + + #Same thing done with the ligand file only + ori2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name + des2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb' + shutil.copyfile(ori2,des2) + + + # This is to create a copy of that file with 'Z' as the name of the chain in the ligand, + # it is important for the 3dsjmol visualization + + with open(str(file_name_dir + 'best_docking_results_for_' + file_name[:-24] + '.pdb'), 'r') as file: #to not modify the chain name for the protein chains + lines = file.readlines() + subpart1 = lines[:lines.index( + 'REMARK Docked ligand coordinates...\n')] #subpart 1 is from start to 1st line in ligand coordinates + subpart2 = lines[lines.index( + 'REMARK Docked ligand coordinates...\n'):] #subpart 2 from 1st line in ligand coordinates to end of file + with open(str(file_name_dir + 'best_docking_results_for_' + receptor + '_' + monomer + '_' + ligand + '.pdb'), 'w') as file: + for l in subpart1: + file.write(l) + for line in subpart2: + if line[0:4] == 'ATOM' or line[:6] == 'HETATM' or line[:3] == 'TER': + newline = line[:21] + 'Z' + line[22:] + file.write(newline) + else: + file.write(line) + print('best docking result file is generated for ' + file_name[:-24]) + + + + +def separate_results(monomer, file_dir, first_file_name, dir_final, monomers_list): + + # Function to separate the multimer file into its monomers for every result file created by hex + + ends = [] #this list will be modified with the indices of every monomer's terminal line + the first coordinate's line index + # Open the .pdb file to separate + with open (file_dir + first_file_name, 'r+') as r: + lines = r.readlines() + for l in lines: + if l.startswith('ATOM 1 '): + ends.append(lines.index(l)) #and save the index of the first coordinate's line in the list ends + + # Searches the .pdb files for the lines that indicate the end of a chain + for l in lines: + if l[0:3] == 'TER': + ends.append(lines.index(l)) #and add their indexes in the ends list + + if os.path.isdir(dir_final) == False: #create folder to dump the new monomer file or files + os.makedirs(dir_final) + + # LOGIC:The end of the previous chain is the start of the current one, + start_pos = ends[monomers_list.index(monomer)] + end_pos = ends[monomers_list.index(monomer)+1] + + # It copies every line that is not referencing an atom coordinates + # or that it is in the range of the monomer we want to isolate + file_list = os.listdir(file_dir) + for r in file_list: #for every result file: + file_path = str(file_dir + '/' + r) + new_file_path = str(dir_final + r[:-4] + '_' + monomer + '.pdb') #create a new result file which will include only one protein chain, not all + with open(file_path, 'r') as file: + lines = [line for line in file.readlines()] + # Dump in the new file everything before the first coordinate line + between the lines that contain + # the monomer coordinates + after the last receptor's coordinates + lines = lines[:ends[1]] + lines[start_pos:end_pos] + lines[ends[-1]:] + with open(new_file_path, 'w') as file: + file.writelines(lines) + + + + +def separate_monomers(monomer, file_dir, file_name, dir_final, monomers_list): + + # Function to separate the original protein pdb file in its monomers + + # Open the .pdb file to separate + with open (file_dir + '/' + file_name + '.pdb', 'r+') as r: + lines = r.readlines() + ends = [0] + + # Searches the .pdb files for the lines that indicate the end of a chain + for l in lines: + if l[0:3] == 'TER': + ends.append(lines.index(l)) + if os.path.isdir(dir_final) == False: + os.makedirs(dir_final) + monomer_pdb = open(dir_final + '/' + file_name + '_' + monomer + '.pdb', 'a+') + + + # The end of the previous chain is the start of the current one, + # 0 was previously included in the list ends to be the start of the first chain + start_pos = ends[monomers_list.index(monomer)] + end_pos = ends[monomers_list.index(monomer)+1] + + # It copies every line that is not referencing an atom coordinates + # or that it is in the range of the monomer we want to isolate + for l in lines: + if l[0:4] != 'ATOM' or lines.index(l) in range(start_pos, end_pos): + monomer_pdb.write(l) + # It needs to copy also the ligand data (if there is any) which is labeled with SDF + elif l[17:20] == 'SDF': + monomer_pdb.write(l) + + + + + +def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path): + + # Function to separate the ligand coordinates of every solution, it's useful to simply the calculation of the contact frequencies + + dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory + print('Isolating ' + rec_lig + '_' + monomer) + + os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory + file_list = os.listdir(dir_path) + result_list = [] + + # Some operative system will create hidden files, the script consider .pdb files only + for i in file_list: + if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb': + result_list.append(i) + for r in result_list: + file_path = str(dir_path + '/' + r) + ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb') + with open(file_path, 'r') as file: + lines = [line for line in file.readlines()] + # Everything below the line 'REMARK Docked ligand coordinates...' is data of the ligand + lines = lines[lines.index('REMARK Docked ligand coordinates...\n'):] + with open(ligand_reserved_file_path, 'w') as file: + file.writelines(lines) + + + + + +def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand): + + # Function to calculate the contact frequencies of every amino acid + + result_dir_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ rec_lig + '_folder_'+ str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function! + receptor_file_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file + + # Store every receptor's atom coordinates information as a nested dictionary called 'reference' + with open(receptor_file_path, 'r') as file: + reference = {} + for line in file.readlines(): + if line[0:4] == 'ATOM': + if int(line[22:27]) in reference: + reference[int(line[22:27])][int(line[6:11])] = tuple(map(float, filter(None, line[31:54].split(' ')))) + else: + reference[int(line[22:27])] = {int(line[6:11]) : tuple(map(float, filter(None, line[31:54].split(' '))))} + + #so the reference is {residue: {atom :(x, y, z)}} + + # The energy for each reference element will be stored in dictionary 'ac' + ac = {} + file_list = os.listdir(result_dir_path) + result_list = [] + + # Generate the list for all .pdb names in the directory + for i in file_list: + if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb': + result_list.append(i) + + en_list = [] #future list of energies + file_names = [] #future list of file names + resi_list = [] #future list of aa + + #reading the first file and saving its lines will make things much quicker for the rest of them + first_file_path = str(result_dir_path + receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb') + z=open(first_file_path) + lines_first=z.readlines() + x=lines_first[2] + print (x) + + + # Store energy values for each ligand_reserved file + for r in result_list: + print('current file:' + r) + energy = '' + file_path = str(result_dir_path + r) + + with open(file_path) as file: + lines = file.readlines() + for l in lines: + if 'REMARK' in l.split(' ') and 'Energy' in l.split(' '): + # The energy is divided by the number of results to + # later obtain an average energy when we will sum the + energy = (float(l.split(' ')[6][:-1]))/(len(result_list)) + # Generate file and energy list by order + file_names.append(str(r)) + en_list.append(energy) + + # Go over every coordinate of atoms in the ligand_reserved file and store into coor + coor = [tuple(map(float, filter(None, line[31:54].split(' ')))) + for line in lines if line[0:4] == 'ATOM'] + lst = [] + + for res in reference.keys(): # for each amino acid in the receptor file: + distances = [] + + for atom in coor: # for each atom of the ligand + + for aa in reference[res].keys(): # for each atom of that amino acid + # check if the distance between atoms of the ligands + # and of the amino acid are lower than chosen threshold (5) + distances.append(math.sqrt((reference[res][aa][0] - atom[0]) ** 2 + (reference[res][aa][1] - atom[1])** 2 + + (reference[res][aa][2] - atom[2]) ** 2)) + + if all(d >= threshold for d in distances): #if none of the distances is lower than the threshold, skip + continue + + else: # if at least one distance is lower then add this aminoacid to the ac dict + if res in ac.keys(): + ac[res] += energy # adding energy (previosly divided by the number of results) more times if + else: # found multiple times, that way you would have an average + ac[res] = energy + + # Store the resi number into lst + if res not in lst: + lst.append(res) + # Store rei_num for one file into resi_list as a list + resi_list.append(lst) + + + + best_result_name = '' + # Find the resi number with the lowest energy + red_resi = '' + for k, v in ac.items(): + if v == min(ac.values()): + red_resi = k + print('best_residue: ' + str(red_resi)) + + # Find the file that both satisfies the lowest energy and containing the lowest energy resi + max_en = 0 + for f in file_names: + if en_list[file_names.index(f)] <= max_en: + temp = resi_list[file_names.index(f)] + for i in temp: + if i == red_resi: + best_result_name = f + + + res_dict_path = result_dir_path + 'res_dict.json' + + # Use the result file from /result/, change the name to best docking result, and convert it into chain Z + try: + best_result(best_result_name, monomer, rec_lig, receptor, ligand) + # sometimes the simulations results are not good enough to satisfy both requirements, + # it's common especially when one monomer is never close to the ligand. + # Not including this line would stop an otherwise useful simulation + except FileNotFoundError: + f_file = receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb' + best_result(f_file, monomer, rec_lig, receptor, ligand) + + print(ac) + + with open(res_dict_path, 'w') as file: + file.write(json.dumps(ac)) + print('res_dict.json is generated') + return ac + + + + + +def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path): + + # Function to create the nested dictionary with every monomer as key with value a dictionary with its amino acids as keys and contact frequencies as values + + result_dict = {} #this will be the dictionary + + folder_name = str(receptor + '_' + monomer + '_' + ligand) + + if receptor + '_' + monomer not in result_dict.keys(): + result_dict[receptor + '_' + monomer] = {} + if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False: + result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand) + else: + result_dict[receptor+ '_' + monomer][ligand] = eval( + open(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read()) + print('res_dict.json previously exists and has read') + + resultjson_path = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/results.json' + + # Initialize results.json + ini = {} + with open(resultjson_path, 'w') as file: + file.write(json.dumps(ini)) + results = {} + for r in result_dict: #result_dict is where we have our contact freuquencies + if r in results.keys(): + for v in result_dict[r]: + results[r][v] == result_dict[r][v] + else: + results[r] = result_dict[r] + with open(resultjson_path, 'w') as file: + file.write(json.dumps(results)) + print('result.json is finished') + + + + + +def pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list, docking_pdb_path): + + print('Current pair:' + rec_lig) + + today_dir = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + + datetoday = str(date.today()) + + results_dir = today_dir + rec_lig + '/result/' + os.makedirs(results_dir) + + hex_docking(rec_lig, rec_lig, receptor, ligand, datetoday,docking_pdb_path) # CALL HEX + + results_list = os.listdir(results_dir) + first_file_name = str(receptor + '_' + ligand + '0001.pdb') + + + # Repeats the analysis for every monomer in the receptor file + for monomer in monomers_list: + dir_final = today_dir + receptor + '_' + monomer + '_' + ligand + '/result/' + print('plotting monomer: ' + monomer + ' with the ligand: ' + ligand) + separate_results(monomer, results_dir, first_file_name, dir_final, monomers_list) + ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path) + print('Ligands are now reserved in docking results.') + color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path) + #plot_frequencies(monomer) + + + +class Protein_Docking: + @staticmethod + def start(receptor,ligand,docking_pdb_path): + + # Check if the receptor is a monomer or a complex and save the receptor and ligand names as variables + + receptor_folder = docking_pdb_path + '/results/receptor_to_dock' + receptor_folder_list = os.listdir(receptor_folder) + ligand_folder = os.listdir(docking_pdb_path + '/results/ligand_to_dock') + + for rec in receptor_folder_list: + # There could be hidden files in the receptor or ligand directory so only consider pdb files + if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb': + receptor = rec[:-4] + + # To check if the receptor is a monomer or not, the script will search the .pdb file + # for the line that indicated the presence of multiple chains, + with open(receptor_folder + '/' + rec, 'r+') as f: + is_monomer = True + for x in f.readlines(): + if re.match(r'COMPND \d CHAIN: \w, \w*', x) != None: + is_monomer = False + #if the receptor would be a monomer the regex would be r'COMPND \d CHAIN: \w;' + + # To make a list of the monomers' labels + print(receptor + ' identified as a protein complex') + if x[11:16] == 'CHAIN': + monomers_list = x.split(': ')[-1].split(', ') + # The COMPND line ends with ';' therefore it needs to be removed from the last label + monomers_list[-1] = monomers_list[-1][0] + + for lig in ligand_folder: + if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb': + #DO NOT USE PDB FOR LIGAND FILES, it is possible but it can lead to errors due to the missing hydrogens + ligand = lig[:-4] + + rec_lig = receptor + '_' + ligand + + # To save the terminal output later (very important) + stdoutOrigin=sys.stdout + sys.stdout = open(docking_pdb_path + 'results/Terminal_recordings/' + receptor + '_' + ligand + '_' + str(date.today()) + '.txt' , "w") + + # Call to the pipeline with different parameters whether the receptor is a monomer or a complex + if is_monomer == False: + dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' + for monomer in monomers_list: + print('separating monomer: ' + monomer) + separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file + + pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) + else: + dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' + monomers_list = ['monomer'] + separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it. + # It allows to use the same functions in both cases + pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) + + #To put together the json files with all the data from all monomers + new_json = docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + '/final.json' + final_json = {} + min_values = [] + max_values = [] + abs_max = None + abs_min = None + + for monomer in monomers_list: + monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' + with open(monomer_json, 'r') as file: + monomer_dict = json.load(file) + + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] + + inside_dict = monomer_dict[monomer_key][ligand_key] + + # To eliminate empty dictionaries that might cause division errors below normalized_mon_dicitonary calculations + if inside_dict == {}: + continue + else: + mini = min(inside_dict.values()) + maxi = max(inside_dict.values()) + + min_values.append(mini) + max_values.append(maxi) + + abs_max = max(max_values) + abs_min = min(min_values) + + print("This is the maximum value: ",abs_max, file=sys.stderr) + print("This is the minimum value: ",abs_min, file=sys.stderr) + + #Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum. + for monomer in monomers_list: + monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' + with open(monomer_json, 'r') as file: + monomer_dict = json.load(file) + + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] + + inside_dict = monomer_dict[monomer_key][ligand_key] + + # It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + + if abs_min == abs_max : + normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}} + final_json.update(normalized_mon_dict) + else: + normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}} + final_json.update(normalized_mon_dict) + #Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values + + with open(new_json,'w') as file: + file.write(json.dumps(final_json)) + print('Final json is finished') + print(new_json, file=sys.stderr) From ffe6b47805325ea90cdb78034c6c9c09f082cedf Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 26 Oct 2023 12:43:54 -0400 Subject: [PATCH 02/35] Add constant for hex execution path Add docking_pdb_path parameter to best_result function to avoid hard-coding file paths Modify snps.py to pass in receptor and ligand of interest through endpoint Change naming conventions to not include the date, and only run hex if the protein-ligand pair has not been run before Add a block in docking_utils.py to search for the receptor and ligand indicated by endpoints --- api/resources/snps.py | 49 +++++++++ api/utils/docking_utils.py | 213 ++++++++++++++++++++----------------- 2 files changed, 164 insertions(+), 98 deletions(-) diff --git a/api/resources/snps.py b/api/resources/snps.py index da1860f..fc9c0a4 100644 --- a/api/resources/snps.py +++ b/api/resources/snps.py @@ -26,6 +26,7 @@ from api.utils.hotspot_utils import HotspotUtils import sys from api import db, cache, limiter +from api.utils.docking_utils import Protein_Docking snps = Namespace("SNPs", description="Information about SNPs", path="/snps") @@ -46,6 +47,54 @@ default="None", ) +@snps.route("/docking//") +class Docking(Resource): + @snps.param("receptor", _in="path", default="bri1") + @snps.param("ligand", _in="path", default="brass") + def get(self, receptor, ligand): + # receptor= escape(receptor) + # ligand = escape(ligand) + + #arabidopsis_pdb_path = "/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_" + #poplar_pdb_path = "/var/www/html/eplant_poplar/pdb/" + #tomato_pdb_path = "/var/www/html/eplant_tomato/pdbc/" + #docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/" + #docking_pdb_path = "/var/www/html/docking-pdbs/" + #arabidopsis_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock" + #poplar_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock" + #tomato_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock" + docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/" + docking_pdb_path = "/home/diennguyen/BAR_API/docking_test_pdbs" + + #Receptors can be adjusted please adjust the file format on the directories as well (sdf vs pdb) + # receptor = "3riz" + # ligand = "TDR" + # receptor = "5gij_ATOM" + # ligand = "TDIF" + + # if BARUtils.is_arabidopsis_gene_valid(receptor_pdb): + # receptor_pdb_path = arabidopsis_pdb_path + \ + # receptor_pdb.upper() + ".pdb" + # elif BARUtils.is_poplar_gene_valid(receptor_pdb): + # receptor_pdb_path = ( + # poplar_pdb_path + BARUtils.format_poplar( + # receptor_pdb) + ".pdb" + # ) + # elif BARUtils.is_tomato_gene_valid(receptor_pdb, True): + # receptor_pdb_path = tomato_pdb_path + receptor_pdb.capitalize() + ".pdb" + # else: + # return BARUtils.error_exit("Invalid receptor pdb gene id"), 400 + + #ligand_sdf_path = "/home/yyu/public_html/library" + ligand + ".pdb" + + docking_file_name = receptor.upper() + "-" + ligand.upper() + \ + "-docking0001.pdb " + response = requests.get("https:" + docking_pdb_link + docking_file_name) + + # Importing start function to initiate docking_utils file + + Protein_Docking.start(receptor,ligand,docking_pdb_path) + @snps.route("/phenix//") class Phenix(Resource): diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 755007f..0772f38 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -12,17 +12,18 @@ import sys from datetime import date +HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex' -def hex_docking(rec_lig,rec_lig2,receptor, ligand, date,docking_pdb_path): +def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path): # Function to call Hex, including hard coded settings - - code = """ open_receptor """ + docking_pdb_path + """results/receptor_to_dock/""" + receptor + """.pdb -open_ligand """ + docking_pdb_path +"""results/ligand_to_dock/""" + ligand + """.pdb +# max_docking_solutions set at 5 for testing + code = """ open_receptor """ + docking_pdb_path + """/results/receptor_to_dock/""" + receptor + """.pdb +open_ligand """ + docking_pdb_path +"""/results/ligand_to_dock/""" + ligand + """.pdb docking_correlation 1 docking_score_threshold 0 -max_docking_solutions 50 +max_docking_solutions 5 docking_receptor_stepsize 5.50 docking_ligand_stepsize 5.50 docking_alpha_stepsize 2.80 @@ -30,24 +31,24 @@ def hex_docking(rec_lig,rec_lig2,receptor, ligand, date,docking_pdb_path): receptor_origin C-825:VAL-O commit_edits activate_docking -save_range 1 100 """ + docking_pdb_path + """results/%s_folder_%s/%s/result %s pdb""" % (rec_lig, date, rec_lig2, rec_lig) - subprocess.Popen('/home/yyu/hex/bin/hex', stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8'))) +save_range 1 100 """ + docking_pdb_path + """/results/%s/%s/result %s pdb""" % (rec_lig, rec_lig2, rec_lig) + subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8'))) -def best_result(file_name, monomer, rec_lig, receptor, ligand): +def best_result(file_name, monomer, rec_lig, receptor, ligand, docking_pdb_path): # Function to generate the "best docking results", being the result with the best score and with the residue with the best contact frequency - file_name_dir = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results + file_name_dir = str(docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results file_name_path = str(file_name_dir + file_name[:-20] + '.pdb') #directory for the result, identifies as the best result des1 = file_name_dir + 'best_docking_results_for_'+ file_name[:-24] + '.pdb' #destination directory for the best_docking_result file shutil.copyfile(file_name_path,des1) #Same thing done with the ligand file only - ori2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name - des2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb' + ori2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name + des2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb' shutil.copyfile(ori2,des2) @@ -122,7 +123,7 @@ def separate_monomers(monomer, file_dir, file_name, dir_final, monomers_list): # Open the .pdb file to separate with open (file_dir + '/' + file_name + '.pdb', 'r+') as r: lines = r.readlines() - ends = [0] + ends = [0] # ends contains all line numbers of "TER" # Searches the .pdb files for the lines that indicate the end of a chain for l in lines: @@ -155,10 +156,10 @@ def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path): # Function to separate the ligand coordinates of every solution, it's useful to simply the calculation of the contact frequencies - dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory + dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory print('Isolating ' + rec_lig + '_' + monomer) - os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory + os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory file_list = os.listdir(dir_path) result_list = [] @@ -168,7 +169,7 @@ def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path): result_list.append(i) for r in result_list: file_path = str(dir_path + '/' + r) - ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb') + ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb') with open(file_path, 'r') as file: lines = [line for line in file.readlines()] # Everything below the line 'REMARK Docked ligand coordinates...' is data of the ligand @@ -180,12 +181,12 @@ def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path): -def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand): +def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand, docking_pdb_path): # Function to calculate the contact frequencies of every amino acid - result_dir_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ rec_lig + '_folder_'+ str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function! - receptor_file_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file + result_dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function! + receptor_file_path = str(docking_pdb_path + '/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file # Store every receptor's atom coordinates information as a nested dictionary called 'reference' with open(receptor_file_path, 'r') as file: @@ -293,13 +294,13 @@ def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand): # Use the result file from /result/, change the name to best docking result, and convert it into chain Z try: - best_result(best_result_name, monomer, rec_lig, receptor, ligand) + best_result(best_result_name, monomer, rec_lig, receptor, ligand, docking_pdb_path) # sometimes the simulations results are not good enough to satisfy both requirements, # it's common especially when one monomer is never close to the ligand. # Not including this line would stop an otherwise useful simulation except FileNotFoundError: f_file = receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb' - best_result(f_file, monomer, rec_lig, receptor, ligand) + best_result(f_file, monomer, rec_lig, receptor, ligand, docking_pdb_path) print(ac) @@ -322,14 +323,14 @@ def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path): if receptor + '_' + monomer not in result_dict.keys(): result_dict[receptor + '_' + monomer] = {} - if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False: - result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand) + if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False: + result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand, docking_pdb_path) else: result_dict[receptor+ '_' + monomer][ligand] = eval( - open(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read()) + open(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read()) print('res_dict.json previously exists and has read') - resultjson_path = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/results.json' + resultjson_path = docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/results.json' # Initialize results.json ini = {} @@ -354,14 +355,12 @@ def pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list, docking_pdb_p print('Current pair:' + rec_lig) - today_dir = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' - - datetoday = str(date.today()) + today_dir = docking_pdb_path + '/results/' + rec_lig + '/' results_dir = today_dir + rec_lig + '/result/' os.makedirs(results_dir) - hex_docking(rec_lig, rec_lig, receptor, ligand, datetoday,docking_pdb_path) # CALL HEX + hex_docking(rec_lig, rec_lig, receptor, ligand,docking_pdb_path) # CALL HEX results_list = os.listdir(results_dir) first_file_name = str(receptor + '_' + ligand + '0001.pdb') @@ -389,11 +388,14 @@ def start(receptor,ligand,docking_pdb_path): receptor_folder_list = os.listdir(receptor_folder) ligand_folder = os.listdir(docking_pdb_path + '/results/ligand_to_dock') + receptor_file_found = False for rec in receptor_folder_list: + sys.stdout.write(rec) # There could be hidden files in the receptor or ligand directory so only consider pdb files - if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb': + if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb'\ + and rec[:-4].lower() == receptor.lower(): + receptor_file_found = True receptor = rec[:-4] - # To check if the receptor is a monomer or not, the script will search the .pdb file # for the line that indicated the presence of multiple chains, with open(receptor_folder + '/' + rec, 'r+') as f: @@ -409,89 +411,104 @@ def start(receptor,ligand,docking_pdb_path): monomers_list = x.split(': ')[-1].split(', ') # The COMPND line ends with ';' therefore it needs to be removed from the last label monomers_list[-1] = monomers_list[-1][0] + break + ligand_file_found = False for lig in ligand_folder: - if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb': + sys.stdout.write(lig) + if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb'\ + and lig[:4].lower() == ligand.lower(): + ligand_file_found = True #DO NOT USE PDB FOR LIGAND FILES, it is possible but it can lead to errors due to the missing hydrogens ligand = lig[:-4] + break + + + ##TODO: Add block to raise error if receptor or ligand files are not found rec_lig = receptor + '_' + ligand + #check if results folder already exists + results_path = docking_pdb_path + '/results/' + rec_lig + if not os.path.exists(results_path): # To save the terminal output later (very important) - stdoutOrigin=sys.stdout - sys.stdout = open(docking_pdb_path + 'results/Terminal_recordings/' + receptor + '_' + ligand + '_' + str(date.today()) + '.txt' , "w") + stdoutOrigin=sys.stdout + sys.stdout = open(docking_pdb_path + '/results/Terminal_recordings/' + rec_lig + '_' + str(date.today()) + '.txt' , "w") - # Call to the pipeline with different parameters whether the receptor is a monomer or a complex - if is_monomer == False: - dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' - for monomer in monomers_list: - print('separating monomer: ' + monomer) - separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file + # Call to the pipeline with different parameters whether the receptor is a monomer or a complex + if is_monomer == False: + dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' + for monomer in monomers_list: + print('separating monomer: ' + monomer) + separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file - pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) - else: - dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' - monomers_list = ['monomer'] - separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it. - # It allows to use the same functions in both cases - pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) - - #To put together the json files with all the data from all monomers - new_json = docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + '/final.json' - final_json = {} - min_values = [] - max_values = [] - abs_max = None - abs_min = None - - for monomer in monomers_list: - monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' - with open(monomer_json, 'r') as file: - monomer_dict = json.load(file) - - monomer_key = list(monomer_dict.keys())[0] - ligand_key = list(monomer_dict[monomer_key].keys())[0] - - inside_dict = monomer_dict[monomer_key][ligand_key] - - # To eliminate empty dictionaries that might cause division errors below normalized_mon_dicitonary calculations - if inside_dict == {}: - continue - else: - mini = min(inside_dict.values()) - maxi = max(inside_dict.values()) + pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) + else: + dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' + monomers_list = ['monomer'] + separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it. + # It allows to use the same functions in both cases + pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) + + #To put together the json files with all the data from all monomers + new_json = docking_pdb_path + '/results/'+ rec_lig + '/' + '/final.json' + final_json = {} + min_values = [] + max_values = [] + abs_max = None + abs_min = None - min_values.append(mini) - max_values.append(maxi) + for monomer in monomers_list: + monomer_json = docking_pdb_path + '/results/' + rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' + with open(monomer_json, 'r') as file: + monomer_dict = json.load(file) - abs_max = max(max_values) - abs_min = min(min_values) + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] - print("This is the maximum value: ",abs_max, file=sys.stderr) - print("This is the minimum value: ",abs_min, file=sys.stderr) + inside_dict = monomer_dict[monomer_key][ligand_key] - #Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum. - for monomer in monomers_list: - monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' - with open(monomer_json, 'r') as file: - monomer_dict = json.load(file) + # To eliminate empty dictionaries that might cause division errors below normalized_mon_dicitonary calculations + if inside_dict == {}: + continue + else: + mini = min(inside_dict.values()) + maxi = max(inside_dict.values()) - monomer_key = list(monomer_dict.keys())[0] - ligand_key = list(monomer_dict[monomer_key].keys())[0] + min_values.append(mini) + max_values.append(maxi) - inside_dict = monomer_dict[monomer_key][ligand_key] + abs_max = max(max_values) + abs_min = min(min_values) - # It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + print("This is the maximum value: ",abs_max, file=sys.stderr) + print("This is the minimum value: ",abs_min, file=sys.stderr) - if abs_min == abs_max : - normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}} - final_json.update(normalized_mon_dict) - else: - normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}} - final_json.update(normalized_mon_dict) - #Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values - - with open(new_json,'w') as file: - file.write(json.dumps(final_json)) - print('Final json is finished') - print(new_json, file=sys.stderr) + #Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum. + for monomer in monomers_list: + monomer_json = docking_pdb_path + '/results/' +rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' + with open(monomer_json, 'r') as file: + monomer_dict = json.load(file) + + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] + + inside_dict = monomer_dict[monomer_key][ligand_key] + + # It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + + if abs_min == abs_max : + normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}} + final_json.update(normalized_mon_dict) + else: + normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}} + final_json.update(normalized_mon_dict) + #Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values + + with open(new_json,'w') as file: + file.write(json.dumps(final_json)) + print('Final json is finished') + print(new_json, file=sys.stderr) + sys.stdout.close() + else: + print("Docking has already been done on this protein-ligand.") From 470fd9632f5e6bf33976b81540da7828e0a38ccf Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 2 Nov 2023 11:39:47 -0400 Subject: [PATCH 03/35] redirect hex output to a text file, add function to parse hex output --- api/utils/docking_utils.py | 39 +++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 0772f38..cd1d604 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -16,6 +16,9 @@ def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path): + hex_output = open(docking_pdb_path + "/results/" + rec_lig + + "/{}_hex_output.txt".format(rec_lig), "w") + # Function to call Hex, including hard coded settings # max_docking_solutions set at 5 for testing @@ -23,7 +26,7 @@ def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path): open_ligand """ + docking_pdb_path +"""/results/ligand_to_dock/""" + ligand + """.pdb docking_correlation 1 docking_score_threshold 0 -max_docking_solutions 5 +max_docking_solutions 25 docking_receptor_stepsize 5.50 docking_ligand_stepsize 5.50 docking_alpha_stepsize 2.80 @@ -32,7 +35,8 @@ def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path): commit_edits activate_docking save_range 1 100 """ + docking_pdb_path + """/results/%s/%s/result %s pdb""" % (rec_lig, rec_lig2, rec_lig) - subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8'))) + subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=hex_output).communicate(bytes(code.encode('utf-8'))) + hex_output.close() @@ -310,8 +314,34 @@ def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand, docking return ac - - +def parse_hex_output(rec_lig, docking_pdb_path): + hex_output = open(docking_pdb_path + "/results/" + rec_lig + + "/{}_hex_output.txt".format(rec_lig), "r") + lines = hex_output.readlines() + result_start = 0 + result_end = 0 + for i in range(len(lines)): + splitted_line = lines[i].split(" ") + if len(splitted_line) > 8 and splitted_line[0] == "Clst": + result_start = i + 2 + if len(splitted_line) > 2 and splitted_line[1] == "save_range": + result_end = i - 2 + clustering_lines = lines[result_start:result_end] + clusters = {} + for line in clustering_lines: + cleaned_line = line.strip().split(" ") + res = [] + for ch in cleaned_line: + if ch != "": + res.append(ch) + clst = int(res[0]) + sln = int(res[1]) + if clst not in clusters: + clusters[clst] = [sln] + else: + clusters[clst].append(sln) + return(clusters) + def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path): @@ -390,7 +420,6 @@ def start(receptor,ligand,docking_pdb_path): receptor_file_found = False for rec in receptor_folder_list: - sys.stdout.write(rec) # There could be hidden files in the receptor or ligand directory so only consider pdb files if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb'\ and rec[:-4].lower() == receptor.lower(): From ed5a58ef99d6a57432a58fc628ab0b0c5cd0e357 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Tue, 21 Nov 2023 01:02:18 -0500 Subject: [PATCH 04/35] Add classes for refactoring of docking_utils.py file The following classes were added: Receptor, MonomerReceptor, ComplexReceptor, Ligand, Docking, MonomerDocking, ComplexDocking, and Docker. __init__ methods were added for each class. Methods for creating receptor, ligand and docking objects were written in Docker class. Hex_docking function was written for Docking class, which is inherited by MonomerDocking and ComplexDocking. --- api/utils/refactored_docking_utils.py | 285 ++++++++++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 api/utils/refactored_docking_utils.py diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py new file mode 100644 index 0000000..1c811ce --- /dev/null +++ b/api/utils/refactored_docking_utils.py @@ -0,0 +1,285 @@ +from abc import ABC, abstractmethod +from typing import List +import os +import re +import subprocess + +HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex' + +class Receptor(ABC): + """An abstract class that represents a receptor + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + """ + @abstractmethod + def __init__(self, name: str, file_path: str): + self.name = name + self.file_path = file_path + +class MonomerReceptor(Receptor): + """ A class that represents a receptor that is a monomer, meaning it consists + of only one chain. + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + """ + name: str + file_path: str + + def __init__(self, name, file_path): + super().__init__(name, file_path) + + +class ComplexReceptor(Receptor): + """ A class that represents a receptor that is a complex, meaning it consists + of more than one chain. + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + monomer_list (List[str]): the list of monomers that make up the complex + line_numbers (List[int]): the list of line numbers that separate the monomers + """ + def __init__(self, name: str, file_path: str, monomers_list: List[str]): + super().__init__(name, file_path) + self.monomers_list = monomers_list + self.line_numbers = [] + + def separate_monomers(self): + pass + +class Ligand: + """A class that represents a ligand. + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + """ + def __init__(self, name: str, file_path: str): + self.name = name + self.file_path = file_path + +class Docking(ABC): + """An abstract class that represents the docking between a receptor and a + ligand. + + --- Attributes --- + receptor (Receptor): a Receptor object that represents a receptor + ligand (Ligand): a Ligand object that represents a ligand + results_path (str): the file path to where the results are stored + ligand_reserved_list (List[int]): a list of line numbers, one for each solution, + the indicates where the "Docked ligand" section begins + """ + + @abstractmethod + def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str): + self.receptor = receptor + self.ligand = ligand + self.results_path = results_path + self.ligand_reserved_list = [] + + def hex_docking(self): + hex_output_file = open(self.results_path + 'hex_output.txt', "w") + + # Function to call Hex, including hard coded settings + + # max_docking_solutions set at 5 for testing + code = """ open_receptor """ + self.receptor.file_path + """ + open_ligand """ + self.ligand.file_path + """ + docking_correlation 1 + docking_score_threshold 0 + max_docking_solutions 5 + docking_receptor_stepsize 5.50 + docking_ligand_stepsize 5.50 + docking_alpha_stepsize 2.80 + docking_main_scan 16 + receptor_origin C-825:VAL-O + commit_edits + activate_docking + save_range 1 100 """ + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name) + subprocess.Popen(HEX_BIN_PATH, + stdin=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdout=hex_output_file).communicate(bytes(code.encode('utf-8'))) + hex_output_file.close() + print("Hex docking completed") + + @abstractmethod + def ligand_reserved(self): + pass + + @abstractmethod + def result_dict_generator(self): + pass + + @abstractmethod + def best_result(self): + pass + + @abstractmethod + def color_surfaces(self): + pass + +class MonomerDocking(Docking): + """A class the represents a docking between a monomer receptor and a monomer. + + --- Attributes --- + receptor (MonomerReceptor): a Receptor object that represents a monomer receptor + ligand (Ligand): a Ligand object that represents a ligand + results_path (str): the file path to where the results are stored + ligand_reserved (List[int]): a list of line numbers, one for each solution, + the indicates where the "Docked ligand" section begins + """ + + def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str): + super().__init__(receptor, ligand, results_path) + + def ligand_reserved(self): + pass + + def result_dict_generator(self): + pass + + def best_result(self): + pass + + def color_surfaces(self): + pass + +class ComplexDocking(Docking): + """A class that represents a docking between a complex receptor and a ligand. + + --- Attributes --- + receptor (MonomerReceptor): a Receptor object that represents a monomer receptor + ligand (Ligand): a Ligand object that represents a ligand + results_path (str): the file path to where the results are stored + ligand_reserved (List[int]): a list of line numbers, one for each solution, + the indicates where the "Docked ligand" section begins + split_results (List[List[Tuple[int]]]): a list where each sublist is a chain, + which contains a list of tuples. Each tuple indicates the line numbers + of the start and end of that chain in a results file. + """ + + def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str): + super().__init__(receptor, ligand, results_path) + split_results = [] + + def separate_results(self): + pass + + def ligand_reserved(self): + pass + + def result_dict_generator(self): + pass + + def best_result(self): + pass + + def color_surfaces(self): + pass + +class Docker: + """A class that represents the controller to create docking pairs and carry + out the docking""" + + @staticmethod + def start(receptor: str, ligand: str, docking_pdb_path: str): + + # create docking object + docking = Docker.create_docking(receptor, ligand, docking_pdb_path) + if docking is None: + return + + docking.hex_docking() + + def create_receptor(receptor_name: str, receptor_file_path: str): + with open(receptor_file_path) as f: + is_monomer = True + for line in f.readlines(): + if re.match(r'COMPND \d CHAIN: \w, \w*', line) != None: + is_monomer = False + #if the receptor would be a monomer the regex would be + # r'COMPND \d CHAIN: \w;' + + # To make a list of the monomers' labels + print(receptor_name + ' identified as a protein complex') + if line[11:16] == 'CHAIN': + monomers_list = line.split(': ')[-1].split(', ') + # The COMPND line ends with ';' therefore it needs to be + # removed from the last label + monomers_list[-1] = monomers_list[-1][0] + new_receptor = ComplexReceptor(receptor_name, + receptor_file_path, + monomers_list) + return new_receptor + print("Unknown pdb structure, need further investigation") + + if is_monomer: + new_receptor = MonomerReceptor(receptor_name, + receptor_file_path) + return new_receptor + + def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): + + # check that the docking combination has not been run before + results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '_testing/' + if os.path.exists(results_path): + print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name)) + return None + + os.makedirs(results_path) + + # find receptor file and create receptor object + receptor_folder = docking_pdb_path + 'results/receptor_to_dock' + receptor_found = False + + for receptor_file in os.listdir(receptor_folder): + if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ + receptor_file.split('.')[1] == 'pdb' and \ + receptor_file[:-4].lower() == receptor_name.lower(): + receptor_file_found = True + receptor_file_path = receptor_folder + '/' + receptor_file + receptor = Docker.create_receptor(receptor_name, receptor_file_path) + + # find ligand file and create ligand object + ligand_folder = docking_pdb_path + 'results/ligand_to_dock' + ligand_file_found = False + + for ligand_file in os.listdir(ligand_folder): + if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \ + ligand_file.split('.')[1] == 'pdb' and \ + ligand_file[:-4].lower() == ligand_name.lower(): + ligand_file_found = True + ligand_file_path = ligand_folder + '/' + ligand_file + ligand = Ligand(ligand_name, ligand_file_path) + + if not receptor_file_found: + print("Receptor file not found") + return + elif not ligand_file_found: + print("Ligand file not found") + return + + # receptor and ligand objects are created and ready for docking + if isinstance(receptor, MonomerReceptor): + docking = MonomerDocking(receptor, ligand, results_path) + else: + docking = ComplexDocking(receptor, ligand, results_path) + return docking + +if __name__ == "__main__": + # receptor = Docker.create_receptor("5gij_ATOM", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/5gij_ATOM.pdb") + # print(receptor.name) + # print(receptor.file_path) + # receptor2 = Docker.create_receptor("8g2j", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/8g2j.pdb") + # print(receptor2.name) + # print(receptor2.file_path) + # print(receptor2.monomers_list) + docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/") + print(docking.results_path) + print(docking.receptor.file_path) + docking.hex_docking() \ No newline at end of file From be00a1a5733575f56fa5a06795cd48603b06f003 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Tue, 16 Jan 2024 22:21:03 -0500 Subject: [PATCH 05/35] Add methods for each class Add a controller class called Docker, which creates instances of other classes for docking. --- api/utils/refactored_docking_utils.py | 450 +++++++++++++++++++++++--- 1 file changed, 400 insertions(+), 50 deletions(-) diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py index 1c811ce..32f80b7 100644 --- a/api/utils/refactored_docking_utils.py +++ b/api/utils/refactored_docking_utils.py @@ -3,6 +3,11 @@ import os import re import subprocess +import pickle +import math +import sys +import json +import datetime HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex' @@ -41,21 +46,39 @@ class ComplexReceptor(Receptor): name (str): the name of the receptor file_path (str): the relative path to the receptors pdb file monomer_list (List[str]): the list of monomers that make up the complex - line_numbers (List[int]): the list of line numbers that separate the monomers + line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]] """ def __init__(self, name: str, file_path: str, monomers_list: List[str]): super().__init__(name, file_path) self.monomers_list = monomers_list - self.line_numbers = [] + self.line_numbers = self.separate_monomers() def separate_monomers(self): - pass + line_numbers = [] + file = open(self.file_path, "r") + line = file.readline() + prev = None + curr_line = 0 + while line != '': + # the first line of the first monomer + if line[:12] == "ATOM 1 ": + prev = curr_line - 1 + # the last line of a monomer + elif line[:3] == 'TER': + # line_numbers.append(curr_line) + line_numbers.append([prev + 1, curr_line]) + prev = curr_line + curr_line += 1 + line = file.readline() + + return line_numbers + class Ligand: """A class that represents a ligand. --- Attributes --- - name (str): the name of the receptor + name (str): the name of the receptor file_path (str): the relative path to the receptors pdb file """ def __init__(self, name: str, file_path: str): @@ -87,40 +110,194 @@ def hex_docking(self): # Function to call Hex, including hard coded settings # max_docking_solutions set at 5 for testing - code = """ open_receptor """ + self.receptor.file_path + """ - open_ligand """ + self.ligand.file_path + """ - docking_correlation 1 - docking_score_threshold 0 - max_docking_solutions 5 - docking_receptor_stepsize 5.50 - docking_ligand_stepsize 5.50 - docking_alpha_stepsize 2.80 - docking_main_scan 16 - receptor_origin C-825:VAL-O - commit_edits - activate_docking - save_range 1 100 """ + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name) + hex_command = """ open_receptor """ + self.receptor.file_path + """ + open_ligand """ + self.ligand.file_path + """ + docking_correlation 1 + docking_score_threshold 0 + max_docking_solutions 25 + docking_receptor_stepsize 5.50 + docking_ligand_stepsize 5.50 + docking_alpha_stepsize 2.80 + docking_main_scan 16 + receptor_origin C-825:VAL-O + commit_edits + activate_docking + save_range 1 100 """ \ + + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name) + print(hex_command) subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, - stdout=hex_output_file).communicate(bytes(code.encode('utf-8'))) + stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8'))) hex_output_file.close() + ct = datetime.datetime.now() + print("current time:-", ct) print("Hex docking completed") - @abstractmethod - def ligand_reserved(self): - pass + def crte_ligand_reserved_attr(self): + """This function populates the Docking instance's ligand_reserved_list attribute + with a list of line numbers. Each line number is where the Docked Ligand section + begins for each result. + For example, [1500, 1499, 1500] means that there are three solutions. In the first + solution, the "Docked Ligand" section begins at line 1500. In the second solution, + it begins at line 1499, and so on ... + + # TODO: Check if having the same chain name (e.g. C) in BOTH ligand + # and receptor will be a problem for execution. + """ + line_numbers = [] + for filename in os.listdir(self.results_path): + if filename[-3:] == 'pdb': + file = open(self.results_path + filename, "r") + lines = file.readlines() + for i in range(len(lines)): + if "Docked ligand coordinates..." in lines[i]: + line_numbers.append(i) + break + self.ligand_reserved_list = line_numbers + + def parse_hex_output(self): + hex_output = open(self.results_path + 'hex_output.txt', "r") + lines = hex_output.readlines() + # line number where the clustering starts and ends + result_start = 0 + result_end = 0 + for i in range(len(lines)): + splitted_line = lines[i].split(" ") + if len(splitted_line) > 8 and splitted_line[0] == "Clst": + result_start = i + 2 + if len(splitted_line) > 2 and "save_range" in splitted_line: + result_end = i - 2 + clustering_lines = lines[result_start:result_end] + clusters = {} + clusters["num_soln"] = len(clustering_lines) + for line in clustering_lines: + cleaned_line = line.strip().split(" ") + res = [] + # only keep non-blank items in line + for ch in cleaned_line: + if ch != "": + res.append(ch) + clst = int(res[0]) + sln = int(res[1]) + if clst not in clusters: + clusters[clst] = [sln] + else: + clusters[clst].append(sln) + return(clusters) + - @abstractmethod - def result_dict_generator(self): - pass + def result_dict_generator(self, monomer_number, threshold): + receptor_file = open(self.receptor.file_path, "r") + + if monomer_number != -1: # if -1, go to monomer logic + # get the start and end line numbers of the monomer in the receptor pdb + monomer_start = self.receptor.line_numbers[monomer_number][0] + monomer_end = self.receptor.line_numbers[monomer_number][1] + + # get the lines for that receptor only + receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end] + else: # Monomer logic + receptor_file_lines = receptor_file.readlines() + + # Store every receptor's atom coordinates information as a nested + # dictionary called 'reference' + reference = {} + for line in receptor_file_lines: + splitted_line = line.split() + if line[0:4] == 'ATOM': + coord = map(float, filter(None, splitted_line[6:9])) + if int(splitted_line[5]) in reference: + reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord) + else: + reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)} + + # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}}, + + + # The energy for each reference element will be stored in dictionary 'ac' + ac = {} + result_list = [] + for filename in os.listdir(self.results_path): + if filename[-3:] == 'pdb': + result_list.append(filename) + + lowest_en = None # to keep track of lowest energy + lowest_en_file = None # the file with the lowest energy + lowest_residue_list = None # list of residues of file with lowest energy + all_residue_list = [] + + cluster_dict = self.parse_hex_output() + + for i in range(len(result_list)): + print('current file: ' + result_list[i]) + energy = '' + + # get the ligand_reserved section of the result file + file = open(self.results_path + result_list[i], 'r') + ligand_reserved_start = self.ligand_reserved_list[i] + ligand_reserved_section = file.readlines()[ligand_reserved_start:] + + # go through ligand reserved section to calculate energy + residue_set = set() + coor = [] + for line in ligand_reserved_section: + if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '): + cluster_size = len(cluster_dict[i + 1]) + total_solutions = cluster_dict['num_soln'] + + # energy is weighed according to the number of solutions + # in that cluster + energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size + + # record values if lowest energy + if lowest_en is None or energy < lowest_en: + lowest_en_file = result_list[i] + lowest_en = energy + elif line[:4] == 'ATOM': + # coordinates of one atom + coordinates = tuple(map(float, filter(None, line.split()[6:9]))) + coor.append(coordinates) + # each atom's coordinates is now stored in the list coordinates + + residue_set = set() + for res in reference.keys(): # for each amino acid in the receptor file: + distances = [] + + for atom in coor: # for each atom of the ligand + for aa in reference[res].keys(): # for each atom of that amino acid + # check if the distance between atoms of the ligands + # and of the amino acid are lower than chosen threshold (5) + distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2, + (reference[res][aa][1] - atom[1]) ** 2, + (reference[res][aa][2] - atom[2]) ** 2])) + + distances.append(distance) + + # if at least one of the distances is lower than the threshold, otherwise skip + if all(d >= threshold for d in distances): + continue + else: + # adding energy (previosly divided by the number of results) + # if found multiple times, we would get an average + if res in ac.keys(): + ac[res] += energy + else: + ac[res] = energy + + # Store the resi number into set + residue_set.add(res) + + all_residue_list.append(residue_set) + + return ac @abstractmethod def best_result(self): pass @abstractmethod - def color_surfaces(self): + def crte_receptor_dict(self): pass class MonomerDocking(Docking): @@ -130,34 +307,69 @@ class MonomerDocking(Docking): receptor (MonomerReceptor): a Receptor object that represents a monomer receptor ligand (Ligand): a Ligand object that represents a ligand results_path (str): the file path to where the results are stored - ligand_reserved (List[int]): a list of line numbers, one for each solution, + ligand_reserved_list (List[int]): a list of line numbers, one for each solution, the indicates where the "Docked ligand" section begins """ def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str): super().__init__(receptor, ligand, results_path) - def ligand_reserved(self): + def best_result(self): pass - def result_dict_generator(self): - pass + def crte_receptor_dict(self, threshold): + receptor_res = {} + res_dict = self.result_dict_generator(-1, threshold) + ligand_res = {} + ligand_res[self.ligand.name] = res_dict + receptor_res[self.receptor.name] = ligand_res + return receptor_res + + def normalize_results(self, threshold): + results_dict = self.crte_receptor_dict(threshold) + receptor_key = list(results_dict.keys())[0] + ligand_key = list(results_dict[receptor_key].keys())[0] + + inside_dict = results_dict[receptor_key][ligand_key] + abs_max = None + abs_min = None + + # To eliminate empty dictionaries that might cause division errors below + # normalized_mon_dicitonary calculations + if inside_dict != {}: + abs_min = min(inside_dict.values()) + abs_max = max(inside_dict.values()) + + print("This is the maximum value: ",abs_max, file=sys.stderr) + print("This is the minimum value: ",abs_min, file=sys.stderr) + + all_normalized_results = {} - def best_result(self): - pass + normalized_mon_dict = {} + normalized_mon_dict[receptor_key] = {} + normalized_mon_dict[receptor_key][ligand_key] = {} + + # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + if abs_min == abs_max: + for k, v in inside_dict.items(): + normalized_mon_dict[receptor_key][ligand_key][k] = 1 + else: + for k, v in inside_dict.items(): + normalized_value = (v - abs_min) / (abs_max - abs_min) + normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value + all_normalized_results.update(normalized_mon_dict) + return all_normalized_results - def color_surfaces(self): - pass class ComplexDocking(Docking): """A class that represents a docking between a complex receptor and a ligand. --- Attributes --- - receptor (MonomerReceptor): a Receptor object that represents a monomer receptor + receptor (ComplexReceptor): a Receptor object that represents a monomer receptor ligand (Ligand): a Ligand object that represents a ligand results_path (str): the file path to where the results are stored ligand_reserved (List[int]): a list of line numbers, one for each solution, - the indicates where the "Docked ligand" section begins + which indicates where the "Docked ligand" section begins split_results (List[List[Tuple[int]]]): a list where each sublist is a chain, which contains a list of tuples. Each tuple indicates the line numbers of the start and end of that chain in a results file. @@ -165,22 +377,116 @@ class ComplexDocking(Docking): def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str): super().__init__(receptor, ligand, results_path) - split_results = [] + self.split_results = [] def separate_results(self): - pass - - def ligand_reserved(self): - pass + """For each solution, record the start and end line number (0-based) of + each chain. Then, populate self.split_results with the final list. + + Each sublist represents one solution file. Each tuple in the sublist + contains the start and end of one chain. The order of the tuples in + the sublist is the same as the order of the monomers in the receptor's + monomers_list.""" - def result_dict_generator(self): - pass + results_files = os.listdir(self.results_path) + + all_chains = [] + + # for each solution + for file in results_files: + if file[-3:] != "pdb": + break + result_file = open(self.results_path + file) + + # this list contains indices of the start and end of each chain + line_numbers = [] + line = result_file.readline() + curr_line = 0 + prev = None + while line != '': + # the start of the first chain + if line.split()[0] == "ATOM" and line.split()[1] == "1": + # if line.startswith('ATOM 1 '): + prev = curr_line - 1 + + # the end of a chain + elif line[0:3] == 'TER': + line_numbers.append([prev + 1, curr_line]) + prev = curr_line + + # read next line + line = result_file.readline() + curr_line += 1 + + # populate split_results attribute + self.split_results = line_numbers def best_result(self): pass - def color_surfaces(self): - pass + def crte_receptor_dict(self, threshold): + all_monomers = [] + for i in range(len(self.receptor.monomers_list)): + ligand_res = {} + res_dict = self.result_dict_generator(i, threshold) + ligand_res[self.ligand.name] = res_dict + all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res}) + return all_monomers + + def normalize_results(self, threshold): + min_values = [] + max_values = [] + abs_max = None + abs_min = None + all_monomers_dict = self.crte_receptor_dict(threshold) + for i in range(len(all_monomers_dict)): + monomer_dict = all_monomers_dict[i] + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] + + inside_dict = monomer_dict[monomer_key][ligand_key] + + # To eliminate empty dictionaries that might cause division errors below + # normalized_mon_dicitonary calculations + if inside_dict == {}: + continue + else: + mini = min(inside_dict.values()) + maxi = max(inside_dict.values()) + + min_values.append(mini) + max_values.append(maxi) + + abs_max = max(max_values) + abs_min = min(min_values) + + print("This is the maximum value: ",abs_max, file=sys.stderr) + print("This is the minimum value: ",abs_min, file=sys.stderr) + + # Now looping through every monomer, and calculating every residue energy to be + # normalized by using absolute minimum and maximum. + all_normalized_results = {} + for i in range(len(all_monomers_dict)): + monomer_dict = all_monomers_dict[i] + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] + + inside_dict = monomer_dict[monomer_key][ligand_key] + + normalized_mon_dict = {} + normalized_mon_dict[monomer_key] = {} + normalized_mon_dict[monomer_key][ligand_key] = {} + + # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + if abs_min == abs_max: + for k, v in inside_dict.items(): + normalized_mon_dict[monomer_key][ligand_key][k] = 1 + else: + for k, v in inside_dict.items(): + normalized_value = (v - abs_min) / (abs_max - abs_min) + normalized_mon_dict[monomer_key][ligand_key][k] = normalized_value + all_normalized_results.update(normalized_mon_dict) + return all_normalized_results class Docker: """A class that represents the controller to create docking pairs and carry @@ -190,11 +496,27 @@ class Docker: def start(receptor: str, ligand: str, docking_pdb_path: str): # create docking object + ct = datetime.datetime.now() + print("Starting the docking process at {}".format(ct)) docking = Docker.create_docking(receptor, ligand, docking_pdb_path) if docking is None: - return + results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '_testing/' + with open(results_path + "final.json") as json_file: + final_json = json.load(json_file) + return final_json docking.hex_docking() + if isinstance(docking, ComplexDocking): + docking.separate_results() + docking.crte_ligand_reserved_attr() + normalized_results = docking.normalize_results(5) + new_json = docking.results_path + "final.json" + with open(new_json,'w') as file: + file.write(json.dumps(normalized_results)) + ct = datetime.datetime.now() + print("current time:-", ct) + return normalized_results + def create_receptor(receptor_name: str, receptor_file_path: str): with open(receptor_file_path) as f: @@ -235,7 +557,7 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): # find receptor file and create receptor object receptor_folder = docking_pdb_path + 'results/receptor_to_dock' - receptor_found = False + receptor_file_found = False for receptor_file in os.listdir(receptor_folder): if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ @@ -276,10 +598,38 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): # print(receptor.name) # print(receptor.file_path) # receptor2 = Docker.create_receptor("8g2j", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/8g2j.pdb") + # print(receptor2.line_numbers) # print(receptor2.name) # print(receptor2.file_path) # print(receptor2.monomers_list) - docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/") - print(docking.results_path) - print(docking.receptor.file_path) - docking.hex_docking() \ No newline at end of file + # Load the instance from the file + # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'rb') as file: + # docking = pickle.load(file) + # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'rb') as file: + # docking = pickle.load(file) + + # docking.ligand_reserved_list = docking.ligand_reserved() + + # docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/") + # print(docking.results_path) + # print(docking.receptor.file_path) + # docking.hex_docking() + # docking.ligand_reserved() + # docking.separate_results() + # print(docking.split_results) + # # Save instance to file + # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'wb') as file: + # pickle.dump(docking, file) + + # docking = Docker.create_docking("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/") + # docking.hex_docking() + # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'wb') as file: + # pickle.dump(docking, file) + + # print(docking.receptor.monomers_list) + # print(docking.receptor.line_numbers) + # docking.ligand_reserved() + # print(docking.ligand_reserved_list) + # print(docking.normalize_results(5)) + print(Docker.start("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/")) + # print(Docker.start("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/")) \ No newline at end of file From 9001a046bfffd613b9713710e89cf198ee988a62 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 18 Jan 2024 14:05:03 -0500 Subject: [PATCH 06/35] Fix styling issues and add more documentation --- api/utils/refactored_docking_utils.py | 227 ++++++++++++-------------- 1 file changed, 107 insertions(+), 120 deletions(-) diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py index 32f80b7..d7b3176 100644 --- a/api/utils/refactored_docking_utils.py +++ b/api/utils/refactored_docking_utils.py @@ -3,7 +3,6 @@ import os import re import subprocess -import pickle import math import sys import json @@ -11,6 +10,7 @@ HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex' + class Receptor(ABC): """An abstract class that represents a receptor @@ -23,6 +23,7 @@ def __init__(self, name: str, file_path: str): self.name = name self.file_path = file_path + class MonomerReceptor(Receptor): """ A class that represents a receptor that is a monomer, meaning it consists of only one chain. @@ -54,6 +55,11 @@ def __init__(self, name: str, file_path: str, monomers_list: List[str]): self.line_numbers = self.separate_monomers() def separate_monomers(self): + """Returns a list of lists, where each sublist contains the line + numbers of the start and end of a monomer. + For example, receptor X has 3 chains in this order: A, B, C. + The method will return [[1, 6], [7, 9], [10, 15]]. + """ line_numbers = [] file = open(self.file_path, "r") line = file.readline() @@ -70,21 +76,22 @@ def separate_monomers(self): prev = curr_line curr_line += 1 line = file.readline() - + return line_numbers class Ligand: """A class that represents a ligand. - + --- Attributes --- - name (str): the name of the receptor + name (str): the name of the receptor file_path (str): the relative path to the receptors pdb file """ def __init__(self, name: str, file_path: str): self.name = name self.file_path = file_path + class Docking(ABC): """An abstract class that represents the docking between a receptor and a ligand. @@ -105,6 +112,8 @@ def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str): self.ligand_reserved_list = [] def hex_docking(self): + """Run hex docking using the command line. + """ hex_output_file = open(self.results_path + 'hex_output.txt', "w") # Function to call Hex, including hard coded settings @@ -124,10 +133,9 @@ def hex_docking(self): activate_docking save_range 1 100 """ \ + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name) - print(hex_command) - subprocess.Popen(HEX_BIN_PATH, + subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, - stderr=subprocess.STDOUT, + stderr=subprocess.STDOUT, stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8'))) hex_output_file.close() ct = datetime.datetime.now() @@ -141,9 +149,6 @@ def crte_ligand_reserved_attr(self): For example, [1500, 1499, 1500] means that there are three solutions. In the first solution, the "Docked Ligand" section begins at line 1500. In the second solution, it begins at line 1499, and so on ... - - # TODO: Check if having the same chain name (e.g. C) in BOTH ligand - # and receptor will be a problem for execution. """ line_numbers = [] for filename in os.listdir(self.results_path): @@ -155,8 +160,13 @@ def crte_ligand_reserved_attr(self): line_numbers.append(i) break self.ligand_reserved_list = line_numbers - + def parse_hex_output(self): + """Returns a dictionary where the key is the cluster number and the + value is a list of solution numbers. One of the keys is "num_soln", + where its value is the total number of solutions. + For example: {num_soln : 5, 1 : [2, 4], 2 : [1, 3, 5]} + """ hex_output = open(self.results_path + 'hex_output.txt', "r") lines = hex_output.readlines() # line number where the clustering starts and ends @@ -184,23 +194,27 @@ def parse_hex_output(self): clusters[clst] = [sln] else: clusters[clst].append(sln) - return(clusters) - + return clusters def result_dict_generator(self, monomer_number, threshold): + """Return a dictionary where each key is a residue and each value is + the energy. The distance between each residue in the monomer and each + atom in the ligand is calculated, and only residues with distances + below the threshold are included. + """ receptor_file = open(self.receptor.file_path, "r") - if monomer_number != -1: # if -1, go to monomer logic + if monomer_number != -1: # if -1, go to monomer logic # get the start and end line numbers of the monomer in the receptor pdb monomer_start = self.receptor.line_numbers[monomer_number][0] monomer_end = self.receptor.line_numbers[monomer_number][1] # get the lines for that receptor only receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end] - else: # Monomer logic + else: # Monomer logic receptor_file_lines = receptor_file.readlines() - # Store every receptor's atom coordinates information as a nested + # Store every receptor's atom coordinates information as a nested # dictionary called 'reference' reference = {} for line in receptor_file_lines: @@ -214,7 +228,6 @@ def result_dict_generator(self, monomer_number, threshold): # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}}, - # The energy for each reference element will be stored in dictionary 'ac' ac = {} result_list = [] @@ -222,15 +235,12 @@ def result_dict_generator(self, monomer_number, threshold): if filename[-3:] == 'pdb': result_list.append(filename) - lowest_en = None # to keep track of lowest energy - lowest_en_file = None # the file with the lowest energy - lowest_residue_list = None # list of residues of file with lowest energy + lowest_en = None # to keep track of lowest energy all_residue_list = [] cluster_dict = self.parse_hex_output() for i in range(len(result_list)): - print('current file: ' + result_list[i]) energy = '' # get the ligand_reserved section of the result file @@ -252,26 +262,25 @@ def result_dict_generator(self, monomer_number, threshold): # record values if lowest energy if lowest_en is None or energy < lowest_en: - lowest_en_file = result_list[i] lowest_en = energy elif line[:4] == 'ATOM': - # coordinates of one atom - coordinates = tuple(map(float, filter(None, line.split()[6:9]))) - coor.append(coordinates) + # coordinates of one atom + coordinates = tuple(map(float, filter(None, line.split()[6:9]))) + coor.append(coordinates) # each atom's coordinates is now stored in the list coordinates residue_set = set() - for res in reference.keys(): # for each amino acid in the receptor file: + for res in reference.keys(): # for each amino acid in the receptor file: distances = [] - - for atom in coor: # for each atom of the ligand - for aa in reference[res].keys(): # for each atom of that amino acid - # check if the distance between atoms of the ligands - # and of the amino acid are lower than chosen threshold (5) + + for atom in coor: # for each atom of the ligand + for aa in reference[res].keys(): # for each atom of that amino acid + # check if the distance between atoms of the ligands + # and of the amino acid are lower than chosen threshold (5) distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2, (reference[res][aa][1] - atom[1]) ** 2, (reference[res][aa][2] - atom[2]) ** 2])) - + distances.append(distance) # if at least one of the distances is lower than the threshold, otherwise skip @@ -285,7 +294,7 @@ def result_dict_generator(self, monomer_number, threshold): else: ac[res] = energy - # Store the resi number into set + # Store the resi number into set residue_set.add(res) all_residue_list.append(residue_set) @@ -300,9 +309,14 @@ def best_result(self): def crte_receptor_dict(self): pass + @abstractmethod + def normalize_results(self, threshold): + pass + + class MonomerDocking(Docking): """A class the represents a docking between a monomer receptor and a monomer. - + --- Attributes --- receptor (MonomerReceptor): a Receptor object that represents a monomer receptor ligand (Ligand): a Ligand object that represents a ligand @@ -318,14 +332,21 @@ def best_result(self): pass def crte_receptor_dict(self, threshold): + """"Return a dictionary that contains the residue-energy + dictionary of the monomer. This is not necessary, but maintains + consistency between monomer and complex receptor dictionaries. + """ receptor_res = {} res_dict = self.result_dict_generator(-1, threshold) ligand_res = {} ligand_res[self.ligand.name] = res_dict receptor_res[self.receptor.name] = ligand_res return receptor_res - + def normalize_results(self, threshold): + """Return normalized residue-energy dictionaries for the + receptor. + """ results_dict = self.crte_receptor_dict(threshold) receptor_key = list(results_dict.keys())[0] ligand_key = list(results_dict[receptor_key].keys())[0] @@ -334,15 +355,12 @@ def normalize_results(self, threshold): abs_max = None abs_min = None - # To eliminate empty dictionaries that might cause division errors below + # To eliminate empty dictionaries that might cause division errors below # normalized_mon_dicitonary calculations - if inside_dict != {}: + if inside_dict != {}: abs_min = min(inside_dict.values()) abs_max = max(inside_dict.values()) - print("This is the maximum value: ",abs_max, file=sys.stderr) - print("This is the minimum value: ",abs_min, file=sys.stderr) - all_normalized_results = {} normalized_mon_dict = {} @@ -363,7 +381,7 @@ def normalize_results(self, threshold): class ComplexDocking(Docking): """A class that represents a docking between a complex receptor and a ligand. - + --- Attributes --- receptor (ComplexReceptor): a Receptor object that represents a monomer receptor ligand (Ligand): a Ligand object that represents a ligand @@ -382,15 +400,13 @@ def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str) def separate_results(self): """For each solution, record the start and end line number (0-based) of each chain. Then, populate self.split_results with the final list. - + Each sublist represents one solution file. Each tuple in the sublist contains the start and end of one chain. The order of the tuples in the sublist is the same as the order of the monomers in the receptor's - monomers_list.""" - + monomers_list. + """ results_files = os.listdir(self.results_path) - - all_chains = [] # for each solution for file in results_files: @@ -406,7 +422,7 @@ def separate_results(self): while line != '': # the start of the first chain if line.split()[0] == "ATOM" and line.split()[1] == "1": - # if line.startswith('ATOM 1 '): + # if line.startswith('ATOM 1 '): prev = curr_line - 1 # the end of a chain @@ -432,7 +448,7 @@ def crte_receptor_dict(self, threshold): ligand_res[self.ligand.name] = res_dict all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res}) return all_monomers - + def normalize_results(self, threshold): min_values = [] max_values = [] @@ -446,24 +462,24 @@ def normalize_results(self, threshold): inside_dict = monomer_dict[monomer_key][ligand_key] - # To eliminate empty dictionaries that might cause division errors below + # To eliminate empty dictionaries that might cause division errors below # normalized_mon_dicitonary calculations - if inside_dict == {}: + if inside_dict == {}: continue else: mini = min(inside_dict.values()) maxi = max(inside_dict.values()) - + min_values.append(mini) max_values.append(maxi) - + abs_max = max(max_values) abs_min = min(min_values) - print("This is the maximum value: ",abs_max, file=sys.stderr) - print("This is the minimum value: ",abs_min, file=sys.stderr) - - # Now looping through every monomer, and calculating every residue energy to be + print("This is the maximum value: ", abs_max, file=sys.stderr) + print("This is the minimum value: ", abs_min, file=sys.stderr) + + # Now looping through every monomer, and calculating every residue energy to be # normalized by using absolute minimum and maximum. all_normalized_results = {} for i in range(len(all_monomers_dict)): @@ -488,54 +504,60 @@ def normalize_results(self, threshold): all_normalized_results.update(normalized_mon_dict) return all_normalized_results + class Docker: """A class that represents the controller to create docking pairs and carry - out the docking""" + out the docking. + """ @staticmethod def start(receptor: str, ligand: str, docking_pdb_path: str): - + """Start the docking process and analyze results. Return the + normalized residue-energyy dictionary. + """ # create docking object ct = datetime.datetime.now() print("Starting the docking process at {}".format(ct)) docking = Docker.create_docking(receptor, ligand, docking_pdb_path) if docking is None: - results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '_testing/' + results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '/' with open(results_path + "final.json") as json_file: final_json = json.load(json_file) return final_json - + docking.hex_docking() if isinstance(docking, ComplexDocking): docking.separate_results() docking.crte_ligand_reserved_attr() normalized_results = docking.normalize_results(5) new_json = docking.results_path + "final.json" - with open(new_json,'w') as file: + with open(new_json, 'w') as file: file.write(json.dumps(normalized_results)) ct = datetime.datetime.now() print("current time:-", ct) return normalized_results - def create_receptor(receptor_name: str, receptor_file_path: str): + """Return a new receptor with the name receptor_name, by parsing + the file at recepter_file_path. + """ with open(receptor_file_path) as f: is_monomer = True for line in f.readlines(): - if re.match(r'COMPND \d CHAIN: \w, \w*', line) != None: + if re.match(r'COMPND \d CHAIN: \w, \w*', line) is not None: is_monomer = False - #if the receptor would be a monomer the regex would be + # if the receptor would be a monomer the regex would be # r'COMPND \d CHAIN: \w;' - # To make a list of the monomers' labels + # To make a list of the monomers' labels print(receptor_name + ' identified as a protein complex') if line[11:16] == 'CHAIN': monomers_list = line.split(': ')[-1].split(', ') - # The COMPND line ends with ';' therefore it needs to be - # removed from the last label + # The COMPND line ends with ';' therefore it needs to be + # removed from the last label monomers_list[-1] = monomers_list[-1][0] - new_receptor = ComplexReceptor(receptor_name, - receptor_file_path, + new_receptor = ComplexReceptor(receptor_name, + receptor_file_path, monomers_list) return new_receptor print("Unknown pdb structure, need further investigation") @@ -544,25 +566,27 @@ def create_receptor(receptor_name: str, receptor_file_path: str): new_receptor = MonomerReceptor(receptor_name, receptor_file_path) return new_receptor - + def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): - + """Return a docking pair, which contains a Receptor and a Ligand, as + specified by receptor_name and ligand_name, respectively. + """ # check that the docking combination has not been run before - results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '_testing/' + results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '/' if os.path.exists(results_path): print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name)) return None - + os.makedirs(results_path) - + # find receptor file and create receptor object - receptor_folder = docking_pdb_path + 'results/receptor_to_dock' + receptor_folder = docking_pdb_path + 'results/receptor_to_dock' receptor_file_found = False for receptor_file in os.listdir(receptor_folder): if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ - receptor_file.split('.')[1] == 'pdb' and \ - receptor_file[:-4].lower() == receptor_name.lower(): + receptor_file.split('.')[1] == 'pdb' and \ + receptor_file[:-4].lower() == receptor_name.lower(): receptor_file_found = True receptor_file_path = receptor_folder + '/' + receptor_file receptor = Docker.create_receptor(receptor_name, receptor_file_path) @@ -573,8 +597,8 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): for ligand_file in os.listdir(ligand_folder): if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \ - ligand_file.split('.')[1] == 'pdb' and \ - ligand_file[:-4].lower() == ligand_name.lower(): + ligand_file.split('.')[1] == 'pdb' and \ + ligand_file[:-4].lower() == ligand_name.lower(): ligand_file_found = True ligand_file_path = ligand_folder + '/' + ligand_file ligand = Ligand(ligand_name, ligand_file_path) @@ -584,52 +608,15 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): return elif not ligand_file_found: print("Ligand file not found") - return - + return + # receptor and ligand objects are created and ready for docking if isinstance(receptor, MonomerReceptor): docking = MonomerDocking(receptor, ligand, results_path) else: docking = ComplexDocking(receptor, ligand, results_path) return docking - + + if __name__ == "__main__": - # receptor = Docker.create_receptor("5gij_ATOM", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/5gij_ATOM.pdb") - # print(receptor.name) - # print(receptor.file_path) - # receptor2 = Docker.create_receptor("8g2j", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/8g2j.pdb") - # print(receptor2.line_numbers) - # print(receptor2.name) - # print(receptor2.file_path) - # print(receptor2.monomers_list) - # Load the instance from the file - # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'rb') as file: - # docking = pickle.load(file) - # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'rb') as file: - # docking = pickle.load(file) - - # docking.ligand_reserved_list = docking.ligand_reserved() - - # docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/") - # print(docking.results_path) - # print(docking.receptor.file_path) - # docking.hex_docking() - # docking.ligand_reserved() - # docking.separate_results() - # print(docking.split_results) - # # Save instance to file - # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'wb') as file: - # pickle.dump(docking, file) - - # docking = Docker.create_docking("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/") - # docking.hex_docking() - # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'wb') as file: - # pickle.dump(docking, file) - - # print(docking.receptor.monomers_list) - # print(docking.receptor.line_numbers) - # docking.ligand_reserved() - # print(docking.ligand_reserved_list) - # print(docking.normalize_results(5)) print(Docker.start("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/")) - # print(Docker.start("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/")) \ No newline at end of file From 4195dc1f327bd71cd65383fc709cd82b5267d1e7 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 1 Feb 2024 15:57:55 -0500 Subject: [PATCH 07/35] change paths to match the BAR's file structure --- api/resources/snps.py | 54 +++++++++++---------------- api/utils/refactored_docking_utils.py | 50 ++++++++++++++++--------- 2 files changed, 54 insertions(+), 50 deletions(-) mode change 100644 => 100755 api/resources/snps.py mode change 100644 => 100755 api/utils/refactored_docking_utils.py diff --git a/api/resources/snps.py b/api/resources/snps.py old mode 100644 new mode 100755 index fc9c0a4..d721be0 --- a/api/resources/snps.py +++ b/api/resources/snps.py @@ -26,7 +26,7 @@ from api.utils.hotspot_utils import HotspotUtils import sys from api import db, cache, limiter -from api.utils.docking_utils import Protein_Docking +from api.utils.refactored_docking_utils import Docker snps = Namespace("SNPs", description="Information about SNPs", path="/snps") @@ -47,45 +47,32 @@ default="None", ) + @snps.route("/docking//") class Docking(Resource): + decorators = [limiter.limit("2/minute")] + @snps.param("receptor", _in="path", default="bri1") @snps.param("ligand", _in="path", default="brass") def get(self, receptor, ligand): - # receptor= escape(receptor) - # ligand = escape(ligand) - - #arabidopsis_pdb_path = "/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_" - #poplar_pdb_path = "/var/www/html/eplant_poplar/pdb/" - #tomato_pdb_path = "/var/www/html/eplant_tomato/pdbc/" - #docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/" - #docking_pdb_path = "/var/www/html/docking-pdbs/" - #arabidopsis_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock" - #poplar_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock" - #tomato_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock" + receptor = escape(receptor) + ligand = escape(ligand) + + # TODO: Clean comments left by metyu before commit + docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/" - docking_pdb_path = "/home/diennguyen/BAR_API/docking_test_pdbs" + docking_pdb_path = "/DATA/HEX_API/RESULTS/" + # TODO: Then add regex check to receptors/ligands (For Arabidopsis genes, simply reuse + # is_arabidopsis_gene_valid; but you will need make regex check for your SDFs) #Receptors can be adjusted please adjust the file format on the directories as well (sdf vs pdb) - # receptor = "3riz" - # ligand = "TDR" - # receptor = "5gij_ATOM" - # ligand = "TDIF" - - # if BARUtils.is_arabidopsis_gene_valid(receptor_pdb): - # receptor_pdb_path = arabidopsis_pdb_path + \ - # receptor_pdb.upper() + ".pdb" - # elif BARUtils.is_poplar_gene_valid(receptor_pdb): - # receptor_pdb_path = ( - # poplar_pdb_path + BARUtils.format_poplar( - # receptor_pdb) + ".pdb" - # ) - # elif BARUtils.is_tomato_gene_valid(receptor_pdb, True): - # receptor_pdb_path = tomato_pdb_path + receptor_pdb.capitalize() + ".pdb" - # else: - # return BARUtils.error_exit("Invalid receptor pdb gene id"), 400 - - #ligand_sdf_path = "/home/yyu/public_html/library" + ligand + ".pdb" + + if not BARUtils.is_arabidopsis_gene_valid(receptor): + return BARUtils.error_exit("Invalid arapbidopsis pdb gene id"), 400 + + matched = re.search("[a-z]", ligand) + if matched is None: + return BARUtils.error_exit("Invalid ligand name"), 400 docking_file_name = receptor.upper() + "-" + ligand.upper() + \ "-docking0001.pdb " @@ -93,7 +80,8 @@ def get(self, receptor, ligand): # Importing start function to initiate docking_utils file - Protein_Docking.start(receptor,ligand,docking_pdb_path) + final_json = Docker.start(receptor, ligand, docking_pdb_path) + return BARUtils.success_exit(final_json) @snps.route("/phenix//") diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py old mode 100644 new mode 100755 index d7b3176..ab7d15d --- a/api/utils/refactored_docking_utils.py +++ b/api/utils/refactored_docking_utils.py @@ -8,7 +8,7 @@ import json import datetime -HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex' +HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex' class Receptor(ABC): @@ -520,10 +520,15 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): print("Starting the docking process at {}".format(ct)) docking = Docker.create_docking(receptor, ligand, docking_pdb_path) if docking is None: - results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '/' + receptor = receptor.split('.')[0] + results_path = docking_pdb_path + receptor + '_' + ligand + '/' with open(results_path + "final.json") as json_file: final_json = json.load(json_file) return final_json + elif docking == "Receptor file not found": + return "Receptor file not found" + elif docking == "Ligand file not found": + return "Ligand file not found" docking.hex_docking() if isinstance(docking, ComplexDocking): @@ -572,43 +577,52 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): specified by receptor_name and ligand_name, respectively. """ # check that the docking combination has not been run before - results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '/' - if os.path.exists(results_path): - print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name)) + # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/' + if '.' in receptor_name: + receptor_name = receptor_name[:receptor_name.index('.')] + results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' + print(results_path) + if os.path.exists(results_path): #or \ + #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'): + print("The docking between {0} and {1} has already been done.".format(receptor_name, + ligand_name)) return None + os.makedirs(results_path) # find receptor file and create receptor object - receptor_folder = docking_pdb_path + 'results/receptor_to_dock' + receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/' + # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs' receptor_file_found = False for receptor_file in os.listdir(receptor_folder): - if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ - receptor_file.split('.')[1] == 'pdb' and \ - receptor_file[:-4].lower() == receptor_name.lower(): + # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ + # receptor_file[-4:] == 'pdb' and \ + # receptor_file[:-4].lower() == receptor_name.lower(): + if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \ + (receptor_name in receptor_file): receptor_file_found = True - receptor_file_path = receptor_folder + '/' + receptor_file + receptor_file_path = receptor_folder + receptor_file receptor = Docker.create_receptor(receptor_name, receptor_file_path) # find ligand file and create ligand object - ligand_folder = docking_pdb_path + 'results/ligand_to_dock' + # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/' + ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/' ligand_file_found = False for ligand_file in os.listdir(ligand_folder): if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \ - ligand_file.split('.')[1] == 'pdb' and \ + ligand_file.split('.')[1] == 'sdf' and \ ligand_file[:-4].lower() == ligand_name.lower(): ligand_file_found = True ligand_file_path = ligand_folder + '/' + ligand_file ligand = Ligand(ligand_name, ligand_file_path) if not receptor_file_found: - print("Receptor file not found") - return + return "Receptor file not found" elif not ligand_file_found: - print("Ligand file not found") - return + return "Ligand file not found" # receptor and ligand objects are created and ready for docking if isinstance(receptor, MonomerReceptor): @@ -619,4 +633,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): if __name__ == "__main__": - print(Docker.start("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/")) + # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/")) + print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/")) + From 6756ff473ba435877b32354641733d33f5d2b946 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 1 Feb 2024 16:05:42 -0500 Subject: [PATCH 08/35] add code for mapping sdf names to number --- api/utils/sdf_mapping.py | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 api/utils/sdf_mapping.py diff --git a/api/utils/sdf_mapping.py b/api/utils/sdf_mapping.py new file mode 100644 index 0000000..9bf2033 --- /dev/null +++ b/api/utils/sdf_mapping.py @@ -0,0 +1,49 @@ +import os +import re +from typing import List + +def get_substance_name(filename: str, folder_path: str): + file = open(folder_path + filename, "r") + line = file.readline().strip() + if line == "": + return None + while line != "> " and line != "$$$$": + line = file.readline().strip() + # right now, line == "> " or line is empty + if line == "$$$$": + return None + line = file.readline().strip() + names = [] + while line != "": + if len(line) > 0 and line[0] == ">": + break + # while line != "": + # check regex to see if it contains lowercase + # matched = re.search("[a-z]", line) + # if matched is not None: + # return line + # else: + # line = file.readline().strip() + names.append(line) + line = file.readline().strip() + return names + +def create_mapping(folder_path: str): + mapped_sdf = {} + sdf_files = os.listdir(folder_path) + for file in sdf_files: + if file[0] != "." and file[-4:] == ".sdf": + file_number = file[:file.index("_")] + name = file[file.index("_") + 1:-4] + # the commented out section is for sdfs that have not been filtered + # names = get_substance_name(file, folder_path) + # print(name) + # sdf_number = file.split(".")[0] + # mapped_sdf[sdf_number] = ",".join(names) + mapped_sdf[file_number] = name # check if want to map file_number or file name + return mapped_sdf + +if __name__ == "__main__": + sdf_folder_paths = ['/home/diennguyen/BAR_API/HEX_API/HEX_SMALL_MOLECULES'] + print(create_mapping(sdf_folder_paths[0])) + From 740cce9b59ba4c94fa399050cc3410c1dface6da Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 1 Feb 2024 22:49:38 -0500 Subject: [PATCH 09/35] Replace docking_utils.py and remove old version --- api/resources/snps.py | 2 +- api/utils/docking_utils.py | 1175 +++++++++++++++++++----------------- 2 files changed, 636 insertions(+), 541 deletions(-) diff --git a/api/resources/snps.py b/api/resources/snps.py index d721be0..aa5616e 100755 --- a/api/resources/snps.py +++ b/api/resources/snps.py @@ -26,7 +26,7 @@ from api.utils.hotspot_utils import HotspotUtils import sys from api import db, cache, limiter -from api.utils.refactored_docking_utils import Docker +from api.utils.docking_utils import Docker snps = Namespace("SNPs", description="Information about SNPs", path="/snps") diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index cd1d604..ab7d15d 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -1,543 +1,638 @@ -from flask import Flask, flash, request, redirect, url_for, send_from_directory -from flask_restx import Api -from flask.templating import render_template -from werkzeug.utils import secure_filename -import re +from abc import ABC, abstractmethod +from typing import List import os -import math -import shutil -import json +import re import subprocess -import random +import math import sys -from datetime import date - -HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex' - -def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path): - - hex_output = open(docking_pdb_path + "/results/" + rec_lig + - "/{}_hex_output.txt".format(rec_lig), "w") - -# Function to call Hex, including hard coded settings - -# max_docking_solutions set at 5 for testing - code = """ open_receptor """ + docking_pdb_path + """/results/receptor_to_dock/""" + receptor + """.pdb -open_ligand """ + docking_pdb_path +"""/results/ligand_to_dock/""" + ligand + """.pdb -docking_correlation 1 -docking_score_threshold 0 -max_docking_solutions 25 -docking_receptor_stepsize 5.50 -docking_ligand_stepsize 5.50 -docking_alpha_stepsize 2.80 -docking_main_scan 16 -receptor_origin C-825:VAL-O -commit_edits -activate_docking -save_range 1 100 """ + docking_pdb_path + """/results/%s/%s/result %s pdb""" % (rec_lig, rec_lig2, rec_lig) - subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=hex_output).communicate(bytes(code.encode('utf-8'))) - hex_output.close() - - - - -def best_result(file_name, monomer, rec_lig, receptor, ligand, docking_pdb_path): - - # Function to generate the "best docking results", being the result with the best score and with the residue with the best contact frequency - - file_name_dir = str(docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results - file_name_path = str(file_name_dir + file_name[:-20] + '.pdb') #directory for the result, identifies as the best result - des1 = file_name_dir + 'best_docking_results_for_'+ file_name[:-24] + '.pdb' #destination directory for the best_docking_result file - shutil.copyfile(file_name_path,des1) - - #Same thing done with the ligand file only - ori2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name - des2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb' - shutil.copyfile(ori2,des2) - - - # This is to create a copy of that file with 'Z' as the name of the chain in the ligand, - # it is important for the 3dsjmol visualization - - with open(str(file_name_dir + 'best_docking_results_for_' + file_name[:-24] + '.pdb'), 'r') as file: #to not modify the chain name for the protein chains - lines = file.readlines() - subpart1 = lines[:lines.index( - 'REMARK Docked ligand coordinates...\n')] #subpart 1 is from start to 1st line in ligand coordinates - subpart2 = lines[lines.index( - 'REMARK Docked ligand coordinates...\n'):] #subpart 2 from 1st line in ligand coordinates to end of file - with open(str(file_name_dir + 'best_docking_results_for_' + receptor + '_' + monomer + '_' + ligand + '.pdb'), 'w') as file: - for l in subpart1: - file.write(l) - for line in subpart2: - if line[0:4] == 'ATOM' or line[:6] == 'HETATM' or line[:3] == 'TER': - newline = line[:21] + 'Z' + line[22:] - file.write(newline) - else: - file.write(line) - print('best docking result file is generated for ' + file_name[:-24]) - - - - -def separate_results(monomer, file_dir, first_file_name, dir_final, monomers_list): - - # Function to separate the multimer file into its monomers for every result file created by hex - - ends = [] #this list will be modified with the indices of every monomer's terminal line + the first coordinate's line index - # Open the .pdb file to separate - with open (file_dir + first_file_name, 'r+') as r: - lines = r.readlines() - for l in lines: - if l.startswith('ATOM 1 '): - ends.append(lines.index(l)) #and save the index of the first coordinate's line in the list ends - - # Searches the .pdb files for the lines that indicate the end of a chain - for l in lines: - if l[0:3] == 'TER': - ends.append(lines.index(l)) #and add their indexes in the ends list - - if os.path.isdir(dir_final) == False: #create folder to dump the new monomer file or files - os.makedirs(dir_final) - - # LOGIC:The end of the previous chain is the start of the current one, - start_pos = ends[monomers_list.index(monomer)] - end_pos = ends[monomers_list.index(monomer)+1] - - # It copies every line that is not referencing an atom coordinates - # or that it is in the range of the monomer we want to isolate - file_list = os.listdir(file_dir) - for r in file_list: #for every result file: - file_path = str(file_dir + '/' + r) - new_file_path = str(dir_final + r[:-4] + '_' + monomer + '.pdb') #create a new result file which will include only one protein chain, not all - with open(file_path, 'r') as file: - lines = [line for line in file.readlines()] - # Dump in the new file everything before the first coordinate line + between the lines that contain - # the monomer coordinates + after the last receptor's coordinates - lines = lines[:ends[1]] + lines[start_pos:end_pos] + lines[ends[-1]:] - with open(new_file_path, 'w') as file: - file.writelines(lines) - - - - -def separate_monomers(monomer, file_dir, file_name, dir_final, monomers_list): - - # Function to separate the original protein pdb file in its monomers - - # Open the .pdb file to separate - with open (file_dir + '/' + file_name + '.pdb', 'r+') as r: - lines = r.readlines() - ends = [0] # ends contains all line numbers of "TER" - - # Searches the .pdb files for the lines that indicate the end of a chain - for l in lines: - if l[0:3] == 'TER': - ends.append(lines.index(l)) - if os.path.isdir(dir_final) == False: - os.makedirs(dir_final) - monomer_pdb = open(dir_final + '/' + file_name + '_' + monomer + '.pdb', 'a+') - - - # The end of the previous chain is the start of the current one, - # 0 was previously included in the list ends to be the start of the first chain - start_pos = ends[monomers_list.index(monomer)] - end_pos = ends[monomers_list.index(monomer)+1] - - # It copies every line that is not referencing an atom coordinates - # or that it is in the range of the monomer we want to isolate - for l in lines: - if l[0:4] != 'ATOM' or lines.index(l) in range(start_pos, end_pos): - monomer_pdb.write(l) - # It needs to copy also the ligand data (if there is any) which is labeled with SDF - elif l[17:20] == 'SDF': - monomer_pdb.write(l) - - - - - -def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path): - - # Function to separate the ligand coordinates of every solution, it's useful to simply the calculation of the contact frequencies - - dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory - print('Isolating ' + rec_lig + '_' + monomer) - - os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory - file_list = os.listdir(dir_path) - result_list = [] - - # Some operative system will create hidden files, the script consider .pdb files only - for i in file_list: - if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb': - result_list.append(i) - for r in result_list: - file_path = str(dir_path + '/' + r) - ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb') - with open(file_path, 'r') as file: - lines = [line for line in file.readlines()] - # Everything below the line 'REMARK Docked ligand coordinates...' is data of the ligand - lines = lines[lines.index('REMARK Docked ligand coordinates...\n'):] - with open(ligand_reserved_file_path, 'w') as file: - file.writelines(lines) - - - - - -def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand, docking_pdb_path): - - # Function to calculate the contact frequencies of every amino acid - - result_dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function! - receptor_file_path = str(docking_pdb_path + '/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file - - # Store every receptor's atom coordinates information as a nested dictionary called 'reference' - with open(receptor_file_path, 'r') as file: - reference = {} - for line in file.readlines(): - if line[0:4] == 'ATOM': - if int(line[22:27]) in reference: - reference[int(line[22:27])][int(line[6:11])] = tuple(map(float, filter(None, line[31:54].split(' ')))) - else: - reference[int(line[22:27])] = {int(line[6:11]) : tuple(map(float, filter(None, line[31:54].split(' '))))} - - #so the reference is {residue: {atom :(x, y, z)}} - - # The energy for each reference element will be stored in dictionary 'ac' - ac = {} - file_list = os.listdir(result_dir_path) - result_list = [] - - # Generate the list for all .pdb names in the directory - for i in file_list: - if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb': - result_list.append(i) - - en_list = [] #future list of energies - file_names = [] #future list of file names - resi_list = [] #future list of aa - - #reading the first file and saving its lines will make things much quicker for the rest of them - first_file_path = str(result_dir_path + receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb') - z=open(first_file_path) - lines_first=z.readlines() - x=lines_first[2] - print (x) - - - # Store energy values for each ligand_reserved file - for r in result_list: - print('current file:' + r) - energy = '' - file_path = str(result_dir_path + r) - - with open(file_path) as file: - lines = file.readlines() - for l in lines: - if 'REMARK' in l.split(' ') and 'Energy' in l.split(' '): - # The energy is divided by the number of results to - # later obtain an average energy when we will sum the - energy = (float(l.split(' ')[6][:-1]))/(len(result_list)) - # Generate file and energy list by order - file_names.append(str(r)) - en_list.append(energy) - - # Go over every coordinate of atoms in the ligand_reserved file and store into coor - coor = [tuple(map(float, filter(None, line[31:54].split(' ')))) - for line in lines if line[0:4] == 'ATOM'] - lst = [] - - for res in reference.keys(): # for each amino acid in the receptor file: - distances = [] - - for atom in coor: # for each atom of the ligand - - for aa in reference[res].keys(): # for each atom of that amino acid - # check if the distance between atoms of the ligands - # and of the amino acid are lower than chosen threshold (5) - distances.append(math.sqrt((reference[res][aa][0] - atom[0]) ** 2 + (reference[res][aa][1] - atom[1])** 2 - + (reference[res][aa][2] - atom[2]) ** 2)) - - if all(d >= threshold for d in distances): #if none of the distances is lower than the threshold, skip - continue - - else: # if at least one distance is lower then add this aminoacid to the ac dict - if res in ac.keys(): - ac[res] += energy # adding energy (previosly divided by the number of results) more times if - else: # found multiple times, that way you would have an average - ac[res] = energy - - # Store the resi number into lst - if res not in lst: - lst.append(res) - # Store rei_num for one file into resi_list as a list - resi_list.append(lst) - - - - best_result_name = '' - # Find the resi number with the lowest energy - red_resi = '' - for k, v in ac.items(): - if v == min(ac.values()): - red_resi = k - print('best_residue: ' + str(red_resi)) - - # Find the file that both satisfies the lowest energy and containing the lowest energy resi - max_en = 0 - for f in file_names: - if en_list[file_names.index(f)] <= max_en: - temp = resi_list[file_names.index(f)] - for i in temp: - if i == red_resi: - best_result_name = f - - - res_dict_path = result_dir_path + 'res_dict.json' - - # Use the result file from /result/, change the name to best docking result, and convert it into chain Z - try: - best_result(best_result_name, monomer, rec_lig, receptor, ligand, docking_pdb_path) - # sometimes the simulations results are not good enough to satisfy both requirements, - # it's common especially when one monomer is never close to the ligand. - # Not including this line would stop an otherwise useful simulation - except FileNotFoundError: - f_file = receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb' - best_result(f_file, monomer, rec_lig, receptor, ligand, docking_pdb_path) - - print(ac) - - with open(res_dict_path, 'w') as file: - file.write(json.dumps(ac)) - print('res_dict.json is generated') - return ac - - -def parse_hex_output(rec_lig, docking_pdb_path): - hex_output = open(docking_pdb_path + "/results/" + rec_lig + - "/{}_hex_output.txt".format(rec_lig), "r") - lines = hex_output.readlines() - result_start = 0 - result_end = 0 - for i in range(len(lines)): - splitted_line = lines[i].split(" ") - if len(splitted_line) > 8 and splitted_line[0] == "Clst": - result_start = i + 2 - if len(splitted_line) > 2 and splitted_line[1] == "save_range": - result_end = i - 2 - clustering_lines = lines[result_start:result_end] - clusters = {} - for line in clustering_lines: - cleaned_line = line.strip().split(" ") - res = [] - for ch in cleaned_line: - if ch != "": - res.append(ch) - clst = int(res[0]) - sln = int(res[1]) - if clst not in clusters: - clusters[clst] = [sln] - else: - clusters[clst].append(sln) - return(clusters) - - -def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path): - - # Function to create the nested dictionary with every monomer as key with value a dictionary with its amino acids as keys and contact frequencies as values - - result_dict = {} #this will be the dictionary - - folder_name = str(receptor + '_' + monomer + '_' + ligand) - - if receptor + '_' + monomer not in result_dict.keys(): - result_dict[receptor + '_' + monomer] = {} - if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False: - result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand, docking_pdb_path) - else: - result_dict[receptor+ '_' + monomer][ligand] = eval( - open(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read()) - print('res_dict.json previously exists and has read') - - resultjson_path = docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/results.json' - - # Initialize results.json - ini = {} - with open(resultjson_path, 'w') as file: - file.write(json.dumps(ini)) - results = {} - for r in result_dict: #result_dict is where we have our contact freuquencies - if r in results.keys(): - for v in result_dict[r]: - results[r][v] == result_dict[r][v] - else: - results[r] = result_dict[r] - with open(resultjson_path, 'w') as file: - file.write(json.dumps(results)) - print('result.json is finished') - - - - - -def pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list, docking_pdb_path): - - print('Current pair:' + rec_lig) - - today_dir = docking_pdb_path + '/results/' + rec_lig + '/' - - results_dir = today_dir + rec_lig + '/result/' - os.makedirs(results_dir) - - hex_docking(rec_lig, rec_lig, receptor, ligand,docking_pdb_path) # CALL HEX - - results_list = os.listdir(results_dir) - first_file_name = str(receptor + '_' + ligand + '0001.pdb') - - - # Repeats the analysis for every monomer in the receptor file - for monomer in monomers_list: - dir_final = today_dir + receptor + '_' + monomer + '_' + ligand + '/result/' - print('plotting monomer: ' + monomer + ' with the ligand: ' + ligand) - separate_results(monomer, results_dir, first_file_name, dir_final, monomers_list) - ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path) - print('Ligands are now reserved in docking results.') - color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path) - #plot_frequencies(monomer) - - - -class Protein_Docking: - @staticmethod - def start(receptor,ligand,docking_pdb_path): - - # Check if the receptor is a monomer or a complex and save the receptor and ligand names as variables - - receptor_folder = docking_pdb_path + '/results/receptor_to_dock' - receptor_folder_list = os.listdir(receptor_folder) - ligand_folder = os.listdir(docking_pdb_path + '/results/ligand_to_dock') - - receptor_file_found = False - for rec in receptor_folder_list: - # There could be hidden files in the receptor or ligand directory so only consider pdb files - if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb'\ - and rec[:-4].lower() == receptor.lower(): - receptor_file_found = True - receptor = rec[:-4] - # To check if the receptor is a monomer or not, the script will search the .pdb file - # for the line that indicated the presence of multiple chains, - with open(receptor_folder + '/' + rec, 'r+') as f: - is_monomer = True - for x in f.readlines(): - if re.match(r'COMPND \d CHAIN: \w, \w*', x) != None: - is_monomer = False - #if the receptor would be a monomer the regex would be r'COMPND \d CHAIN: \w;' - - # To make a list of the monomers' labels - print(receptor + ' identified as a protein complex') - if x[11:16] == 'CHAIN': - monomers_list = x.split(': ')[-1].split(', ') - # The COMPND line ends with ';' therefore it needs to be removed from the last label - monomers_list[-1] = monomers_list[-1][0] - break - - ligand_file_found = False - for lig in ligand_folder: - sys.stdout.write(lig) - if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb'\ - and lig[:4].lower() == ligand.lower(): - ligand_file_found = True - #DO NOT USE PDB FOR LIGAND FILES, it is possible but it can lead to errors due to the missing hydrogens - ligand = lig[:-4] - break - - - ##TODO: Add block to raise error if receptor or ligand files are not found - - rec_lig = receptor + '_' + ligand - - #check if results folder already exists - results_path = docking_pdb_path + '/results/' + rec_lig - if not os.path.exists(results_path): - # To save the terminal output later (very important) - stdoutOrigin=sys.stdout - sys.stdout = open(docking_pdb_path + '/results/Terminal_recordings/' + rec_lig + '_' + str(date.today()) + '.txt' , "w") - - # Call to the pipeline with different parameters whether the receptor is a monomer or a complex - if is_monomer == False: - dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' - for monomer in monomers_list: - print('separating monomer: ' + monomer) - separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file - - pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) - else: - dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers' - monomers_list = ['monomer'] - separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it. - # It allows to use the same functions in both cases - pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path) - - #To put together the json files with all the data from all monomers - new_json = docking_pdb_path + '/results/'+ rec_lig + '/' + '/final.json' - final_json = {} - min_values = [] - max_values = [] - abs_max = None - abs_min = None - - for monomer in monomers_list: - monomer_json = docking_pdb_path + '/results/' + rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' - with open(monomer_json, 'r') as file: - monomer_dict = json.load(file) - - monomer_key = list(monomer_dict.keys())[0] - ligand_key = list(monomer_dict[monomer_key].keys())[0] - - inside_dict = monomer_dict[monomer_key][ligand_key] - - # To eliminate empty dictionaries that might cause division errors below normalized_mon_dicitonary calculations - if inside_dict == {}: - continue - else: - mini = min(inside_dict.values()) - maxi = max(inside_dict.values()) - - min_values.append(mini) - max_values.append(maxi) - - abs_max = max(max_values) - abs_min = min(min_values) - - print("This is the maximum value: ",abs_max, file=sys.stderr) - print("This is the minimum value: ",abs_min, file=sys.stderr) - - #Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum. - for monomer in monomers_list: - monomer_json = docking_pdb_path + '/results/' +rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json' - with open(monomer_json, 'r') as file: - monomer_dict = json.load(file) - - monomer_key = list(monomer_dict.keys())[0] - ligand_key = list(monomer_dict[monomer_key].keys())[0] - - inside_dict = monomer_dict[monomer_key][ligand_key] - - # It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy - - if abs_min == abs_max : - normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}} - final_json.update(normalized_mon_dict) - else: - normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}} - final_json.update(normalized_mon_dict) - #Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values - - with open(new_json,'w') as file: - file.write(json.dumps(final_json)) - print('Final json is finished') - print(new_json, file=sys.stderr) - sys.stdout.close() - else: - print("Docking has already been done on this protein-ligand.") +import json +import datetime + +HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex' + + +class Receptor(ABC): + """An abstract class that represents a receptor + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + """ + @abstractmethod + def __init__(self, name: str, file_path: str): + self.name = name + self.file_path = file_path + + +class MonomerReceptor(Receptor): + """ A class that represents a receptor that is a monomer, meaning it consists + of only one chain. + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + """ + name: str + file_path: str + + def __init__(self, name, file_path): + super().__init__(name, file_path) + + +class ComplexReceptor(Receptor): + """ A class that represents a receptor that is a complex, meaning it consists + of more than one chain. + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + monomer_list (List[str]): the list of monomers that make up the complex + line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]] + """ + def __init__(self, name: str, file_path: str, monomers_list: List[str]): + super().__init__(name, file_path) + self.monomers_list = monomers_list + self.line_numbers = self.separate_monomers() + + def separate_monomers(self): + """Returns a list of lists, where each sublist contains the line + numbers of the start and end of a monomer. + For example, receptor X has 3 chains in this order: A, B, C. + The method will return [[1, 6], [7, 9], [10, 15]]. + """ + line_numbers = [] + file = open(self.file_path, "r") + line = file.readline() + prev = None + curr_line = 0 + while line != '': + # the first line of the first monomer + if line[:12] == "ATOM 1 ": + prev = curr_line - 1 + # the last line of a monomer + elif line[:3] == 'TER': + # line_numbers.append(curr_line) + line_numbers.append([prev + 1, curr_line]) + prev = curr_line + curr_line += 1 + line = file.readline() + + return line_numbers + + +class Ligand: + """A class that represents a ligand. + + --- Attributes --- + name (str): the name of the receptor + file_path (str): the relative path to the receptors pdb file + """ + def __init__(self, name: str, file_path: str): + self.name = name + self.file_path = file_path + + +class Docking(ABC): + """An abstract class that represents the docking between a receptor and a + ligand. + + --- Attributes --- + receptor (Receptor): a Receptor object that represents a receptor + ligand (Ligand): a Ligand object that represents a ligand + results_path (str): the file path to where the results are stored + ligand_reserved_list (List[int]): a list of line numbers, one for each solution, + the indicates where the "Docked ligand" section begins + """ + + @abstractmethod + def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str): + self.receptor = receptor + self.ligand = ligand + self.results_path = results_path + self.ligand_reserved_list = [] + + def hex_docking(self): + """Run hex docking using the command line. + """ + hex_output_file = open(self.results_path + 'hex_output.txt', "w") + + # Function to call Hex, including hard coded settings + + # max_docking_solutions set at 5 for testing + hex_command = """ open_receptor """ + self.receptor.file_path + """ + open_ligand """ + self.ligand.file_path + """ + docking_correlation 1 + docking_score_threshold 0 + max_docking_solutions 25 + docking_receptor_stepsize 5.50 + docking_ligand_stepsize 5.50 + docking_alpha_stepsize 2.80 + docking_main_scan 16 + receptor_origin C-825:VAL-O + commit_edits + activate_docking + save_range 1 100 """ \ + + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name) + subprocess.Popen(HEX_BIN_PATH, + stdin=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8'))) + hex_output_file.close() + ct = datetime.datetime.now() + print("current time:-", ct) + print("Hex docking completed") + + def crte_ligand_reserved_attr(self): + """This function populates the Docking instance's ligand_reserved_list attribute + with a list of line numbers. Each line number is where the Docked Ligand section + begins for each result. + For example, [1500, 1499, 1500] means that there are three solutions. In the first + solution, the "Docked Ligand" section begins at line 1500. In the second solution, + it begins at line 1499, and so on ... + """ + line_numbers = [] + for filename in os.listdir(self.results_path): + if filename[-3:] == 'pdb': + file = open(self.results_path + filename, "r") + lines = file.readlines() + for i in range(len(lines)): + if "Docked ligand coordinates..." in lines[i]: + line_numbers.append(i) + break + self.ligand_reserved_list = line_numbers + + def parse_hex_output(self): + """Returns a dictionary where the key is the cluster number and the + value is a list of solution numbers. One of the keys is "num_soln", + where its value is the total number of solutions. + For example: {num_soln : 5, 1 : [2, 4], 2 : [1, 3, 5]} + """ + hex_output = open(self.results_path + 'hex_output.txt', "r") + lines = hex_output.readlines() + # line number where the clustering starts and ends + result_start = 0 + result_end = 0 + for i in range(len(lines)): + splitted_line = lines[i].split(" ") + if len(splitted_line) > 8 and splitted_line[0] == "Clst": + result_start = i + 2 + if len(splitted_line) > 2 and "save_range" in splitted_line: + result_end = i - 2 + clustering_lines = lines[result_start:result_end] + clusters = {} + clusters["num_soln"] = len(clustering_lines) + for line in clustering_lines: + cleaned_line = line.strip().split(" ") + res = [] + # only keep non-blank items in line + for ch in cleaned_line: + if ch != "": + res.append(ch) + clst = int(res[0]) + sln = int(res[1]) + if clst not in clusters: + clusters[clst] = [sln] + else: + clusters[clst].append(sln) + return clusters + + def result_dict_generator(self, monomer_number, threshold): + """Return a dictionary where each key is a residue and each value is + the energy. The distance between each residue in the monomer and each + atom in the ligand is calculated, and only residues with distances + below the threshold are included. + """ + receptor_file = open(self.receptor.file_path, "r") + + if monomer_number != -1: # if -1, go to monomer logic + # get the start and end line numbers of the monomer in the receptor pdb + monomer_start = self.receptor.line_numbers[monomer_number][0] + monomer_end = self.receptor.line_numbers[monomer_number][1] + + # get the lines for that receptor only + receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end] + else: # Monomer logic + receptor_file_lines = receptor_file.readlines() + + # Store every receptor's atom coordinates information as a nested + # dictionary called 'reference' + reference = {} + for line in receptor_file_lines: + splitted_line = line.split() + if line[0:4] == 'ATOM': + coord = map(float, filter(None, splitted_line[6:9])) + if int(splitted_line[5]) in reference: + reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord) + else: + reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)} + + # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}}, + + # The energy for each reference element will be stored in dictionary 'ac' + ac = {} + result_list = [] + for filename in os.listdir(self.results_path): + if filename[-3:] == 'pdb': + result_list.append(filename) + + lowest_en = None # to keep track of lowest energy + all_residue_list = [] + + cluster_dict = self.parse_hex_output() + + for i in range(len(result_list)): + energy = '' + + # get the ligand_reserved section of the result file + file = open(self.results_path + result_list[i], 'r') + ligand_reserved_start = self.ligand_reserved_list[i] + ligand_reserved_section = file.readlines()[ligand_reserved_start:] + + # go through ligand reserved section to calculate energy + residue_set = set() + coor = [] + for line in ligand_reserved_section: + if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '): + cluster_size = len(cluster_dict[i + 1]) + total_solutions = cluster_dict['num_soln'] + + # energy is weighed according to the number of solutions + # in that cluster + energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size + + # record values if lowest energy + if lowest_en is None or energy < lowest_en: + lowest_en = energy + elif line[:4] == 'ATOM': + # coordinates of one atom + coordinates = tuple(map(float, filter(None, line.split()[6:9]))) + coor.append(coordinates) + # each atom's coordinates is now stored in the list coordinates + + residue_set = set() + for res in reference.keys(): # for each amino acid in the receptor file: + distances = [] + + for atom in coor: # for each atom of the ligand + for aa in reference[res].keys(): # for each atom of that amino acid + # check if the distance between atoms of the ligands + # and of the amino acid are lower than chosen threshold (5) + distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2, + (reference[res][aa][1] - atom[1]) ** 2, + (reference[res][aa][2] - atom[2]) ** 2])) + + distances.append(distance) + + # if at least one of the distances is lower than the threshold, otherwise skip + if all(d >= threshold for d in distances): + continue + else: + # adding energy (previosly divided by the number of results) + # if found multiple times, we would get an average + if res in ac.keys(): + ac[res] += energy + else: + ac[res] = energy + + # Store the resi number into set + residue_set.add(res) + + all_residue_list.append(residue_set) + + return ac + + @abstractmethod + def best_result(self): + pass + + @abstractmethod + def crte_receptor_dict(self): + pass + + @abstractmethod + def normalize_results(self, threshold): + pass + + +class MonomerDocking(Docking): + """A class the represents a docking between a monomer receptor and a monomer. + + --- Attributes --- + receptor (MonomerReceptor): a Receptor object that represents a monomer receptor + ligand (Ligand): a Ligand object that represents a ligand + results_path (str): the file path to where the results are stored + ligand_reserved_list (List[int]): a list of line numbers, one for each solution, + the indicates where the "Docked ligand" section begins + """ + + def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str): + super().__init__(receptor, ligand, results_path) + + def best_result(self): + pass + + def crte_receptor_dict(self, threshold): + """"Return a dictionary that contains the residue-energy + dictionary of the monomer. This is not necessary, but maintains + consistency between monomer and complex receptor dictionaries. + """ + receptor_res = {} + res_dict = self.result_dict_generator(-1, threshold) + ligand_res = {} + ligand_res[self.ligand.name] = res_dict + receptor_res[self.receptor.name] = ligand_res + return receptor_res + + def normalize_results(self, threshold): + """Return normalized residue-energy dictionaries for the + receptor. + """ + results_dict = self.crte_receptor_dict(threshold) + receptor_key = list(results_dict.keys())[0] + ligand_key = list(results_dict[receptor_key].keys())[0] + + inside_dict = results_dict[receptor_key][ligand_key] + abs_max = None + abs_min = None + + # To eliminate empty dictionaries that might cause division errors below + # normalized_mon_dicitonary calculations + if inside_dict != {}: + abs_min = min(inside_dict.values()) + abs_max = max(inside_dict.values()) + + all_normalized_results = {} + + normalized_mon_dict = {} + normalized_mon_dict[receptor_key] = {} + normalized_mon_dict[receptor_key][ligand_key] = {} + + # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + if abs_min == abs_max: + for k, v in inside_dict.items(): + normalized_mon_dict[receptor_key][ligand_key][k] = 1 + else: + for k, v in inside_dict.items(): + normalized_value = (v - abs_min) / (abs_max - abs_min) + normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value + all_normalized_results.update(normalized_mon_dict) + return all_normalized_results + + +class ComplexDocking(Docking): + """A class that represents a docking between a complex receptor and a ligand. + + --- Attributes --- + receptor (ComplexReceptor): a Receptor object that represents a monomer receptor + ligand (Ligand): a Ligand object that represents a ligand + results_path (str): the file path to where the results are stored + ligand_reserved (List[int]): a list of line numbers, one for each solution, + which indicates where the "Docked ligand" section begins + split_results (List[List[Tuple[int]]]): a list where each sublist is a chain, + which contains a list of tuples. Each tuple indicates the line numbers + of the start and end of that chain in a results file. + """ + + def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str): + super().__init__(receptor, ligand, results_path) + self.split_results = [] + + def separate_results(self): + """For each solution, record the start and end line number (0-based) of + each chain. Then, populate self.split_results with the final list. + + Each sublist represents one solution file. Each tuple in the sublist + contains the start and end of one chain. The order of the tuples in + the sublist is the same as the order of the monomers in the receptor's + monomers_list. + """ + results_files = os.listdir(self.results_path) + + # for each solution + for file in results_files: + if file[-3:] != "pdb": + break + result_file = open(self.results_path + file) + + # this list contains indices of the start and end of each chain + line_numbers = [] + line = result_file.readline() + curr_line = 0 + prev = None + while line != '': + # the start of the first chain + if line.split()[0] == "ATOM" and line.split()[1] == "1": + # if line.startswith('ATOM 1 '): + prev = curr_line - 1 + + # the end of a chain + elif line[0:3] == 'TER': + line_numbers.append([prev + 1, curr_line]) + prev = curr_line + + # read next line + line = result_file.readline() + curr_line += 1 + + # populate split_results attribute + self.split_results = line_numbers + + def best_result(self): + pass + + def crte_receptor_dict(self, threshold): + all_monomers = [] + for i in range(len(self.receptor.monomers_list)): + ligand_res = {} + res_dict = self.result_dict_generator(i, threshold) + ligand_res[self.ligand.name] = res_dict + all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res}) + return all_monomers + + def normalize_results(self, threshold): + min_values = [] + max_values = [] + abs_max = None + abs_min = None + all_monomers_dict = self.crte_receptor_dict(threshold) + for i in range(len(all_monomers_dict)): + monomer_dict = all_monomers_dict[i] + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] + + inside_dict = monomer_dict[monomer_key][ligand_key] + + # To eliminate empty dictionaries that might cause division errors below + # normalized_mon_dicitonary calculations + if inside_dict == {}: + continue + else: + mini = min(inside_dict.values()) + maxi = max(inside_dict.values()) + + min_values.append(mini) + max_values.append(maxi) + + abs_max = max(max_values) + abs_min = min(min_values) + + print("This is the maximum value: ", abs_max, file=sys.stderr) + print("This is the minimum value: ", abs_min, file=sys.stderr) + + # Now looping through every monomer, and calculating every residue energy to be + # normalized by using absolute minimum and maximum. + all_normalized_results = {} + for i in range(len(all_monomers_dict)): + monomer_dict = all_monomers_dict[i] + monomer_key = list(monomer_dict.keys())[0] + ligand_key = list(monomer_dict[monomer_key].keys())[0] + + inside_dict = monomer_dict[monomer_key][ligand_key] + + normalized_mon_dict = {} + normalized_mon_dict[monomer_key] = {} + normalized_mon_dict[monomer_key][ligand_key] = {} + + # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + if abs_min == abs_max: + for k, v in inside_dict.items(): + normalized_mon_dict[monomer_key][ligand_key][k] = 1 + else: + for k, v in inside_dict.items(): + normalized_value = (v - abs_min) / (abs_max - abs_min) + normalized_mon_dict[monomer_key][ligand_key][k] = normalized_value + all_normalized_results.update(normalized_mon_dict) + return all_normalized_results + + +class Docker: + """A class that represents the controller to create docking pairs and carry + out the docking. + """ + + @staticmethod + def start(receptor: str, ligand: str, docking_pdb_path: str): + """Start the docking process and analyze results. Return the + normalized residue-energyy dictionary. + """ + # create docking object + ct = datetime.datetime.now() + print("Starting the docking process at {}".format(ct)) + docking = Docker.create_docking(receptor, ligand, docking_pdb_path) + if docking is None: + receptor = receptor.split('.')[0] + results_path = docking_pdb_path + receptor + '_' + ligand + '/' + with open(results_path + "final.json") as json_file: + final_json = json.load(json_file) + return final_json + elif docking == "Receptor file not found": + return "Receptor file not found" + elif docking == "Ligand file not found": + return "Ligand file not found" + + docking.hex_docking() + if isinstance(docking, ComplexDocking): + docking.separate_results() + docking.crte_ligand_reserved_attr() + normalized_results = docking.normalize_results(5) + new_json = docking.results_path + "final.json" + with open(new_json, 'w') as file: + file.write(json.dumps(normalized_results)) + ct = datetime.datetime.now() + print("current time:-", ct) + return normalized_results + + def create_receptor(receptor_name: str, receptor_file_path: str): + """Return a new receptor with the name receptor_name, by parsing + the file at recepter_file_path. + """ + with open(receptor_file_path) as f: + is_monomer = True + for line in f.readlines(): + if re.match(r'COMPND \d CHAIN: \w, \w*', line) is not None: + is_monomer = False + # if the receptor would be a monomer the regex would be + # r'COMPND \d CHAIN: \w;' + + # To make a list of the monomers' labels + print(receptor_name + ' identified as a protein complex') + if line[11:16] == 'CHAIN': + monomers_list = line.split(': ')[-1].split(', ') + # The COMPND line ends with ';' therefore it needs to be + # removed from the last label + monomers_list[-1] = monomers_list[-1][0] + new_receptor = ComplexReceptor(receptor_name, + receptor_file_path, + monomers_list) + return new_receptor + print("Unknown pdb structure, need further investigation") + + if is_monomer: + new_receptor = MonomerReceptor(receptor_name, + receptor_file_path) + return new_receptor + + def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): + """Return a docking pair, which contains a Receptor and a Ligand, as + specified by receptor_name and ligand_name, respectively. + """ + # check that the docking combination has not been run before + # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/' + if '.' in receptor_name: + receptor_name = receptor_name[:receptor_name.index('.')] + results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' + print(results_path) + if os.path.exists(results_path): #or \ + #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'): + print("The docking between {0} and {1} has already been done.".format(receptor_name, + ligand_name)) + return None + + + os.makedirs(results_path) + + # find receptor file and create receptor object + receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/' + # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs' + receptor_file_found = False + + for receptor_file in os.listdir(receptor_folder): + # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ + # receptor_file[-4:] == 'pdb' and \ + # receptor_file[:-4].lower() == receptor_name.lower(): + if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \ + (receptor_name in receptor_file): + receptor_file_found = True + receptor_file_path = receptor_folder + receptor_file + receptor = Docker.create_receptor(receptor_name, receptor_file_path) + + # find ligand file and create ligand object + # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/' + ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/' + ligand_file_found = False + + for ligand_file in os.listdir(ligand_folder): + if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \ + ligand_file.split('.')[1] == 'sdf' and \ + ligand_file[:-4].lower() == ligand_name.lower(): + ligand_file_found = True + ligand_file_path = ligand_folder + '/' + ligand_file + ligand = Ligand(ligand_name, ligand_file_path) + + if not receptor_file_found: + return "Receptor file not found" + elif not ligand_file_found: + return "Ligand file not found" + + # receptor and ligand objects are created and ready for docking + if isinstance(receptor, MonomerReceptor): + docking = MonomerDocking(receptor, ligand, results_path) + else: + docking = ComplexDocking(receptor, ligand, results_path) + return docking + + +if __name__ == "__main__": + # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/")) + print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/")) + From d9bf0547ec3fe04fa3d36f0a09c9385d38b43854 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 8 Feb 2024 16:37:24 -0500 Subject: [PATCH 10/35] convert sdf_mapping to OOP and move to docking_utils.py file --- api/utils/docking_utils.py | 81 +++- api/utils/refactored_docking_utils.py | 638 -------------------------- api/utils/sdf_mapping.py | 49 -- 3 files changed, 73 insertions(+), 695 deletions(-) delete mode 100755 api/utils/refactored_docking_utils.py delete mode 100644 api/utils/sdf_mapping.py diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index ab7d15d..030a7ca 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -582,13 +582,11 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): receptor_name = receptor_name[:receptor_name.index('.')] results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' print(results_path) - if os.path.exists(results_path): #or \ - #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'): - print("The docking between {0} and {1} has already been done.".format(receptor_name, + if os.path.exists(results_path): + print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name)) return None - os.makedirs(results_path) # find receptor file and create receptor object @@ -632,7 +630,74 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): return docking -if __name__ == "__main__": - # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/")) - print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/")) - +class SDFMapping: + """ + A class for mapping SDF names to their file names in the BAR. + """ + + def get_substance_name(self, filename: str, folder_path: str): + """Parse and return the names of a substance from a .sdf file. It + requires the line "> " to be present + in the file. + """ + file = open(folder_path + filename, "r") + line = file.readline().strip() + if line == "": + return None + while line != "> " and line != "$$$$": + line = file.readline().strip() + # right now, line == "> " or line is empty + if line == "$$$$": + return None + line = file.readline().strip() + names = [] + while line != "": + if len(line) > 0 and line[0] == ">": + break + names.append(line) + line = file.readline().strip() + return names + + @staticmethod + def create_mapping_filtered(folder_path: str, results_path: str): + """Create a json file that maps the name of the ligand to the + file name, for example: {"bld": "115196_bld.sdf"}. + + It only works for sdf files that are formatted like the + example shown above. + + folder_path: where the sdf files are stored + results_path: where the json file should be created + """ + mapped_sdf = {} + sdf_files = os.listdir(folder_path) + for file in sdf_files: + if file[0] != "." and file[-4:] == ".sdf": + name = file[file.index("_") + 1:-4] + mapped_sdf[name] = file + json_file = results_path + "sdf_mapping_filtered.json" + with open(json_file, 'w') as file: + file.write(json.dumps(mapped_sdf)) + return mapped_sdf + + def create_mapping_unfiltered(self, folder_path: str, results_path: str): + """Create a json file that maps the names of the ligand to the + file name, for example: {"122234": "Corn sugar gum,Xanthan gum"}. + + It only works for sdf files that contain this line: + "> ". + + folder_path: where the sdf files are stored + results_path: where the json file should be created + """ + mapped_sdf = {} + sdf_files = os.listdir(folder_path) + for file in sdf_files: + if file[0] != "." and file[-4:] == ".sdf": + names = self.get_substance_name(file, folder_path) + sdf_number = file.split(".")[0] + mapped_sdf[sdf_number] = ",".join(names) + json_file = results_path + "sdf_mapping_unfiltered.json" + with open(json_file, 'w') as file: + file.write(json.dumps(mapped_sdf)) + return mapped_sdf diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py deleted file mode 100755 index ab7d15d..0000000 --- a/api/utils/refactored_docking_utils.py +++ /dev/null @@ -1,638 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List -import os -import re -import subprocess -import math -import sys -import json -import datetime - -HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex' - - -class Receptor(ABC): - """An abstract class that represents a receptor - - --- Attributes --- - name (str): the name of the receptor - file_path (str): the relative path to the receptors pdb file - """ - @abstractmethod - def __init__(self, name: str, file_path: str): - self.name = name - self.file_path = file_path - - -class MonomerReceptor(Receptor): - """ A class that represents a receptor that is a monomer, meaning it consists - of only one chain. - - --- Attributes --- - name (str): the name of the receptor - file_path (str): the relative path to the receptors pdb file - """ - name: str - file_path: str - - def __init__(self, name, file_path): - super().__init__(name, file_path) - - -class ComplexReceptor(Receptor): - """ A class that represents a receptor that is a complex, meaning it consists - of more than one chain. - - --- Attributes --- - name (str): the name of the receptor - file_path (str): the relative path to the receptors pdb file - monomer_list (List[str]): the list of monomers that make up the complex - line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]] - """ - def __init__(self, name: str, file_path: str, monomers_list: List[str]): - super().__init__(name, file_path) - self.monomers_list = monomers_list - self.line_numbers = self.separate_monomers() - - def separate_monomers(self): - """Returns a list of lists, where each sublist contains the line - numbers of the start and end of a monomer. - For example, receptor X has 3 chains in this order: A, B, C. - The method will return [[1, 6], [7, 9], [10, 15]]. - """ - line_numbers = [] - file = open(self.file_path, "r") - line = file.readline() - prev = None - curr_line = 0 - while line != '': - # the first line of the first monomer - if line[:12] == "ATOM 1 ": - prev = curr_line - 1 - # the last line of a monomer - elif line[:3] == 'TER': - # line_numbers.append(curr_line) - line_numbers.append([prev + 1, curr_line]) - prev = curr_line - curr_line += 1 - line = file.readline() - - return line_numbers - - -class Ligand: - """A class that represents a ligand. - - --- Attributes --- - name (str): the name of the receptor - file_path (str): the relative path to the receptors pdb file - """ - def __init__(self, name: str, file_path: str): - self.name = name - self.file_path = file_path - - -class Docking(ABC): - """An abstract class that represents the docking between a receptor and a - ligand. - - --- Attributes --- - receptor (Receptor): a Receptor object that represents a receptor - ligand (Ligand): a Ligand object that represents a ligand - results_path (str): the file path to where the results are stored - ligand_reserved_list (List[int]): a list of line numbers, one for each solution, - the indicates where the "Docked ligand" section begins - """ - - @abstractmethod - def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str): - self.receptor = receptor - self.ligand = ligand - self.results_path = results_path - self.ligand_reserved_list = [] - - def hex_docking(self): - """Run hex docking using the command line. - """ - hex_output_file = open(self.results_path + 'hex_output.txt', "w") - - # Function to call Hex, including hard coded settings - - # max_docking_solutions set at 5 for testing - hex_command = """ open_receptor """ + self.receptor.file_path + """ - open_ligand """ + self.ligand.file_path + """ - docking_correlation 1 - docking_score_threshold 0 - max_docking_solutions 25 - docking_receptor_stepsize 5.50 - docking_ligand_stepsize 5.50 - docking_alpha_stepsize 2.80 - docking_main_scan 16 - receptor_origin C-825:VAL-O - commit_edits - activate_docking - save_range 1 100 """ \ - + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name) - subprocess.Popen(HEX_BIN_PATH, - stdin=subprocess.PIPE, - stderr=subprocess.STDOUT, - stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8'))) - hex_output_file.close() - ct = datetime.datetime.now() - print("current time:-", ct) - print("Hex docking completed") - - def crte_ligand_reserved_attr(self): - """This function populates the Docking instance's ligand_reserved_list attribute - with a list of line numbers. Each line number is where the Docked Ligand section - begins for each result. - For example, [1500, 1499, 1500] means that there are three solutions. In the first - solution, the "Docked Ligand" section begins at line 1500. In the second solution, - it begins at line 1499, and so on ... - """ - line_numbers = [] - for filename in os.listdir(self.results_path): - if filename[-3:] == 'pdb': - file = open(self.results_path + filename, "r") - lines = file.readlines() - for i in range(len(lines)): - if "Docked ligand coordinates..." in lines[i]: - line_numbers.append(i) - break - self.ligand_reserved_list = line_numbers - - def parse_hex_output(self): - """Returns a dictionary where the key is the cluster number and the - value is a list of solution numbers. One of the keys is "num_soln", - where its value is the total number of solutions. - For example: {num_soln : 5, 1 : [2, 4], 2 : [1, 3, 5]} - """ - hex_output = open(self.results_path + 'hex_output.txt', "r") - lines = hex_output.readlines() - # line number where the clustering starts and ends - result_start = 0 - result_end = 0 - for i in range(len(lines)): - splitted_line = lines[i].split(" ") - if len(splitted_line) > 8 and splitted_line[0] == "Clst": - result_start = i + 2 - if len(splitted_line) > 2 and "save_range" in splitted_line: - result_end = i - 2 - clustering_lines = lines[result_start:result_end] - clusters = {} - clusters["num_soln"] = len(clustering_lines) - for line in clustering_lines: - cleaned_line = line.strip().split(" ") - res = [] - # only keep non-blank items in line - for ch in cleaned_line: - if ch != "": - res.append(ch) - clst = int(res[0]) - sln = int(res[1]) - if clst not in clusters: - clusters[clst] = [sln] - else: - clusters[clst].append(sln) - return clusters - - def result_dict_generator(self, monomer_number, threshold): - """Return a dictionary where each key is a residue and each value is - the energy. The distance between each residue in the monomer and each - atom in the ligand is calculated, and only residues with distances - below the threshold are included. - """ - receptor_file = open(self.receptor.file_path, "r") - - if monomer_number != -1: # if -1, go to monomer logic - # get the start and end line numbers of the monomer in the receptor pdb - monomer_start = self.receptor.line_numbers[monomer_number][0] - monomer_end = self.receptor.line_numbers[monomer_number][1] - - # get the lines for that receptor only - receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end] - else: # Monomer logic - receptor_file_lines = receptor_file.readlines() - - # Store every receptor's atom coordinates information as a nested - # dictionary called 'reference' - reference = {} - for line in receptor_file_lines: - splitted_line = line.split() - if line[0:4] == 'ATOM': - coord = map(float, filter(None, splitted_line[6:9])) - if int(splitted_line[5]) in reference: - reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord) - else: - reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)} - - # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}}, - - # The energy for each reference element will be stored in dictionary 'ac' - ac = {} - result_list = [] - for filename in os.listdir(self.results_path): - if filename[-3:] == 'pdb': - result_list.append(filename) - - lowest_en = None # to keep track of lowest energy - all_residue_list = [] - - cluster_dict = self.parse_hex_output() - - for i in range(len(result_list)): - energy = '' - - # get the ligand_reserved section of the result file - file = open(self.results_path + result_list[i], 'r') - ligand_reserved_start = self.ligand_reserved_list[i] - ligand_reserved_section = file.readlines()[ligand_reserved_start:] - - # go through ligand reserved section to calculate energy - residue_set = set() - coor = [] - for line in ligand_reserved_section: - if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '): - cluster_size = len(cluster_dict[i + 1]) - total_solutions = cluster_dict['num_soln'] - - # energy is weighed according to the number of solutions - # in that cluster - energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size - - # record values if lowest energy - if lowest_en is None or energy < lowest_en: - lowest_en = energy - elif line[:4] == 'ATOM': - # coordinates of one atom - coordinates = tuple(map(float, filter(None, line.split()[6:9]))) - coor.append(coordinates) - # each atom's coordinates is now stored in the list coordinates - - residue_set = set() - for res in reference.keys(): # for each amino acid in the receptor file: - distances = [] - - for atom in coor: # for each atom of the ligand - for aa in reference[res].keys(): # for each atom of that amino acid - # check if the distance between atoms of the ligands - # and of the amino acid are lower than chosen threshold (5) - distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2, - (reference[res][aa][1] - atom[1]) ** 2, - (reference[res][aa][2] - atom[2]) ** 2])) - - distances.append(distance) - - # if at least one of the distances is lower than the threshold, otherwise skip - if all(d >= threshold for d in distances): - continue - else: - # adding energy (previosly divided by the number of results) - # if found multiple times, we would get an average - if res in ac.keys(): - ac[res] += energy - else: - ac[res] = energy - - # Store the resi number into set - residue_set.add(res) - - all_residue_list.append(residue_set) - - return ac - - @abstractmethod - def best_result(self): - pass - - @abstractmethod - def crte_receptor_dict(self): - pass - - @abstractmethod - def normalize_results(self, threshold): - pass - - -class MonomerDocking(Docking): - """A class the represents a docking between a monomer receptor and a monomer. - - --- Attributes --- - receptor (MonomerReceptor): a Receptor object that represents a monomer receptor - ligand (Ligand): a Ligand object that represents a ligand - results_path (str): the file path to where the results are stored - ligand_reserved_list (List[int]): a list of line numbers, one for each solution, - the indicates where the "Docked ligand" section begins - """ - - def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str): - super().__init__(receptor, ligand, results_path) - - def best_result(self): - pass - - def crte_receptor_dict(self, threshold): - """"Return a dictionary that contains the residue-energy - dictionary of the monomer. This is not necessary, but maintains - consistency between monomer and complex receptor dictionaries. - """ - receptor_res = {} - res_dict = self.result_dict_generator(-1, threshold) - ligand_res = {} - ligand_res[self.ligand.name] = res_dict - receptor_res[self.receptor.name] = ligand_res - return receptor_res - - def normalize_results(self, threshold): - """Return normalized residue-energy dictionaries for the - receptor. - """ - results_dict = self.crte_receptor_dict(threshold) - receptor_key = list(results_dict.keys())[0] - ligand_key = list(results_dict[receptor_key].keys())[0] - - inside_dict = results_dict[receptor_key][ligand_key] - abs_max = None - abs_min = None - - # To eliminate empty dictionaries that might cause division errors below - # normalized_mon_dicitonary calculations - if inside_dict != {}: - abs_min = min(inside_dict.values()) - abs_max = max(inside_dict.values()) - - all_normalized_results = {} - - normalized_mon_dict = {} - normalized_mon_dict[receptor_key] = {} - normalized_mon_dict[receptor_key][ligand_key] = {} - - # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy - if abs_min == abs_max: - for k, v in inside_dict.items(): - normalized_mon_dict[receptor_key][ligand_key][k] = 1 - else: - for k, v in inside_dict.items(): - normalized_value = (v - abs_min) / (abs_max - abs_min) - normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value - all_normalized_results.update(normalized_mon_dict) - return all_normalized_results - - -class ComplexDocking(Docking): - """A class that represents a docking between a complex receptor and a ligand. - - --- Attributes --- - receptor (ComplexReceptor): a Receptor object that represents a monomer receptor - ligand (Ligand): a Ligand object that represents a ligand - results_path (str): the file path to where the results are stored - ligand_reserved (List[int]): a list of line numbers, one for each solution, - which indicates where the "Docked ligand" section begins - split_results (List[List[Tuple[int]]]): a list where each sublist is a chain, - which contains a list of tuples. Each tuple indicates the line numbers - of the start and end of that chain in a results file. - """ - - def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str): - super().__init__(receptor, ligand, results_path) - self.split_results = [] - - def separate_results(self): - """For each solution, record the start and end line number (0-based) of - each chain. Then, populate self.split_results with the final list. - - Each sublist represents one solution file. Each tuple in the sublist - contains the start and end of one chain. The order of the tuples in - the sublist is the same as the order of the monomers in the receptor's - monomers_list. - """ - results_files = os.listdir(self.results_path) - - # for each solution - for file in results_files: - if file[-3:] != "pdb": - break - result_file = open(self.results_path + file) - - # this list contains indices of the start and end of each chain - line_numbers = [] - line = result_file.readline() - curr_line = 0 - prev = None - while line != '': - # the start of the first chain - if line.split()[0] == "ATOM" and line.split()[1] == "1": - # if line.startswith('ATOM 1 '): - prev = curr_line - 1 - - # the end of a chain - elif line[0:3] == 'TER': - line_numbers.append([prev + 1, curr_line]) - prev = curr_line - - # read next line - line = result_file.readline() - curr_line += 1 - - # populate split_results attribute - self.split_results = line_numbers - - def best_result(self): - pass - - def crte_receptor_dict(self, threshold): - all_monomers = [] - for i in range(len(self.receptor.monomers_list)): - ligand_res = {} - res_dict = self.result_dict_generator(i, threshold) - ligand_res[self.ligand.name] = res_dict - all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res}) - return all_monomers - - def normalize_results(self, threshold): - min_values = [] - max_values = [] - abs_max = None - abs_min = None - all_monomers_dict = self.crte_receptor_dict(threshold) - for i in range(len(all_monomers_dict)): - monomer_dict = all_monomers_dict[i] - monomer_key = list(monomer_dict.keys())[0] - ligand_key = list(monomer_dict[monomer_key].keys())[0] - - inside_dict = monomer_dict[monomer_key][ligand_key] - - # To eliminate empty dictionaries that might cause division errors below - # normalized_mon_dicitonary calculations - if inside_dict == {}: - continue - else: - mini = min(inside_dict.values()) - maxi = max(inside_dict.values()) - - min_values.append(mini) - max_values.append(maxi) - - abs_max = max(max_values) - abs_min = min(min_values) - - print("This is the maximum value: ", abs_max, file=sys.stderr) - print("This is the minimum value: ", abs_min, file=sys.stderr) - - # Now looping through every monomer, and calculating every residue energy to be - # normalized by using absolute minimum and maximum. - all_normalized_results = {} - for i in range(len(all_monomers_dict)): - monomer_dict = all_monomers_dict[i] - monomer_key = list(monomer_dict.keys())[0] - ligand_key = list(monomer_dict[monomer_key].keys())[0] - - inside_dict = monomer_dict[monomer_key][ligand_key] - - normalized_mon_dict = {} - normalized_mon_dict[monomer_key] = {} - normalized_mon_dict[monomer_key][ligand_key] = {} - - # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy - if abs_min == abs_max: - for k, v in inside_dict.items(): - normalized_mon_dict[monomer_key][ligand_key][k] = 1 - else: - for k, v in inside_dict.items(): - normalized_value = (v - abs_min) / (abs_max - abs_min) - normalized_mon_dict[monomer_key][ligand_key][k] = normalized_value - all_normalized_results.update(normalized_mon_dict) - return all_normalized_results - - -class Docker: - """A class that represents the controller to create docking pairs and carry - out the docking. - """ - - @staticmethod - def start(receptor: str, ligand: str, docking_pdb_path: str): - """Start the docking process and analyze results. Return the - normalized residue-energyy dictionary. - """ - # create docking object - ct = datetime.datetime.now() - print("Starting the docking process at {}".format(ct)) - docking = Docker.create_docking(receptor, ligand, docking_pdb_path) - if docking is None: - receptor = receptor.split('.')[0] - results_path = docking_pdb_path + receptor + '_' + ligand + '/' - with open(results_path + "final.json") as json_file: - final_json = json.load(json_file) - return final_json - elif docking == "Receptor file not found": - return "Receptor file not found" - elif docking == "Ligand file not found": - return "Ligand file not found" - - docking.hex_docking() - if isinstance(docking, ComplexDocking): - docking.separate_results() - docking.crte_ligand_reserved_attr() - normalized_results = docking.normalize_results(5) - new_json = docking.results_path + "final.json" - with open(new_json, 'w') as file: - file.write(json.dumps(normalized_results)) - ct = datetime.datetime.now() - print("current time:-", ct) - return normalized_results - - def create_receptor(receptor_name: str, receptor_file_path: str): - """Return a new receptor with the name receptor_name, by parsing - the file at recepter_file_path. - """ - with open(receptor_file_path) as f: - is_monomer = True - for line in f.readlines(): - if re.match(r'COMPND \d CHAIN: \w, \w*', line) is not None: - is_monomer = False - # if the receptor would be a monomer the regex would be - # r'COMPND \d CHAIN: \w;' - - # To make a list of the monomers' labels - print(receptor_name + ' identified as a protein complex') - if line[11:16] == 'CHAIN': - monomers_list = line.split(': ')[-1].split(', ') - # The COMPND line ends with ';' therefore it needs to be - # removed from the last label - monomers_list[-1] = monomers_list[-1][0] - new_receptor = ComplexReceptor(receptor_name, - receptor_file_path, - monomers_list) - return new_receptor - print("Unknown pdb structure, need further investigation") - - if is_monomer: - new_receptor = MonomerReceptor(receptor_name, - receptor_file_path) - return new_receptor - - def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): - """Return a docking pair, which contains a Receptor and a Ligand, as - specified by receptor_name and ligand_name, respectively. - """ - # check that the docking combination has not been run before - # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/' - if '.' in receptor_name: - receptor_name = receptor_name[:receptor_name.index('.')] - results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' - print(results_path) - if os.path.exists(results_path): #or \ - #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'): - print("The docking between {0} and {1} has already been done.".format(receptor_name, - ligand_name)) - return None - - - os.makedirs(results_path) - - # find receptor file and create receptor object - receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/' - # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs' - receptor_file_found = False - - for receptor_file in os.listdir(receptor_folder): - # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ - # receptor_file[-4:] == 'pdb' and \ - # receptor_file[:-4].lower() == receptor_name.lower(): - if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \ - (receptor_name in receptor_file): - receptor_file_found = True - receptor_file_path = receptor_folder + receptor_file - receptor = Docker.create_receptor(receptor_name, receptor_file_path) - - # find ligand file and create ligand object - # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/' - ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/' - ligand_file_found = False - - for ligand_file in os.listdir(ligand_folder): - if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \ - ligand_file.split('.')[1] == 'sdf' and \ - ligand_file[:-4].lower() == ligand_name.lower(): - ligand_file_found = True - ligand_file_path = ligand_folder + '/' + ligand_file - ligand = Ligand(ligand_name, ligand_file_path) - - if not receptor_file_found: - return "Receptor file not found" - elif not ligand_file_found: - return "Ligand file not found" - - # receptor and ligand objects are created and ready for docking - if isinstance(receptor, MonomerReceptor): - docking = MonomerDocking(receptor, ligand, results_path) - else: - docking = ComplexDocking(receptor, ligand, results_path) - return docking - - -if __name__ == "__main__": - # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/")) - print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/")) - diff --git a/api/utils/sdf_mapping.py b/api/utils/sdf_mapping.py deleted file mode 100644 index 9bf2033..0000000 --- a/api/utils/sdf_mapping.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -import re -from typing import List - -def get_substance_name(filename: str, folder_path: str): - file = open(folder_path + filename, "r") - line = file.readline().strip() - if line == "": - return None - while line != "> " and line != "$$$$": - line = file.readline().strip() - # right now, line == "> " or line is empty - if line == "$$$$": - return None - line = file.readline().strip() - names = [] - while line != "": - if len(line) > 0 and line[0] == ">": - break - # while line != "": - # check regex to see if it contains lowercase - # matched = re.search("[a-z]", line) - # if matched is not None: - # return line - # else: - # line = file.readline().strip() - names.append(line) - line = file.readline().strip() - return names - -def create_mapping(folder_path: str): - mapped_sdf = {} - sdf_files = os.listdir(folder_path) - for file in sdf_files: - if file[0] != "." and file[-4:] == ".sdf": - file_number = file[:file.index("_")] - name = file[file.index("_") + 1:-4] - # the commented out section is for sdfs that have not been filtered - # names = get_substance_name(file, folder_path) - # print(name) - # sdf_number = file.split(".")[0] - # mapped_sdf[sdf_number] = ",".join(names) - mapped_sdf[file_number] = name # check if want to map file_number or file name - return mapped_sdf - -if __name__ == "__main__": - sdf_folder_paths = ['/home/diennguyen/BAR_API/HEX_API/HEX_SMALL_MOLECULES'] - print(create_mapping(sdf_folder_paths[0])) - From 4c208ce575873ec1d8c3783d312afb71107f045e Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Fri, 16 Feb 2024 13:39:39 -0500 Subject: [PATCH 11/35] Fix bug to prevent creating results folder for invalid protein or ligand --- api/utils/docking_utils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 030a7ca..cba1fe2 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -367,7 +367,8 @@ def normalize_results(self, threshold): normalized_mon_dict[receptor_key] = {} normalized_mon_dict[receptor_key][ligand_key] = {} - # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + # prevent substraction of equal values or values that doesn't make any sense + # in terms of accuracy if abs_min == abs_max: for k, v in inside_dict.items(): normalized_mon_dict[receptor_key][ligand_key][k] = 1 @@ -493,7 +494,8 @@ def normalize_results(self, threshold): normalized_mon_dict[monomer_key] = {} normalized_mon_dict[monomer_key][ligand_key] = {} - # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy + # prevent substraction of equal values or values that doesn't make any sense + # in terms of accuracy if abs_min == abs_max: for k, v in inside_dict.items(): normalized_mon_dict[monomer_key][ligand_key][k] = 1 @@ -530,6 +532,11 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): elif docking == "Ligand file not found": return "Ligand file not found" + results_path = docking_pdb_path + receptor + '_' + ligand + '/' + + # create folder to store docking results + os.makedirs(results_path) + docking.hex_docking() if isinstance(docking, ComplexDocking): docking.separate_results() @@ -587,17 +594,11 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): ligand_name)) return None - os.makedirs(results_path) - # find receptor file and create receptor object receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/' - # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs' receptor_file_found = False for receptor_file in os.listdir(receptor_folder): - # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \ - # receptor_file[-4:] == 'pdb' and \ - # receptor_file[:-4].lower() == receptor_name.lower(): if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \ (receptor_name in receptor_file): receptor_file_found = True @@ -605,7 +606,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): receptor = Docker.create_receptor(receptor_name, receptor_file_path) # find ligand file and create ligand object - # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/' ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/' ligand_file_found = False From 39b13eb86a66e00cf0b72227a9ceb20e4bc36065 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Tue, 20 Feb 2024 11:47:35 -0500 Subject: [PATCH 12/35] Add regex check to process bigger receptor Receptors with more than 1000 residues will have pdb files where there is no space between columns 4 and 5. For example, it can look like this: A1000, whereas for earlier residues, it looks like this: A 345. A regex check is added to see which lines need to be further processed to separate and extract the residue number. --- api/utils/docking_utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index cba1fe2..b0724e5 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -221,10 +221,16 @@ def result_dict_generator(self, monomer_number, threshold): splitted_line = line.split() if line[0:4] == 'ATOM': coord = map(float, filter(None, splitted_line[6:9])) - if int(splitted_line[5]) in reference: - reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord) + + # check if chain name and residue are in the same column, e.g. A1000 + if re.search(r'\d', splitted_line[4]) is None: + residue = splitted_line[5] + else: + residue = splitted_line[4][1:] + if int(residue) in reference: + reference[int(residue)][int(splitted_line[1])] = tuple(coord) else: - reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)} + reference[int(residue)] = {int(splitted_line[1]) : tuple(coord)} # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}}, From a30f11794258c6dcb94330fc22143e30b79f33d4 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Tue, 20 Feb 2024 11:54:53 -0500 Subject: [PATCH 13/35] Add unittests and files for testing Files for testing include 1 small monomer receptor pdb, 1 small complex receptor pdb, 1 ligand sdf file and the hex results and outputs for these dockings. The complex receptor file was altered to create a small complex receptor, so it does not actually exist. The results for the dockings are stored in folders just as they would be in the /DATA/HEX_API/RESULTS folder. --- tests/data/6325_Ethylene.sdf | 107 ++++++++ tests/data/AF2_AT8G88888_complex.pdb | 198 +++++++++++++++ tests/data/AF2_AT9G99999_monomer.pdb | 197 +++++++++++++++ .../AT8G88888_complex_6325_Ethylene0001.pdb | 205 +++++++++++++++ .../hex_output.txt | 238 ++++++++++++++++++ .../AT8G88888_complex_6325_Ethylene0001.pdb | 205 +++++++++++++++ .../AT9G99999_monomer_6325_Ethylene0001.pdb | 205 +++++++++++++++ .../hex_output.txt | 218 ++++++++++++++++ tests/resources/test_docking_utils.py | 131 ++++++++++ 9 files changed, 1704 insertions(+) create mode 100644 tests/data/6325_Ethylene.sdf create mode 100644 tests/data/AF2_AT8G88888_complex.pdb create mode 100644 tests/data/AF2_AT9G99999_monomer.pdb create mode 100644 tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb create mode 100644 tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt create mode 100644 tests/data/AT8G88888_complex_6325_Ethylene0001.pdb create mode 100644 tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb create mode 100644 tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt create mode 100644 tests/resources/test_docking_utils.py diff --git a/tests/data/6325_Ethylene.sdf b/tests/data/6325_Ethylene.sdf new file mode 100644 index 0000000..851e2d7 --- /dev/null +++ b/tests/data/6325_Ethylene.sdf @@ -0,0 +1,107 @@ +6325 + -OEChem-03192020393D + + 6 5 0 0 0 0 0 0 0999 V2000 + -0.6672 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6672 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2213 -0.9290 0.0708 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.2212 0.9290 -0.0708 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2213 0.9290 -0.0708 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2213 -0.9290 0.0708 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 0 0 0 + 1 3 1 0 0 0 0 + 1 4 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 6 1 0 0 0 0 +M END +> +6325 + +> +0.4 + +> +1 + +> +6 +1 -0.3 +2 -0.3 +3 0.15 +4 0.15 +5 0.15 +6 0.15 + +> +0 + +> +2 +1 1 hydrophobe +1 2 hydrophobe + +> +2 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +000018B500000001 + +> +0.1306 + +> +11.86 + +> +21015797 1 8574413327516572042 +260 1 8574713502780882945 + +> +41.16 +1.06 +0.62 +0.62 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + +> +58.377 + +> +31.7 + +> +2 +5 +10 + +$$$$ diff --git a/tests/data/AF2_AT8G88888_complex.pdb b/tests/data/AF2_AT8G88888_complex.pdb new file mode 100644 index 0000000..357f6ba --- /dev/null +++ b/tests/data/AF2_AT8G88888_complex.pdb @@ -0,0 +1,198 @@ +HEADER 01-JUN-22 +TITLE ALPHAFOLD MONOMER V2.0 PREDICTION FOR UNCHARACTERIZED PROTEIN +TITLE 2 (A0A1I9LPI9) +COMPND MOL_ID: 1; +COMPND 2 MOLECULE: UNCHARACTERIZED PROTEIN; +COMPND 3 CHAIN: A, B; +SOURCE MOL_ID: 1; +SOURCE 2 ORGANISM_SCIENTIFIC: ARABIDOPSIS THALIANA; +SOURCE 3 ORGANISM_TAXID: 3702 +REMARK 1 +REMARK 1 REFERENCE 1 +REMARK 1 AUTH JOHN JUMPER, RICHARD EVANS, ALEXANDER PRITZEL, TIM GREEN, +REMARK 1 AUTH 2 MICHAEL FIGURNOV, OLAF RONNEBERGER, KATHRYN TUNYASUVUNAKOOL, +REMARK 1 AUTH 3 RUSS BATES, AUGUSTIN ZIDEK, ANNA POTAPENKO, ALEX BRIDGLAND, +REMARK 1 AUTH 4 CLEMENS MEYER, SIMON A A KOHL, ANDREW J BALLARD, +REMARK 1 AUTH 5 ANDREW COWIE, BERNARDINO ROMERA-PAREDES, STANISLAV NIKOLOV, +REMARK 1 AUTH 6 RISHUB JAIN, JONAS ADLER, TREVOR BACK, STIG PETERSEN, +REMARK 1 AUTH 7 DAVID REIMAN, ELLEN CLANCY, MICHAL ZIELINSKI, +REMARK 1 AUTH 8 MARTIN STEINEGGER, MICHALINA PACHOLSKA, TAMAS BERGHAMMER, +REMARK 1 AUTH 9 DAVID SILVER, ORIOL VINYALS, ANDREW W SENIOR, +REMARK 1 AUTH10 KORAY KAVUKCUOGLU, PUSHMEET KOHLI, DEMIS HASSABIS +REMARK 1 TITL HIGHLY ACCURATE PROTEIN STRUCTURE PREDICTION WITH ALPHAFOLD +REMARK 1 REF NATURE V. 596 583 2021 +REMARK 1 REFN ISSN 0028-0836 +REMARK 1 PMID 34265844 +REMARK 1 DOI 10.1038/s41586-021-03819-2 +REMARK 1 +REMARK 1 DISCLAIMERS +REMARK 1 ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED. THE +REMARK 1 INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD +REMARK 1 BE EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY +REMARK 1 OF ANY KIND, WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT +REMARK 1 USE OF THE INFORMATION SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD +REMARK 1 PARTY. THE INFORMATION IS NOT INTENDED TO BE A SUBSTITUTE FOR +REMARK 1 PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR TREATMENT, AND DOES NOT +REMARK 1 CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS AVAILABLE FOR +REMARK 1 ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE. +DBREF XXXX A 1 17 UNP A0A1I9LPI9 A0A1I9LPI9_ARATH 1 17 +SEQRES 1 A 17 MET PHE ARG PHE LEU ASP TRP ILE PHE THR VAL ALA THR +SEQRES 2 A 17 THR SER LEU ASP +CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1 +ORIGX1 1.000000 0.000000 0.000000 0.00000 +ORIGX2 0.000000 1.000000 0.000000 0.00000 +ORIGX3 0.000000 0.000000 1.000000 0.00000 +SCALE1 1.000000 0.000000 0.000000 0.00000 +SCALE2 0.000000 1.000000 0.000000 0.00000 +SCALE3 0.000000 0.000000 1.000000 0.00000 +MODEL 1 +ATOM 1 N MET A 1 -7.410 -1.552 9.979 1.00 71.91 N +ATOM 2 CA MET A 1 -6.641 -0.817 8.946 1.00 71.91 C +ATOM 3 C MET A 1 -5.303 -1.485 8.564 1.00 71.91 C +ATOM 4 CB MET A 1 -6.474 0.646 9.390 1.00 71.91 C +ATOM 5 O MET A 1 -4.645 -0.990 7.666 1.00 71.91 O +ATOM 6 CG MET A 1 -6.092 1.613 8.265 1.00 71.91 C +ATOM 7 SD MET A 1 -6.114 3.332 8.818 1.00 71.91 S +ATOM 8 CE MET A 1 -5.405 4.141 7.358 1.00 71.91 C +ATOM 9 N PHE A 2 -4.916 -2.639 9.137 1.00 82.96 N +ATOM 10 CA PHE A 2 -3.656 -3.336 8.797 1.00 82.96 C +ATOM 11 C PHE A 2 -3.544 -3.808 7.338 1.00 82.96 C +ATOM 12 CB PHE A 2 -3.477 -4.529 9.748 1.00 82.96 C +ATOM 13 O PHE A 2 -2.492 -3.654 6.732 1.00 82.96 O +ATOM 14 CG PHE A 2 -2.837 -4.145 11.064 1.00 82.96 C +ATOM 15 CD1 PHE A 2 -1.434 -4.085 11.145 1.00 82.96 C +ATOM 16 CD2 PHE A 2 -3.618 -3.837 12.195 1.00 82.96 C +ATOM 17 CE1 PHE A 2 -0.812 -3.714 12.349 1.00 82.96 C +ATOM 18 CE2 PHE A 2 -2.994 -3.457 13.397 1.00 82.96 C +ATOM 19 CZ PHE A 2 -1.591 -3.397 13.473 1.00 82.96 C +ATOM 20 N ARG A 3 -4.644 -4.300 6.744 1.00 88.23 N +ATOM 21 CA ARG A 3 -4.655 -4.785 5.350 1.00 88.23 C +ATOM 22 C ARG A 3 -4.257 -3.729 4.317 1.00 88.23 C +ATOM 23 CB ARG A 3 -6.038 -5.349 4.983 1.00 88.23 C +ATOM 24 O ARG A 3 -3.766 -4.087 3.258 1.00 88.23 O +ATOM 25 CG ARG A 3 -6.239 -6.783 5.494 1.00 88.23 C +ATOM 26 CD ARG A 3 -7.610 -7.350 5.094 1.00 88.23 C +ATOM 27 NE ARG A 3 -7.758 -7.481 3.629 1.00 88.23 N +ATOM 28 NH1 ARG A 3 -9.874 -8.376 3.605 1.00 88.23 N +ATOM 29 NH2 ARG A 3 -8.815 -7.999 1.679 1.00 88.23 N +ATOM 30 CZ ARG A 3 -8.811 -7.948 2.980 1.00 88.23 C +ATOM 31 N PHE A 4 -4.500 -2.449 4.602 1.00 95.39 N +ATOM 32 CA PHE A 4 -4.141 -1.385 3.667 1.00 95.39 C +ATOM 33 C PHE A 4 -2.630 -1.112 3.666 1.00 95.39 C +ATOM 34 CB PHE A 4 -4.950 -0.122 3.981 1.00 95.39 C +ATOM 35 O PHE A 4 -2.042 -0.961 2.601 1.00 95.39 O +ATOM 36 CG PHE A 4 -4.637 1.004 3.018 1.00 95.39 C +ATOM 37 CD1 PHE A 4 -3.744 2.026 3.392 1.00 95.39 C +ATOM 38 CD2 PHE A 4 -5.173 0.983 1.716 1.00 95.39 C +ATOM 39 CE1 PHE A 4 -3.393 3.025 2.468 1.00 95.39 C +ATOM 40 CE2 PHE A 4 -4.820 1.983 0.794 1.00 95.39 C +ATOM 41 CZ PHE A 4 -3.931 3.004 1.170 1.00 95.39 C +ATOM 42 N LEU A 5 -1.999 -1.093 4.848 1.00 95.07 N +ATOM 43 CA LEU A 5 -0.549 -0.898 4.972 1.00 95.07 C +ATOM 44 C LEU A 5 0.230 -2.068 4.366 1.00 95.07 C +ATOM 45 CB LEU A 5 -0.169 -0.721 6.454 1.00 95.07 C +ATOM 46 O LEU A 5 1.200 -1.845 3.652 1.00 95.07 O +ATOM 47 CG LEU A 5 -0.676 0.581 7.097 1.00 95.07 C +ATOM 48 CD1 LEU A 5 -0.344 0.575 8.590 1.00 95.07 C +ATOM 49 CD2 LEU A 5 -0.045 1.824 6.468 1.00 95.07 C +ATOM 50 N ASP A 6 -0.243 -3.291 4.604 1.00 94.98 N +ATOM 51 CA ASP A 6 0.310 -4.514 4.017 1.00 94.98 C +ATOM 52 C ASP A 6 0.268 -4.491 2.476 1.00 94.98 C +ATOM 53 CB ASP A 6 -0.502 -5.675 4.596 1.00 94.98 C +ATOM 54 O ASP A 6 1.259 -4.793 1.812 1.00 94.98 O +ATOM 55 CG ASP A 6 0.008 -7.032 4.128 1.00 94.98 C +ATOM 56 OD1 ASP A 6 -0.791 -7.719 3.454 1.00 94.98 O +ATOM 57 OD2 ASP A 6 1.140 -7.372 4.527 1.00 94.98 O +ATOM 58 N TRP A 7 -0.843 -4.021 1.895 1.00 94.94 N +ATOM 59 CA TRP A 7 -0.972 -3.855 0.445 1.00 94.94 C +ATOM 60 C TRP A 7 0.004 -2.814 -0.129 1.00 94.94 C +ATOM 61 CB TRP A 7 -2.424 -3.518 0.091 1.00 94.94 C +ATOM 62 O TRP A 7 0.686 -3.117 -1.103 1.00 94.94 O +ATOM 63 CG TRP A 7 -2.650 -3.240 -1.363 1.00 94.94 C +ATOM 64 CD1 TRP A 7 -2.691 -4.169 -2.344 1.00 94.94 C +ATOM 65 CD2 TRP A 7 -2.756 -1.947 -2.035 1.00 94.94 C +ATOM 66 CE2 TRP A 7 -2.861 -2.176 -3.439 1.00 94.94 C +ATOM 67 CE3 TRP A 7 -2.755 -0.604 -1.602 1.00 94.94 C +ATOM 68 NE1 TRP A 7 -2.829 -3.547 -3.569 1.00 94.94 N +ATOM 69 CH2 TRP A 7 -2.931 0.196 -3.906 1.00 94.94 C +ATOM 70 CZ2 TRP A 7 -2.944 -1.130 -4.369 1.00 94.94 C +ATOM 71 CZ3 TRP A 7 -2.849 0.455 -2.526 1.00 94.94 C +ATOM 72 N ILE A 8 0.122 -1.619 0.473 1.00 95.58 N +ATOM 73 CA ILE A 8 1.094 -0.599 0.022 1.00 95.58 C +ATOM 74 C ILE A 8 2.526 -1.128 0.124 1.00 95.58 C +ATOM 75 CB ILE A 8 0.955 0.719 0.824 1.00 95.58 C +ATOM 76 O ILE A 8 3.316 -0.913 -0.792 1.00 95.58 O +ATOM 77 CG1 ILE A 8 -0.332 1.498 0.479 1.00 95.58 C +ATOM 78 CG2 ILE A 8 2.174 1.654 0.660 1.00 95.58 C +ATOM 79 CD1 ILE A 8 -0.352 2.171 -0.903 1.00 95.58 C +ATOM 80 N PHE A 9 2.857 -1.824 1.215 1.00 95.75 N +ATOM 81 CA PHE A 9 4.181 -2.409 1.400 1.00 95.75 C +ATOM 82 C PHE A 9 4.484 -3.426 0.296 1.00 95.75 C +ATOM 83 CB PHE A 9 4.269 -3.029 2.799 1.00 95.75 C +ATOM 84 O PHE A 9 5.521 -3.325 -0.350 1.00 95.75 O +ATOM 85 CG PHE A 9 5.685 -3.372 3.208 1.00 95.75 C +ATOM 86 CD1 PHE A 9 6.205 -4.658 2.976 1.00 95.75 C +ATOM 87 CD2 PHE A 9 6.494 -2.385 3.801 1.00 95.75 C +ATOM 88 CE1 PHE A 9 7.529 -4.956 3.343 1.00 95.75 C +ATOM 89 CE2 PHE A 9 7.817 -2.684 4.169 1.00 95.75 C +ATOM 90 CZ PHE A 9 8.334 -3.970 3.939 1.00 95.75 C +ATOM 91 N THR A 10 3.528 -4.316 0.010 1.00 95.85 N +ATOM 92 CA THR A 10 3.622 -5.299 -1.079 1.00 95.85 C +ATOM 93 C THR A 10 3.835 -4.624 -2.434 1.00 95.85 C +ATOM 94 CB THR A 10 2.357 -6.170 -1.140 1.00 95.85 C +ATOM 95 O THR A 10 4.733 -5.013 -3.175 1.00 95.85 O +ATOM 96 CG2 THR A 10 2.445 -7.255 -2.212 1.00 95.85 C +ATOM 97 OG1 THR A 10 2.146 -6.838 0.082 1.00 95.85 O +ATOM 98 N VAL A 11 3.047 -3.592 -2.759 1.00 96.10 N +ATOM 99 CA VAL A 11 3.177 -2.859 -4.029 1.00 96.10 C +ATOM 100 C VAL A 11 4.548 -2.188 -4.140 1.00 96.10 C +ATOM 101 CB VAL A 11 2.034 -1.841 -4.204 1.00 96.10 C +ATOM 102 O VAL A 11 5.202 -2.317 -5.174 1.00 96.10 O +ATOM 103 CG1 VAL A 11 2.239 -0.928 -5.422 1.00 96.10 C +ATOM 104 CG2 VAL A 11 0.692 -2.558 -4.412 1.00 96.10 C +ATOM 105 N ALA A 12 5.008 -1.520 -3.077 1.00 95.59 N +ATOM 106 CA ALA A 12 6.301 -0.839 -3.056 1.00 95.59 C +ATOM 107 C ALA A 12 7.470 -1.817 -3.255 1.00 95.59 C +ATOM 108 CB ALA A 12 6.430 -0.061 -1.741 1.00 95.59 C +ATOM 109 O ALA A 12 8.385 -1.514 -4.019 1.00 95.59 O +ATOM 110 N THR A 13 7.419 -3.001 -2.635 1.00 94.11 N +ATOM 111 CA THR A 13 8.445 -4.039 -2.818 1.00 94.11 C +ATOM 112 C THR A 13 8.398 -4.653 -4.216 1.00 94.11 C +ATOM 113 CB THR A 13 8.369 -5.131 -1.738 1.00 94.11 C +ATOM 114 O THR A 13 9.435 -4.745 -4.854 1.00 94.11 O +ATOM 115 CG2 THR A 13 8.715 -4.573 -0.355 1.00 94.11 C +ATOM 116 OG1 THR A 13 7.079 -5.693 -1.633 1.00 94.11 O +ATOM 117 N THR A 14 7.213 -4.968 -4.760 1.00 93.90 N +ATOM 118 CA THR A 14 7.092 -5.543 -6.121 1.00 93.90 C +ATOM 119 C THR A 14 7.535 -4.612 -7.245 1.00 93.90 C +ATOM 120 CB THR A 14 5.651 -5.960 -6.455 1.00 93.90 C +ATOM 121 O THR A 14 7.745 -5.072 -8.357 1.00 93.90 O +ATOM 122 CG2 THR A 14 5.207 -7.198 -5.681 1.00 93.90 C +ATOM 123 OG1 THR A 14 4.714 -4.932 -6.199 1.00 93.90 O +ATOM 124 N SER A 15 7.585 -3.303 -6.989 1.00 91.22 N +ATOM 125 CA SER A 15 8.091 -2.321 -7.953 1.00 91.22 C +ATOM 126 C SER A 15 9.588 -2.045 -7.815 1.00 91.22 C +ATOM 127 CB SER A 15 7.310 -1.014 -7.807 1.00 91.22 C +ATOM 128 O SER A 15 10.166 -1.402 -8.689 1.00 91.22 O +ATOM 129 OG SER A 15 7.463 -0.403 -6.533 1.00 91.22 O +ATOM 130 N LEU A 16 10.169 -2.425 -6.673 1.00 85.38 N +ATOM 131 CA LEU A 16 11.572 -2.198 -6.342 1.00 85.38 C +ATOM 132 C LEU A 16 12.447 -3.386 -6.770 1.00 85.38 C +TER 132 LEU A 16 +ATOM 133 CB LEU B 16 11.669 -1.927 -4.830 1.00 85.38 C +ATOM 134 O LEU B 16 13.583 -3.156 -7.181 1.00 85.38 O +ATOM 135 CG LEU B 16 13.080 -1.545 -4.347 1.00 85.38 C +ATOM 136 CD1 LEU B 16 13.490 -0.148 -4.827 1.00 85.38 C +ATOM 137 CD2 LEU B 16 13.114 -1.545 -2.817 1.00 85.38 C +ATOM 138 N ASP B 17 11.914 -4.607 -6.661 1.00 73.64 N +ATOM 139 CA ASP B 17 12.422 -5.816 -7.331 1.00 73.64 C +ATOM 140 C ASP B 17 12.082 -5.810 -8.833 1.00 73.64 C +ATOM 141 CB ASP B 17 11.841 -7.073 -6.638 1.00 73.64 C +ATOM 142 O ASP B 17 12.961 -6.201 -9.638 1.00 73.64 O +ATOM 143 CG ASP B 17 12.463 -7.412 -5.269 1.00 73.64 C +ATOM 144 OD1 ASP B 17 13.655 -7.806 -5.237 1.00 73.64 O +ATOM 145 OD2 ASP B 17 11.729 -7.366 -4.249 1.00 73.64 O +ATOM 146 OXT ASP B 17 10.937 -5.420 -9.159 1.00 73.64 O +TER 147 ASP B 17 +ENDMDL +END \ No newline at end of file diff --git a/tests/data/AF2_AT9G99999_monomer.pdb b/tests/data/AF2_AT9G99999_monomer.pdb new file mode 100644 index 0000000..f64d2a7 --- /dev/null +++ b/tests/data/AF2_AT9G99999_monomer.pdb @@ -0,0 +1,197 @@ +HEADER 01-JUN-22 +TITLE ALPHAFOLD MONOMER V2.0 PREDICTION FOR UNCHARACTERIZED PROTEIN +TITLE 2 (A0A1I9LPI9) +COMPND MOL_ID: 1; +COMPND 2 MOLECULE: UNCHARACTERIZED PROTEIN; +COMPND 3 CHAIN: A +SOURCE MOL_ID: 1; +SOURCE 2 ORGANISM_SCIENTIFIC: ARABIDOPSIS THALIANA; +SOURCE 3 ORGANISM_TAXID: 3702 +REMARK 1 +REMARK 1 REFERENCE 1 +REMARK 1 AUTH JOHN JUMPER, RICHARD EVANS, ALEXANDER PRITZEL, TIM GREEN, +REMARK 1 AUTH 2 MICHAEL FIGURNOV, OLAF RONNEBERGER, KATHRYN TUNYASUVUNAKOOL, +REMARK 1 AUTH 3 RUSS BATES, AUGUSTIN ZIDEK, ANNA POTAPENKO, ALEX BRIDGLAND, +REMARK 1 AUTH 4 CLEMENS MEYER, SIMON A A KOHL, ANDREW J BALLARD, +REMARK 1 AUTH 5 ANDREW COWIE, BERNARDINO ROMERA-PAREDES, STANISLAV NIKOLOV, +REMARK 1 AUTH 6 RISHUB JAIN, JONAS ADLER, TREVOR BACK, STIG PETERSEN, +REMARK 1 AUTH 7 DAVID REIMAN, ELLEN CLANCY, MICHAL ZIELINSKI, +REMARK 1 AUTH 8 MARTIN STEINEGGER, MICHALINA PACHOLSKA, TAMAS BERGHAMMER, +REMARK 1 AUTH 9 DAVID SILVER, ORIOL VINYALS, ANDREW W SENIOR, +REMARK 1 AUTH10 KORAY KAVUKCUOGLU, PUSHMEET KOHLI, DEMIS HASSABIS +REMARK 1 TITL HIGHLY ACCURATE PROTEIN STRUCTURE PREDICTION WITH ALPHAFOLD +REMARK 1 REF NATURE V. 596 583 2021 +REMARK 1 REFN ISSN 0028-0836 +REMARK 1 PMID 34265844 +REMARK 1 DOI 10.1038/s41586-021-03819-2 +REMARK 1 +REMARK 1 DISCLAIMERS +REMARK 1 ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED. THE +REMARK 1 INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD +REMARK 1 BE EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY +REMARK 1 OF ANY KIND, WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT +REMARK 1 USE OF THE INFORMATION SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD +REMARK 1 PARTY. THE INFORMATION IS NOT INTENDED TO BE A SUBSTITUTE FOR +REMARK 1 PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR TREATMENT, AND DOES NOT +REMARK 1 CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS AVAILABLE FOR +REMARK 1 ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE. +DBREF XXXX A 1 17 UNP A0A1I9LPI9 A0A1I9LPI9_ARATH 1 17 +SEQRES 1 A 17 MET PHE ARG PHE LEU ASP TRP ILE PHE THR VAL ALA THR +SEQRES 2 A 17 THR SER LEU ASP +CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1 +ORIGX1 1.000000 0.000000 0.000000 0.00000 +ORIGX2 0.000000 1.000000 0.000000 0.00000 +ORIGX3 0.000000 0.000000 1.000000 0.00000 +SCALE1 1.000000 0.000000 0.000000 0.00000 +SCALE2 0.000000 1.000000 0.000000 0.00000 +SCALE3 0.000000 0.000000 1.000000 0.00000 +MODEL 1 +ATOM 1 N MET A 1 -7.410 -1.552 9.979 1.00 71.91 N +ATOM 2 CA MET A 1 -6.641 -0.817 8.946 1.00 71.91 C +ATOM 3 C MET A 1 -5.303 -1.485 8.564 1.00 71.91 C +ATOM 4 CB MET A 1 -6.474 0.646 9.390 1.00 71.91 C +ATOM 5 O MET A 1 -4.645 -0.990 7.666 1.00 71.91 O +ATOM 6 CG MET A 1 -6.092 1.613 8.265 1.00 71.91 C +ATOM 7 SD MET A 1 -6.114 3.332 8.818 1.00 71.91 S +ATOM 8 CE MET A 1 -5.405 4.141 7.358 1.00 71.91 C +ATOM 9 N PHE A 2 -4.916 -2.639 9.137 1.00 82.96 N +ATOM 10 CA PHE A 2 -3.656 -3.336 8.797 1.00 82.96 C +ATOM 11 C PHE A 2 -3.544 -3.808 7.338 1.00 82.96 C +ATOM 12 CB PHE A 2 -3.477 -4.529 9.748 1.00 82.96 C +ATOM 13 O PHE A 2 -2.492 -3.654 6.732 1.00 82.96 O +ATOM 14 CG PHE A 2 -2.837 -4.145 11.064 1.00 82.96 C +ATOM 15 CD1 PHE A 2 -1.434 -4.085 11.145 1.00 82.96 C +ATOM 16 CD2 PHE A 2 -3.618 -3.837 12.195 1.00 82.96 C +ATOM 17 CE1 PHE A 2 -0.812 -3.714 12.349 1.00 82.96 C +ATOM 18 CE2 PHE A 2 -2.994 -3.457 13.397 1.00 82.96 C +ATOM 19 CZ PHE A 2 -1.591 -3.397 13.473 1.00 82.96 C +ATOM 20 N ARG A 3 -4.644 -4.300 6.744 1.00 88.23 N +ATOM 21 CA ARG A 3 -4.655 -4.785 5.350 1.00 88.23 C +ATOM 22 C ARG A 3 -4.257 -3.729 4.317 1.00 88.23 C +ATOM 23 CB ARG A 3 -6.038 -5.349 4.983 1.00 88.23 C +ATOM 24 O ARG A 3 -3.766 -4.087 3.258 1.00 88.23 O +ATOM 25 CG ARG A 3 -6.239 -6.783 5.494 1.00 88.23 C +ATOM 26 CD ARG A 3 -7.610 -7.350 5.094 1.00 88.23 C +ATOM 27 NE ARG A 3 -7.758 -7.481 3.629 1.00 88.23 N +ATOM 28 NH1 ARG A 3 -9.874 -8.376 3.605 1.00 88.23 N +ATOM 29 NH2 ARG A 3 -8.815 -7.999 1.679 1.00 88.23 N +ATOM 30 CZ ARG A 3 -8.811 -7.948 2.980 1.00 88.23 C +ATOM 31 N PHE A 4 -4.500 -2.449 4.602 1.00 95.39 N +ATOM 32 CA PHE A 4 -4.141 -1.385 3.667 1.00 95.39 C +ATOM 33 C PHE A 4 -2.630 -1.112 3.666 1.00 95.39 C +ATOM 34 CB PHE A 4 -4.950 -0.122 3.981 1.00 95.39 C +ATOM 35 O PHE A 4 -2.042 -0.961 2.601 1.00 95.39 O +ATOM 36 CG PHE A 4 -4.637 1.004 3.018 1.00 95.39 C +ATOM 37 CD1 PHE A 4 -3.744 2.026 3.392 1.00 95.39 C +ATOM 38 CD2 PHE A 4 -5.173 0.983 1.716 1.00 95.39 C +ATOM 39 CE1 PHE A 4 -3.393 3.025 2.468 1.00 95.39 C +ATOM 40 CE2 PHE A 4 -4.820 1.983 0.794 1.00 95.39 C +ATOM 41 CZ PHE A 4 -3.931 3.004 1.170 1.00 95.39 C +ATOM 42 N LEU A 5 -1.999 -1.093 4.848 1.00 95.07 N +ATOM 43 CA LEU A 5 -0.549 -0.898 4.972 1.00 95.07 C +ATOM 44 C LEU A 5 0.230 -2.068 4.366 1.00 95.07 C +ATOM 45 CB LEU A 5 -0.169 -0.721 6.454 1.00 95.07 C +ATOM 46 O LEU A 5 1.200 -1.845 3.652 1.00 95.07 O +ATOM 47 CG LEU A 5 -0.676 0.581 7.097 1.00 95.07 C +ATOM 48 CD1 LEU A 5 -0.344 0.575 8.590 1.00 95.07 C +ATOM 49 CD2 LEU A 5 -0.045 1.824 6.468 1.00 95.07 C +ATOM 50 N ASP A 6 -0.243 -3.291 4.604 1.00 94.98 N +ATOM 51 CA ASP A 6 0.310 -4.514 4.017 1.00 94.98 C +ATOM 52 C ASP A 6 0.268 -4.491 2.476 1.00 94.98 C +ATOM 53 CB ASP A 6 -0.502 -5.675 4.596 1.00 94.98 C +ATOM 54 O ASP A 6 1.259 -4.793 1.812 1.00 94.98 O +ATOM 55 CG ASP A 6 0.008 -7.032 4.128 1.00 94.98 C +ATOM 56 OD1 ASP A 6 -0.791 -7.719 3.454 1.00 94.98 O +ATOM 57 OD2 ASP A 6 1.140 -7.372 4.527 1.00 94.98 O +ATOM 58 N TRP A 7 -0.843 -4.021 1.895 1.00 94.94 N +ATOM 59 CA TRP A 7 -0.972 -3.855 0.445 1.00 94.94 C +ATOM 60 C TRP A 7 0.004 -2.814 -0.129 1.00 94.94 C +ATOM 61 CB TRP A 7 -2.424 -3.518 0.091 1.00 94.94 C +ATOM 62 O TRP A 7 0.686 -3.117 -1.103 1.00 94.94 O +ATOM 63 CG TRP A 7 -2.650 -3.240 -1.363 1.00 94.94 C +ATOM 64 CD1 TRP A 7 -2.691 -4.169 -2.344 1.00 94.94 C +ATOM 65 CD2 TRP A 7 -2.756 -1.947 -2.035 1.00 94.94 C +ATOM 66 CE2 TRP A 7 -2.861 -2.176 -3.439 1.00 94.94 C +ATOM 67 CE3 TRP A 7 -2.755 -0.604 -1.602 1.00 94.94 C +ATOM 68 NE1 TRP A 7 -2.829 -3.547 -3.569 1.00 94.94 N +ATOM 69 CH2 TRP A 7 -2.931 0.196 -3.906 1.00 94.94 C +ATOM 70 CZ2 TRP A 7 -2.944 -1.130 -4.369 1.00 94.94 C +ATOM 71 CZ3 TRP A 7 -2.849 0.455 -2.526 1.00 94.94 C +ATOM 72 N ILE A 8 0.122 -1.619 0.473 1.00 95.58 N +ATOM 73 CA ILE A 8 1.094 -0.599 0.022 1.00 95.58 C +ATOM 74 C ILE A 8 2.526 -1.128 0.124 1.00 95.58 C +ATOM 75 CB ILE A 8 0.955 0.719 0.824 1.00 95.58 C +ATOM 76 O ILE A 8 3.316 -0.913 -0.792 1.00 95.58 O +ATOM 77 CG1 ILE A 8 -0.332 1.498 0.479 1.00 95.58 C +ATOM 78 CG2 ILE A 8 2.174 1.654 0.660 1.00 95.58 C +ATOM 79 CD1 ILE A 8 -0.352 2.171 -0.903 1.00 95.58 C +ATOM 80 N PHE A 9 2.857 -1.824 1.215 1.00 95.75 N +ATOM 81 CA PHE A 9 4.181 -2.409 1.400 1.00 95.75 C +ATOM 82 C PHE A 9 4.484 -3.426 0.296 1.00 95.75 C +ATOM 83 CB PHE A 9 4.269 -3.029 2.799 1.00 95.75 C +ATOM 84 O PHE A 9 5.521 -3.325 -0.350 1.00 95.75 O +ATOM 85 CG PHE A 9 5.685 -3.372 3.208 1.00 95.75 C +ATOM 86 CD1 PHE A 9 6.205 -4.658 2.976 1.00 95.75 C +ATOM 87 CD2 PHE A 9 6.494 -2.385 3.801 1.00 95.75 C +ATOM 88 CE1 PHE A 9 7.529 -4.956 3.343 1.00 95.75 C +ATOM 89 CE2 PHE A 9 7.817 -2.684 4.169 1.00 95.75 C +ATOM 90 CZ PHE A 9 8.334 -3.970 3.939 1.00 95.75 C +ATOM 91 N THR A 10 3.528 -4.316 0.010 1.00 95.85 N +ATOM 92 CA THR A 10 3.622 -5.299 -1.079 1.00 95.85 C +ATOM 93 C THR A 10 3.835 -4.624 -2.434 1.00 95.85 C +ATOM 94 CB THR A 10 2.357 -6.170 -1.140 1.00 95.85 C +ATOM 95 O THR A 10 4.733 -5.013 -3.175 1.00 95.85 O +ATOM 96 CG2 THR A 10 2.445 -7.255 -2.212 1.00 95.85 C +ATOM 97 OG1 THR A 10 2.146 -6.838 0.082 1.00 95.85 O +ATOM 98 N VAL A 11 3.047 -3.592 -2.759 1.00 96.10 N +ATOM 99 CA VAL A 11 3.177 -2.859 -4.029 1.00 96.10 C +ATOM 100 C VAL A 11 4.548 -2.188 -4.140 1.00 96.10 C +ATOM 101 CB VAL A 11 2.034 -1.841 -4.204 1.00 96.10 C +ATOM 102 O VAL A 11 5.202 -2.317 -5.174 1.00 96.10 O +ATOM 103 CG1 VAL A 11 2.239 -0.928 -5.422 1.00 96.10 C +ATOM 104 CG2 VAL A 11 0.692 -2.558 -4.412 1.00 96.10 C +ATOM 105 N ALA A 12 5.008 -1.520 -3.077 1.00 95.59 N +ATOM 106 CA ALA A 12 6.301 -0.839 -3.056 1.00 95.59 C +ATOM 107 C ALA A 12 7.470 -1.817 -3.255 1.00 95.59 C +ATOM 108 CB ALA A 12 6.430 -0.061 -1.741 1.00 95.59 C +ATOM 109 O ALA A 12 8.385 -1.514 -4.019 1.00 95.59 O +ATOM 110 N THR A 13 7.419 -3.001 -2.635 1.00 94.11 N +ATOM 111 CA THR A 13 8.445 -4.039 -2.818 1.00 94.11 C +ATOM 112 C THR A 13 8.398 -4.653 -4.216 1.00 94.11 C +ATOM 113 CB THR A 13 8.369 -5.131 -1.738 1.00 94.11 C +ATOM 114 O THR A 13 9.435 -4.745 -4.854 1.00 94.11 O +ATOM 115 CG2 THR A 13 8.715 -4.573 -0.355 1.00 94.11 C +ATOM 116 OG1 THR A 13 7.079 -5.693 -1.633 1.00 94.11 O +ATOM 117 N THR A 14 7.213 -4.968 -4.760 1.00 93.90 N +ATOM 118 CA THR A 14 7.092 -5.543 -6.121 1.00 93.90 C +ATOM 119 C THR A 14 7.535 -4.612 -7.245 1.00 93.90 C +ATOM 120 CB THR A 14 5.651 -5.960 -6.455 1.00 93.90 C +ATOM 121 O THR A 14 7.745 -5.072 -8.357 1.00 93.90 O +ATOM 122 CG2 THR A 14 5.207 -7.198 -5.681 1.00 93.90 C +ATOM 123 OG1 THR A 14 4.714 -4.932 -6.199 1.00 93.90 O +ATOM 124 N SER A 15 7.585 -3.303 -6.989 1.00 91.22 N +ATOM 125 CA SER A 15 8.091 -2.321 -7.953 1.00 91.22 C +ATOM 126 C SER A 15 9.588 -2.045 -7.815 1.00 91.22 C +ATOM 127 CB SER A 15 7.310 -1.014 -7.807 1.00 91.22 C +ATOM 128 O SER A 15 10.166 -1.402 -8.689 1.00 91.22 O +ATOM 129 OG SER A 15 7.463 -0.403 -6.533 1.00 91.22 O +ATOM 130 N LEU A 16 10.169 -2.425 -6.673 1.00 85.38 N +ATOM 131 CA LEU A 16 11.572 -2.198 -6.342 1.00 85.38 C +ATOM 132 C LEU A 16 12.447 -3.386 -6.770 1.00 85.38 C +ATOM 133 CB LEU A 16 11.669 -1.927 -4.830 1.00 85.38 C +ATOM 134 O LEU A 16 13.583 -3.156 -7.181 1.00 85.38 O +ATOM 135 CG LEU A 16 13.080 -1.545 -4.347 1.00 85.38 C +ATOM 136 CD1 LEU A 16 13.490 -0.148 -4.827 1.00 85.38 C +ATOM 137 CD2 LEU A 16 13.114 -1.545 -2.817 1.00 85.38 C +ATOM 138 N ASP A 17 11.914 -4.607 -6.661 1.00 73.64 N +ATOM 139 CA ASP A 17 12.422 -5.816 -7.331 1.00 73.64 C +ATOM 140 C ASP A 17 12.082 -5.810 -8.833 1.00 73.64 C +ATOM 141 CB ASP A 17 11.841 -7.073 -6.638 1.00 73.64 C +ATOM 142 O ASP A 17 12.961 -6.201 -9.638 1.00 73.64 O +ATOM 143 CG ASP A 17 12.463 -7.412 -5.269 1.00 73.64 C +ATOM 144 OD1 ASP A 17 13.655 -7.806 -5.237 1.00 73.64 O +ATOM 145 OD2 ASP A 17 11.729 -7.366 -4.249 1.00 73.64 O +ATOM 146 OXT ASP A 17 10.937 -5.420 -9.159 1.00 73.64 O +TER 147 ASP A 17 +ENDMDL +END \ No newline at end of file diff --git a/tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb b/tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb new file mode 100644 index 0000000..9b5cc72 --- /dev/null +++ b/tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb @@ -0,0 +1,205 @@ +REMARK File generated by Hex 8.0.0 on Mon Feb 19 14:04:05 2024. +REMARK Source: tests/data/AF2_AT8G88888_complex.pdb +REMARK Docked receptor coordinates... +REMARK Solution 1, from model "AF2_AT8G88888_complex", ID: 0040000b03710053 +REMARK Energy -7.055752e+01, RMS -1.00 +REMARK Overlap Volume 0.0, Clash Volume 0.0 +REMARK Box_min: -10.654 -8.723 -9.638 +REMARK Box_max: 13.655 4.141 13.473 +REMARK Cube_min: -10.654 -14.446 -10.237 +REMARK Cube_max: 13.655 9.863 14.072 +REMARK Symmetry Type: Default +REMARK Symmetry Matrix: 0 +ATOM 1 N MET A 1 -7.410 -1.552 9.979 1.00 71.91 +ATOM 2 CA MET A 1 -6.641 -0.817 8.946 1.00 71.91 +ATOM 3 C MET A 1 -5.303 -1.485 8.564 1.00 71.91 +ATOM 4 CB MET A 1 -6.474 0.646 9.390 1.00 71.91 +ATOM 5 O MET A 1 -4.645 -0.990 7.666 1.00 71.91 +ATOM 6 CG MET A 1 -6.092 1.613 8.265 1.00 71.91 +ATOM 7 SD MET A 1 -6.114 3.332 8.818 1.00 71.91 +ATOM 8 CE MET A 1 -5.405 4.141 7.358 1.00 71.91 +ATOM 9 1H MET A 1 -8.294 -1.042 10.181 1.00 99.99 +ATOM 10 2H MET A 1 -7.633 -2.507 9.631 1.00 99.99 +ATOM 11 3H MET A 1 -6.843 -1.622 10.848 1.00 99.99 +ATOM 12 N PHE A 2 -4.916 -2.639 9.137 1.00 82.96 +ATOM 13 CA PHE A 2 -3.656 -3.336 8.797 1.00 82.96 +ATOM 14 C PHE A 2 -3.544 -3.808 7.338 1.00 82.96 +ATOM 15 CB PHE A 2 -3.477 -4.529 9.748 1.00 82.96 +ATOM 16 O PHE A 2 -2.492 -3.654 6.732 1.00 82.96 +ATOM 17 CG PHE A 2 -2.837 -4.145 11.064 1.00 82.96 +ATOM 18 CD1 PHE A 2 -1.434 -4.085 11.145 1.00 82.96 +ATOM 19 CD2 PHE A 2 -3.618 -3.837 12.195 1.00 82.96 +ATOM 20 CE1 PHE A 2 -0.812 -3.714 12.349 1.00 82.96 +ATOM 21 CE2 PHE A 2 -2.994 -3.457 13.397 1.00 82.96 +ATOM 22 CZ PHE A 2 -1.591 -3.397 13.473 1.00 82.96 +ATOM 23 H PHE A 2 -5.539 -3.014 9.824 1.00 99.99 +ATOM 24 N ARG A 3 -4.644 -4.300 6.744 1.00 88.23 +ATOM 25 CA ARG A 3 -4.655 -4.785 5.350 1.00 88.23 +ATOM 26 C ARG A 3 -4.257 -3.729 4.317 1.00 88.23 +ATOM 27 CB ARG A 3 -6.038 -5.349 4.983 1.00 88.23 +ATOM 28 O ARG A 3 -3.766 -4.087 3.258 1.00 88.23 +ATOM 29 CG ARG A 3 -6.239 -6.783 5.494 1.00 88.23 +ATOM 30 CD ARG A 3 -7.610 -7.350 5.094 1.00 88.23 +ATOM 31 NE ARG A 3 -7.758 -7.481 3.629 1.00 88.23 +ATOM 32 NH1 ARG A 3 -9.874 -8.376 3.605 1.00 88.23 +ATOM 33 NH2 ARG A 3 -8.815 -7.999 1.679 1.00 88.23 +ATOM 34 CZ ARG A 3 -8.811 -7.948 2.980 1.00 88.23 +ATOM 35 H ARG A 3 -5.475 -4.324 7.301 1.00 99.99 +ATOM 36 HE ARG A 3 -6.985 -7.188 3.071 1.00 99.99 +ATOM 37 1HH1 ARG A 3 -10.654 -8.723 3.085 1.00 99.99 +ATOM 38 2HH1 ARG A 3 -9.908 -8.357 4.604 1.00 99.99 +ATOM 39 1HH2 ARG A 3 -9.615 -8.353 1.195 1.00 99.99 +ATOM 40 2HH2 ARG A 3 -8.018 -7.684 1.164 1.00 99.99 +ATOM 41 N PHE A 4 -4.500 -2.449 4.602 1.00 95.39 +ATOM 42 CA PHE A 4 -4.141 -1.385 3.667 1.00 95.39 +ATOM 43 C PHE A 4 -2.630 -1.112 3.666 1.00 95.39 +ATOM 44 CB PHE A 4 -4.950 -0.122 3.981 1.00 95.39 +ATOM 45 O PHE A 4 -2.042 -0.961 2.601 1.00 95.39 +ATOM 46 CG PHE A 4 -4.637 1.004 3.018 1.00 95.39 +ATOM 47 CD1 PHE A 4 -3.744 2.026 3.392 1.00 95.39 +ATOM 48 CD2 PHE A 4 -5.173 0.983 1.716 1.00 95.39 +ATOM 49 CE1 PHE A 4 -3.393 3.025 2.468 1.00 95.39 +ATOM 50 CE2 PHE A 4 -4.820 1.983 0.794 1.00 95.39 +ATOM 51 CZ PHE A 4 -3.931 3.004 1.170 1.00 95.39 +ATOM 52 H PHE A 4 -4.937 -2.236 5.477 1.00 99.99 +ATOM 53 N LEU A 5 -1.999 -1.093 4.848 1.00 95.07 +ATOM 54 CA LEU A 5 -0.549 -0.898 4.972 1.00 95.07 +ATOM 55 C LEU A 5 0.230 -2.068 4.366 1.00 95.07 +ATOM 56 CB LEU A 5 -0.169 -0.721 6.454 1.00 95.07 +ATOM 57 O LEU A 5 1.200 -1.845 3.652 1.00 95.07 +ATOM 58 CG LEU A 5 -0.676 0.581 7.097 1.00 95.07 +ATOM 59 CD1 LEU A 5 -0.344 0.575 8.590 1.00 95.07 +ATOM 60 CD2 LEU A 5 -0.045 1.824 6.468 1.00 95.07 +ATOM 61 H LEU A 5 -2.570 -1.220 5.660 1.00 99.99 +ATOM 62 N ASP A 6 -0.243 -3.291 4.604 1.00 94.98 +ATOM 63 CA ASP A 6 0.310 -4.514 4.017 1.00 94.98 +ATOM 64 C ASP A 6 0.268 -4.491 2.476 1.00 94.98 +ATOM 65 CB ASP A 6 -0.502 -5.675 4.596 1.00 94.98 +ATOM 66 O ASP A 6 1.259 -4.793 1.812 1.00 94.98 +ATOM 67 CG ASP A 6 0.008 -7.032 4.128 1.00 94.98 +ATOM 68 OD1 ASP A 6 -0.791 -7.719 3.454 1.00 94.98 +ATOM 69 OD2 ASP A 6 1.140 -7.372 4.527 1.00 94.98 +ATOM 70 H ASP A 6 -1.025 -3.331 5.228 1.00 99.99 +ATOM 71 N TRP A 7 -0.843 -4.021 1.895 1.00 94.94 +ATOM 72 CA TRP A 7 -0.972 -3.855 0.445 1.00 94.94 +ATOM 73 C TRP A 7 0.004 -2.814 -0.129 1.00 94.94 +ATOM 74 CB TRP A 7 -2.424 -3.518 0.091 1.00 94.94 +ATOM 75 O TRP A 7 0.686 -3.117 -1.103 1.00 94.94 +ATOM 76 CG TRP A 7 -2.650 -3.240 -1.363 1.00 94.94 +ATOM 77 CD1 TRP A 7 -2.691 -4.169 -2.344 1.00 94.94 +ATOM 78 CD2 TRP A 7 -2.756 -1.947 -2.035 1.00 94.94 +ATOM 79 CE2 TRP A 7 -2.861 -2.176 -3.439 1.00 94.94 +ATOM 80 CE3 TRP A 7 -2.755 -0.604 -1.602 1.00 94.94 +ATOM 81 NE1 TRP A 7 -2.829 -3.547 -3.569 1.00 94.94 +ATOM 82 CH2 TRP A 7 -2.931 0.196 -3.906 1.00 94.94 +ATOM 83 CZ2 TRP A 7 -2.944 -1.130 -4.369 1.00 94.94 +ATOM 84 CZ3 TRP A 7 -2.849 0.455 -2.526 1.00 94.94 +ATOM 85 H TRP A 7 -1.598 -3.783 2.507 1.00 99.99 +ATOM 86 HE1 TRP A 7 -2.899 -3.980 -4.446 1.00 99.99 +ATOM 87 N ILE A 8 0.122 -1.619 0.473 1.00 95.58 +ATOM 88 CA ILE A 8 1.094 -0.599 0.022 1.00 95.58 +ATOM 89 C ILE A 8 2.526 -1.128 0.124 1.00 95.58 +ATOM 90 CB ILE A 8 0.955 0.719 0.824 1.00 95.58 +ATOM 91 O ILE A 8 3.316 -0.913 -0.792 1.00 95.58 +ATOM 92 CG1 ILE A 8 -0.332 1.498 0.479 1.00 95.58 +ATOM 93 CG2 ILE A 8 2.174 1.654 0.660 1.00 95.58 +ATOM 94 CD1 ILE A 8 -0.352 2.171 -0.903 1.00 95.58 +ATOM 95 H ILE A 8 -0.482 -1.442 1.251 1.00 99.99 +ATOM 96 N PHE A 9 2.857 -1.824 1.215 1.00 95.75 +ATOM 97 CA PHE A 9 4.181 -2.409 1.400 1.00 95.75 +ATOM 98 C PHE A 9 4.484 -3.426 0.296 1.00 95.75 +ATOM 99 CB PHE A 9 4.269 -3.029 2.799 1.00 95.75 +ATOM 100 O PHE A 9 5.521 -3.325 -0.350 1.00 95.75 +ATOM 101 CG PHE A 9 5.685 -3.372 3.208 1.00 95.75 +ATOM 102 CD1 PHE A 9 6.205 -4.658 2.976 1.00 95.75 +ATOM 103 CD2 PHE A 9 6.494 -2.385 3.801 1.00 95.75 +ATOM 104 CE1 PHE A 9 7.529 -4.956 3.343 1.00 95.75 +ATOM 105 CE2 PHE A 9 7.817 -2.684 4.169 1.00 95.75 +ATOM 106 CZ PHE A 9 8.334 -3.970 3.939 1.00 95.75 +ATOM 107 H PHE A 9 2.148 -1.930 1.913 1.00 99.99 +ATOM 108 N THR A 10 3.528 -4.316 0.010 1.00 95.85 +ATOM 109 CA THR A 10 3.622 -5.299 -1.079 1.00 95.85 +ATOM 110 C THR A 10 3.835 -4.624 -2.434 1.00 95.85 +ATOM 111 CB THR A 10 2.357 -6.170 -1.140 1.00 95.85 +ATOM 112 O THR A 10 4.733 -5.013 -3.175 1.00 95.85 +ATOM 113 CG2 THR A 10 2.445 -7.255 -2.212 1.00 95.85 +ATOM 114 OG1 THR A 10 2.146 -6.838 0.082 1.00 95.85 +ATOM 115 H THR A 10 2.717 -4.277 0.596 1.00 99.99 +ATOM 116 HG1 THR A 10 1.357 -7.353 -0.022 1.00 99.99 +ATOM 117 N VAL A 11 3.047 -3.592 -2.759 1.00 96.10 +ATOM 118 CA VAL A 11 3.177 -2.859 -4.029 1.00 96.10 +ATOM 119 C VAL A 11 4.548 -2.188 -4.140 1.00 96.10 +ATOM 120 CB VAL A 11 2.034 -1.841 -4.204 1.00 96.10 +ATOM 121 O VAL A 11 5.202 -2.317 -5.174 1.00 96.10 +ATOM 122 CG1 VAL A 11 2.239 -0.928 -5.422 1.00 96.10 +ATOM 123 CG2 VAL A 11 0.692 -2.558 -4.412 1.00 96.10 +ATOM 124 H VAL A 11 2.351 -3.334 -2.088 1.00 99.99 +ATOM 125 N ALA A 12 5.008 -1.520 -3.077 1.00 95.59 +ATOM 126 CA ALA A 12 6.301 -0.839 -3.056 1.00 95.59 +ATOM 127 C ALA A 12 7.470 -1.817 -3.255 1.00 95.59 +ATOM 128 CB ALA A 12 6.430 -0.061 -1.741 1.00 95.59 +ATOM 129 O ALA A 12 8.385 -1.514 -4.019 1.00 95.59 +ATOM 130 H ALA A 12 4.410 -1.508 -2.275 1.00 99.99 +ATOM 131 N THR A 13 7.419 -3.001 -2.635 1.00 94.11 +ATOM 132 CA THR A 13 8.445 -4.039 -2.818 1.00 94.11 +ATOM 133 C THR A 13 8.398 -4.653 -4.216 1.00 94.11 +ATOM 134 CB THR A 13 8.369 -5.131 -1.738 1.00 94.11 +ATOM 135 O THR A 13 9.435 -4.745 -4.854 1.00 94.11 +ATOM 136 CG2 THR A 13 8.715 -4.573 -0.355 1.00 94.11 +ATOM 137 OG1 THR A 13 7.079 -5.693 -1.633 1.00 94.11 +ATOM 138 H THR A 13 6.638 -3.155 -2.029 1.00 99.99 +ATOM 139 HG1 THR A 13 7.123 -6.350 -0.951 1.00 99.99 +ATOM 140 N THR A 14 7.213 -4.968 -4.760 1.00 93.90 +ATOM 141 CA THR A 14 7.092 -5.543 -6.121 1.00 93.90 +ATOM 142 C THR A 14 7.535 -4.612 -7.245 1.00 93.90 +ATOM 143 CB THR A 14 5.651 -5.960 -6.455 1.00 93.90 +ATOM 144 O THR A 14 7.745 -5.072 -8.357 1.00 93.90 +ATOM 145 CG2 THR A 14 5.207 -7.198 -5.681 1.00 93.90 +ATOM 146 OG1 THR A 14 4.714 -4.932 -6.199 1.00 93.90 +ATOM 147 H THR A 14 6.400 -4.797 -4.202 1.00 99.99 +ATOM 148 HG1 THR A 14 3.862 -5.276 -6.432 1.00 99.99 +ATOM 149 N SER A 15 7.585 -3.303 -6.989 1.00 91.22 +ATOM 150 CA SER A 15 8.091 -2.321 -7.953 1.00 91.22 +ATOM 151 C SER A 15 9.588 -2.045 -7.815 1.00 91.22 +ATOM 152 CB SER A 15 7.310 -1.014 -7.807 1.00 91.22 +ATOM 153 O SER A 15 10.166 -1.402 -8.689 1.00 91.22 +ATOM 154 OG SER A 15 7.463 -0.403 -6.533 1.00 91.22 +ATOM 155 H SER A 15 7.254 -3.014 -6.090 1.00 99.99 +ATOM 156 HG SER A 15 6.945 0.391 -6.547 1.00 99.99 +ATOM 157 N LEU A 16 10.169 -2.425 -6.673 1.00 85.38 +ATOM 158 CA LEU A 16 11.572 -2.198 -6.342 1.00 85.38 +ATOM 159 C LEU A 16 12.447 -3.386 -6.770 1.00 85.38 +ATOM 160 H LEU A 16 9.561 -2.894 -6.031 1.00 99.99 +TER 161 LEU A 16 +ATOM 162 CB LEU B 16 11.669 -1.927 -4.830 1.00 85.38 +ATOM 163 O LEU B 16 13.583 -3.156 -7.181 1.00 85.38 +ATOM 164 CG LEU B 16 13.080 -1.545 -4.347 1.00 85.38 +ATOM 165 CD1 LEU B 16 13.490 -0.148 -4.827 1.00 85.38 +ATOM 166 CD2 LEU B 16 13.114 -1.545 -2.817 1.00 85.38 +ATOM 167 N ASP B 17 11.914 -4.607 -6.661 1.00 73.64 +ATOM 168 CA ASP B 17 12.422 -5.816 -7.331 1.00 73.64 +ATOM 169 C ASP B 17 12.082 -5.810 -8.833 1.00 73.64 +ATOM 170 CB ASP B 17 11.841 -7.073 -6.638 1.00 73.64 +ATOM 171 O ASP B 17 12.961 -6.201 -9.638 1.00 73.64 +ATOM 172 CG ASP B 17 12.463 -7.412 -5.269 1.00 73.64 +ATOM 173 OD1 ASP B 17 13.655 -7.806 -5.237 1.00 73.64 +ATOM 174 OD2 ASP B 17 11.729 -7.366 -4.249 1.00 73.64 +ATOM 175 OXT ASP B 17 10.937 -5.420 -9.159 1.00 73.64 +TER 176 ASP B 17 +REMARK Source: tests/data/6325_Ethylene.sdf +REMARK Docked ligand coordinates... +REMARK Solution 1, from model "6325_Ethylene", ID: 0040000b03710053 +REMARK Energy -7.055752e+01, RMS -1.00 +REMARK Overlap Volume 0.0, Clash Volume 0.0 +REMARK Box_min: -10.654 -8.723 -9.638 +REMARK Box_max: 13.655 4.141 13.473 +REMARK Cube_min: -10.654 -14.446 -10.237 +REMARK Cube_max: 13.655 9.863 14.072 +REMARK Symmetry Type: Default +REMARK Symmetry Matrix: 0 +ATOM 177 C SDF A 1 -0.275 -0.845 -2.305 1.00 99.99 +ATOM 178 C SDF A 1 0.981 -0.395 -2.301 1.00 99.99 +ATOM 179 H SDF A 1 -1.106 -0.166 -2.458 1.00 99.99 +ATOM 180 H SDF A 1 -0.487 -1.898 -2.156 1.00 99.99 +ATOM 181 H SDF A 1 1.813 -1.073 -2.148 1.00 99.99 +ATOM 182 H SDF A 1 1.193 0.658 -2.449 1.00 99.99 diff --git a/tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt b/tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt new file mode 100644 index 0000000..f5d4f1e --- /dev/null +++ b/tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt @@ -0,0 +1,238 @@ +Hex 8.0.0 starting at Mon Feb 19 14:03:40 2024 on host bar. + +Hostname: bar +Main memory: 128813 Mb +CPUs online: 32 +GPUs online: 0 +Compute threads: 24 + +Sizeof(short): 2 +Sizeof(int): 4 +Sizeof(long): 8 +Sizeof(long int):8 +Sizeof(float): 4 +Sizeof(double): 8 +Sizeof(octa): 8 +Sizeof(int *): 8 +Sizeof(long *): 8 +Sizeof(void *): 8 + +Default path: /home/dnguyen/BAR_API_forked/BAR_API +PATH: /home/dnguyen/hex/exe:/home/dnguyen/hex/bin:/home/dnguyen/BAR_API_forked/BAR_API/venv/bin:/home/dnguyen/.vscode-server/bin/8b3775030ed1a69b13e4f4c628c612102e30a681/bin/remote-cli:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/ho +Hex program: /home/dnguyen/hex/exe/hex8.0.0-nogui.x64 +HEX_ROOT: /home/dnguyen/hex +HEX_STRIDE: stride.x64 +HEX_PDB: NULL +HEX_DATA: NULL +HEX_MACROS: NULL +HEX_COLOURS: NULL +HEX_STARTUP: NULL +HEX_FONTFILE: NULL +HEX_PIPE: NULL +HEX_CACHE: /home/dnguyen/hex_cache +HEX_LOG: NULL +HEX_CPUS: NULL +HEX_GPUS: NULL +HEX_FIRST_GPU: NULL +HEX_GTO_SCALE: NULL +HEX_ETO_SCALE: NULL + +CUDA enabled: No + + + +Running HEX_STARTUP file: /home/dnguyen/hex/data/startup_v5.mac +Disc Cache enabled. Using directory: /home/dnguyen/hex_cache + + +Reading commands from stdin ... +>> open_receptor tests/data/AF2_AT8G88888_complex.pdb +Assuming tests/data/AF2_AT8G88888_complex.pdb is a PDB file... + +Opened PDB file: tests/data/AF2_AT8G88888_complex.pdb, ID = AF2_AT8G88888_complex +*Warning* Can't add all hydrogens to incomplete residue: B 16:LEU +*Warning* Can't add all hydrogens to incomplete residue: B 17:ASP +Loaded PDB file: tests/data/AF2_AT8G88888_complex.pdb, (20 residues, 176 atoms, 1 models) +*Warning* Fractional charge (-0.46) for terminal residue: B 16:LEU + LEU:CB Radius = 1.70, Charge = 0.02 + LEU:O Radius = 1.50, Charge = -0.50 + LEU:CG Radius = 1.50, Charge = 0.05 + LEU:CD1 Radius = 1.90, Charge = -0.01 + LEU:CD2 Radius = 1.90, Charge = -0.01 +*Warning* Fractional charge (-2.27) for terminal residue: B 17:ASP + ASP:N Radius = 1.40, Charge = -0.52 + ASP:CA Radius = 1.50, Charge = 0.25 + ASP:C Radius = 1.40, Charge = 0.14 + ASP:CB Radius = 1.70, Charge = -0.21 + ASP:O Radius = 1.50, Charge = -0.57 + ASP:CG Radius = 1.40, Charge = 0.62 + ASP:OD1 Radius = 1.50, Charge = -0.71 + ASP:OD2 Radius = 1.50, Charge = -0.71 + ASP:OXT Radius = 1.50, Charge = -0.57 +Counted 1 +ve and 2 -ve formal charged residues: Net formal charge: -1 +>AF2_AT8G88888_complex A +MFRFLDWIFTVATTSL +>AF2_AT8G88888_complex B +LD +>> open_ligand tests/data/6325_Ethylene.sdf +Assuming tests/data/6325_Ethylene.sdf is an SDF file... + +Opened SDF file: tests/data/6325_Ethylene.sdf, ID = 6325_Ethylene +>> docking_correlation 1 +Docking with shape+electrostatics. +>> docking_score_threshold 0 +>> max_docking_solutions 25 +>> docking_receptor_stepsize 5.50 +Receptor step size: 5.50 deg. B=32, M=64, T=1692. +>> docking_ligand_stepsize 5.50 +Ligand step size: 5.50 deg. B=32, M=64, T=1692. +>> docking_alpha_stepsize 2.80 +Twist step size: 2.80 deg. B=64, M=128. +>> docking_main_scan 16 +>> receptor_origin C-825:VAL-O +*Warning* No Match: C-825:VAL-O -> C-825:VAL-O (Chain-ResidueID:ResidueName-AtomName) +>> commit_edits +>> activate_docking + +Contouring surface for molecule AF2_AT8G88888_complex. +Polar probe = 1.40A, Apolar probe = 1.40A +Gaussian sampling over 146 atoms done in 0.03 seconds. +Contoured 37680 triangles (18842 vertices) in 0.03 seconds. +Surface traversal done in 0.01 seconds - Found 1 surface segments. +Primary surface: Area = 1893.33, Volume = 5111.39. +Culled 0 small segments in 0.01 seconds. +Total contouring time: 0.05 seconds. + + +Contouring surface for molecule 6325_Ethylene. +Polar probe = 1.40A, Apolar probe = 1.40A +Gaussian sampling over 2 atoms done in 0.00 seconds. +Contoured 3240 triangles (1622 vertices) in 0.00 seconds. +Surface traversal done in 0.00 seconds - Found 1 surface segments. +Primary surface: Area = 164.05, Volume = 195.88. +Culled 0 small segments in 0.00 seconds. +Total contouring time: 0.00 seconds. + +Sampling surface and interior volumes for molecule AF2_AT8G88888_complex. +Generated 14716 exterior and 5201 interior skin grid cells. +Exterior skin volume = 3178.66; interior skin volume = 1123.42. +Volume sampling done in 0.04 seconds. +Sampling surface and interior volumes for molecule 6325_Ethylene. +Generated 966 exterior and 123 interior skin grid cells. +Exterior skin volume = 208.66; interior skin volume = 26.57. +Volume sampling done in 0.00 seconds. + +Calculating potential to N = 25 (5525 coefficients) using 24 Tasks ... +Grid: 74x74x74 = 405224 cells (20323 non-zero) of 0.60 Angstroms. +Done integration over 20323 cells in 0.06s (335995/s). + +Calculating electrostatics for molecule AF2_AT8G88888_complex. +Charge density for molecule AF2_AT8G88888_complex to N = 25: 172 atoms done in 0.00 seconds. +Potential for molecule AF2_AT8G88888_complex to N = 25 done in 0.01 seconds. +Calculating electrostatics for molecule 6325_Ethylene. +Charge density for molecule 6325_Ethylene to N = 25: 0 atoms done in 0.00 seconds. +Potential for molecule 6325_Ethylene to N = 25 done in 0.01 seconds. + +------------------------------------------------------------------------------ +Docking will output a maximum of 25 solutions per pair... + +------------------------------------------------------------------------------ +Docking 1 pair of starting orientations... + +Docking receptor: AF2_AT8G88888_complex and ligand: 6325_Ethylene... + +Receptor AF2_AT8G88888_complex: Tag = AF2_AT8G88888_complex +Ligand 6325_Ethylene: Tag = 6325_Ethylene + +Setting up shape + electrostatics correlation. + +Starting SPF search. +Setting docking_score threshold = 0.0 +Setting 30 distance samples from 0.00 to 23.20, with steps of 0.80. + + +Total 6D space: Iterate[30,1692,1] x FFT[128,32,64] = 13306429440. +Initial rotational increments (N=16) Receptor: 1692 (39Mb), Ligand: 1692 (39Mb) +Applying 1692+1692 coefficient rotations on 24 CPUs for N=16. +Done 3384 rotations in a total of 0.07s (48465/s). + +Starting 3D FFT search using 24 CPUs and 0 GPUs with N=16, Nalpha=128/128. +Estart = 68.70. +Done 13306429440 orientations in 23.85s (558001702/s). +Found 51655052/13306429440 within score threshold = 0.0 NOT including start guess. + +Time spent culling 42*1600000 solutions = 8.56s. +Starting guess not found in top 1255052 solutions. +Emin = -67.49, Emax = -0.00 + +Re-sampling top 40000 orientations -> top 28050 retained. +Surviving rotational steps (N=25) Receptor: 48 (9Mb), Ligand: 1441 (243Mb) +Applying 48+1441 coefficient rotations on 24 CPUs for N=25. +Done 1489 rotations in a total of 0.12s (12658/s). + +Starting 1D FFT refinement using 24 CPUs and 0 GPUs with N=25, Nalpha=128/128. +Estart = 59.64. +Done 3590400 orientations in 0.24s (14979911/s). +Found 70715/3590400 within score threshold = 0.0 NOT including start guess. + + +Solution buffer reached 70715/200000 = 35.4% occupancy with no culling. +Starting guess not found in top 70715 solutions. +Emin = -70.56, Emax = -7.51 + +Docking correlation summary by RMS deviation and steric clashes +------------------------------------------------------------------------- + Soln Etotal Eshape Eforce Eair RMS Bumps + ---- --------- --------- --------- --------- ---------------- ----- + + +Docked structures AF2_AT8G88888_complex:6325_Ethylene in a total of 0 min, 25 sec. + + +------------------------------------------------------------------------------ +Saving top 25 orientations. + +Docking done in a total of 0 min, 25 sec. + + +------------------------------------------------------------------------------ + +No AIRs enabled or defined. Skipping restraint checks. +Clustering found 1 clusters from 25 docking solutions in 0.00 seconds. + +---- ---- ------- ------- ------- ------- ------- --- ----- +Clst Soln Models Etotal Eshape Eforce Eair Bmp RMS +---- ---- ------- ------- ------- ------- ------- --- ----- + 1 1 001:001 -70.6 -70.6 0.0 0.0 -1 -1.00 + 1 2 001:001 -70.4 -70.4 0.0 0.0 -1 -1.00 + 1 3 001:001 -70.4 -70.4 0.0 0.0 -1 -1.00 + 1 4 001:001 -70.4 -70.4 0.0 0.0 -1 -1.00 + 1 5 001:001 -70.4 -70.4 0.0 0.0 -1 -1.00 + 1 6 001:001 -70.3 -70.3 0.0 0.0 -1 -1.00 + 1 7 001:001 -70.2 -70.2 0.0 0.0 -1 -1.00 + 1 8 001:001 -70.2 -70.2 0.0 0.0 -1 -1.00 + 1 9 001:001 -70.1 -70.1 0.0 0.0 -1 -1.00 + 1 10 001:001 -70.1 -70.1 0.0 0.0 -1 -1.00 + 1 11 001:001 -70.1 -70.1 0.0 0.0 -1 -1.00 + 1 12 001:001 -70.1 -70.1 0.0 0.0 -1 -1.00 + 1 13 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 14 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 15 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 16 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 17 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 18 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 19 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 20 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 21 001:001 -69.9 -69.9 0.0 0.0 -1 -1.00 + 1 22 001:001 -69.9 -69.9 0.0 0.0 -1 -1.00 + 1 23 001:001 -69.9 -69.9 0.0 0.0 -1 -1.00 + 1 24 001:001 -69.9 -69.9 0.0 0.0 -1 -1.00 + 1 25 001:001 -69.9 -69.9 0.0 0.0 -1 -1.00 +------------------------------------------------------------ + 1 1 001:001 -70.6 -70.6 0.0 0.0 -1 -1.00 +>> save_range 1 100 tests/data/AT8G88888_complex_6325_Ethylene/ AT8G88888_complex_6325_Ethylene pdb +Saving orientation 1 (solution 1) to tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb + +Max application memory used: 384.62 MB virtual + 0.00 KB shared. + +Hex stopping: Mon Feb 19 14:04:05 2024 diff --git a/tests/data/AT8G88888_complex_6325_Ethylene0001.pdb b/tests/data/AT8G88888_complex_6325_Ethylene0001.pdb new file mode 100644 index 0000000..54bc1e8 --- /dev/null +++ b/tests/data/AT8G88888_complex_6325_Ethylene0001.pdb @@ -0,0 +1,205 @@ +REMARK File generated by Hex 8.0.0 on Mon Feb 19 13:27:06 2024. +REMARK Source: tests/data/AF2_AT8G88888_complex.pdb +REMARK Docked receptor coordinates... +REMARK Solution 1, from model "AF2_AT8G88888_complex", ID: 0040000b03710053 +REMARK Energy -7.055624e+01, RMS -1.00 +REMARK Overlap Volume 0.0, Clash Volume 0.0 +REMARK Box_min: -10.654 -8.723 -9.638 +REMARK Box_max: 13.655 4.141 13.473 +REMARK Cube_min: -10.654 -14.446 -10.237 +REMARK Cube_max: 13.655 9.863 14.072 +REMARK Symmetry Type: Default +REMARK Symmetry Matrix: 0 +ATOM 1 N MET A 1 -7.410 -1.552 9.979 1.00 71.91 +ATOM 2 CA MET A 1 -6.641 -0.817 8.946 1.00 71.91 +ATOM 3 C MET A 1 -5.303 -1.485 8.564 1.00 71.91 +ATOM 4 CB MET A 1 -6.474 0.646 9.390 1.00 71.91 +ATOM 5 O MET A 1 -4.645 -0.990 7.666 1.00 71.91 +ATOM 6 CG MET A 1 -6.092 1.613 8.265 1.00 71.91 +ATOM 7 SD MET A 1 -6.114 3.332 8.818 1.00 71.91 +ATOM 8 CE MET A 1 -5.405 4.141 7.358 1.00 71.91 +ATOM 9 1H MET A 1 -8.294 -1.042 10.181 1.00 99.99 +ATOM 10 2H MET A 1 -7.633 -2.507 9.631 1.00 99.99 +ATOM 11 3H MET A 1 -6.843 -1.622 10.848 1.00 99.99 +ATOM 12 N PHE A 2 -4.916 -2.639 9.137 1.00 82.96 +ATOM 13 CA PHE A 2 -3.656 -3.336 8.797 1.00 82.96 +ATOM 14 C PHE A 2 -3.544 -3.808 7.338 1.00 82.96 +ATOM 15 CB PHE A 2 -3.477 -4.529 9.748 1.00 82.96 +ATOM 16 O PHE A 2 -2.492 -3.654 6.732 1.00 82.96 +ATOM 17 CG PHE A 2 -2.837 -4.145 11.064 1.00 82.96 +ATOM 18 CD1 PHE A 2 -1.434 -4.085 11.145 1.00 82.96 +ATOM 19 CD2 PHE A 2 -3.618 -3.837 12.195 1.00 82.96 +ATOM 20 CE1 PHE A 2 -0.812 -3.714 12.349 1.00 82.96 +ATOM 21 CE2 PHE A 2 -2.994 -3.457 13.397 1.00 82.96 +ATOM 22 CZ PHE A 2 -1.591 -3.397 13.473 1.00 82.96 +ATOM 23 H PHE A 2 -5.539 -3.014 9.824 1.00 99.99 +ATOM 24 N ARG A 3 -4.644 -4.300 6.744 1.00 88.23 +ATOM 25 CA ARG A 3 -4.655 -4.785 5.350 1.00 88.23 +ATOM 26 C ARG A 3 -4.257 -3.729 4.317 1.00 88.23 +ATOM 27 CB ARG A 3 -6.038 -5.349 4.983 1.00 88.23 +ATOM 28 O ARG A 3 -3.766 -4.087 3.258 1.00 88.23 +ATOM 29 CG ARG A 3 -6.239 -6.783 5.494 1.00 88.23 +ATOM 30 CD ARG A 3 -7.610 -7.350 5.094 1.00 88.23 +ATOM 31 NE ARG A 3 -7.758 -7.481 3.629 1.00 88.23 +ATOM 32 NH1 ARG A 3 -9.874 -8.376 3.605 1.00 88.23 +ATOM 33 NH2 ARG A 3 -8.815 -7.999 1.679 1.00 88.23 +ATOM 34 CZ ARG A 3 -8.811 -7.948 2.980 1.00 88.23 +ATOM 35 H ARG A 3 -5.475 -4.324 7.301 1.00 99.99 +ATOM 36 HE ARG A 3 -6.985 -7.188 3.071 1.00 99.99 +ATOM 37 1HH1 ARG A 3 -10.654 -8.723 3.085 1.00 99.99 +ATOM 38 2HH1 ARG A 3 -9.908 -8.357 4.604 1.00 99.99 +ATOM 39 1HH2 ARG A 3 -9.615 -8.353 1.195 1.00 99.99 +ATOM 40 2HH2 ARG A 3 -8.018 -7.684 1.164 1.00 99.99 +ATOM 41 N PHE A 4 -4.500 -2.449 4.602 1.00 95.39 +ATOM 42 CA PHE A 4 -4.141 -1.385 3.667 1.00 95.39 +ATOM 43 C PHE A 4 -2.630 -1.112 3.666 1.00 95.39 +ATOM 44 CB PHE A 4 -4.950 -0.122 3.981 1.00 95.39 +ATOM 45 O PHE A 4 -2.042 -0.961 2.601 1.00 95.39 +ATOM 46 CG PHE A 4 -4.637 1.004 3.018 1.00 95.39 +ATOM 47 CD1 PHE A 4 -3.744 2.026 3.392 1.00 95.39 +ATOM 48 CD2 PHE A 4 -5.173 0.983 1.716 1.00 95.39 +ATOM 49 CE1 PHE A 4 -3.393 3.025 2.468 1.00 95.39 +ATOM 50 CE2 PHE A 4 -4.820 1.983 0.794 1.00 95.39 +ATOM 51 CZ PHE A 4 -3.931 3.004 1.170 1.00 95.39 +ATOM 52 H PHE A 4 -4.937 -2.236 5.477 1.00 99.99 +ATOM 53 N LEU A 5 -1.999 -1.093 4.848 1.00 95.07 +ATOM 54 CA LEU A 5 -0.549 -0.898 4.972 1.00 95.07 +ATOM 55 C LEU A 5 0.230 -2.068 4.366 1.00 95.07 +ATOM 56 CB LEU A 5 -0.169 -0.721 6.454 1.00 95.07 +ATOM 57 O LEU A 5 1.200 -1.845 3.652 1.00 95.07 +ATOM 58 CG LEU A 5 -0.676 0.581 7.097 1.00 95.07 +ATOM 59 CD1 LEU A 5 -0.344 0.575 8.590 1.00 95.07 +ATOM 60 CD2 LEU A 5 -0.045 1.824 6.468 1.00 95.07 +ATOM 61 H LEU A 5 -2.570 -1.220 5.660 1.00 99.99 +ATOM 62 N ASP A 6 -0.243 -3.291 4.604 1.00 94.98 +ATOM 63 CA ASP A 6 0.310 -4.514 4.017 1.00 94.98 +ATOM 64 C ASP A 6 0.268 -4.491 2.476 1.00 94.98 +ATOM 65 CB ASP A 6 -0.502 -5.675 4.596 1.00 94.98 +ATOM 66 O ASP A 6 1.259 -4.793 1.812 1.00 94.98 +ATOM 67 CG ASP A 6 0.008 -7.032 4.128 1.00 94.98 +ATOM 68 OD1 ASP A 6 -0.791 -7.719 3.454 1.00 94.98 +ATOM 69 OD2 ASP A 6 1.140 -7.372 4.527 1.00 94.98 +ATOM 70 H ASP A 6 -1.025 -3.331 5.228 1.00 99.99 +ATOM 71 N TRP A 7 -0.843 -4.021 1.895 1.00 94.94 +ATOM 72 CA TRP A 7 -0.972 -3.855 0.445 1.00 94.94 +ATOM 73 C TRP A 7 0.004 -2.814 -0.129 1.00 94.94 +ATOM 74 CB TRP A 7 -2.424 -3.518 0.091 1.00 94.94 +ATOM 75 O TRP A 7 0.686 -3.117 -1.103 1.00 94.94 +ATOM 76 CG TRP A 7 -2.650 -3.240 -1.363 1.00 94.94 +ATOM 77 CD1 TRP A 7 -2.691 -4.169 -2.344 1.00 94.94 +ATOM 78 CD2 TRP A 7 -2.756 -1.947 -2.035 1.00 94.94 +ATOM 79 CE2 TRP A 7 -2.861 -2.176 -3.439 1.00 94.94 +ATOM 80 CE3 TRP A 7 -2.755 -0.604 -1.602 1.00 94.94 +ATOM 81 NE1 TRP A 7 -2.829 -3.547 -3.569 1.00 94.94 +ATOM 82 CH2 TRP A 7 -2.931 0.196 -3.906 1.00 94.94 +ATOM 83 CZ2 TRP A 7 -2.944 -1.130 -4.369 1.00 94.94 +ATOM 84 CZ3 TRP A 7 -2.849 0.455 -2.526 1.00 94.94 +ATOM 85 H TRP A 7 -1.598 -3.783 2.507 1.00 99.99 +ATOM 86 HE1 TRP A 7 -2.899 -3.980 -4.446 1.00 99.99 +ATOM 87 N ILE A 8 0.122 -1.619 0.473 1.00 95.58 +ATOM 88 CA ILE A 8 1.094 -0.599 0.022 1.00 95.58 +ATOM 89 C ILE A 8 2.526 -1.128 0.124 1.00 95.58 +ATOM 90 CB ILE A 8 0.955 0.719 0.824 1.00 95.58 +ATOM 91 O ILE A 8 3.316 -0.913 -0.792 1.00 95.58 +ATOM 92 CG1 ILE A 8 -0.332 1.498 0.479 1.00 95.58 +ATOM 93 CG2 ILE A 8 2.174 1.654 0.660 1.00 95.58 +ATOM 94 CD1 ILE A 8 -0.352 2.171 -0.903 1.00 95.58 +ATOM 95 H ILE A 8 -0.482 -1.442 1.251 1.00 99.99 +ATOM 96 N PHE A 9 2.857 -1.824 1.215 1.00 95.75 +ATOM 97 CA PHE A 9 4.181 -2.409 1.400 1.00 95.75 +ATOM 98 C PHE A 9 4.484 -3.426 0.296 1.00 95.75 +ATOM 99 CB PHE A 9 4.269 -3.029 2.799 1.00 95.75 +ATOM 100 O PHE A 9 5.521 -3.325 -0.350 1.00 95.75 +ATOM 101 CG PHE A 9 5.685 -3.372 3.208 1.00 95.75 +ATOM 102 CD1 PHE A 9 6.205 -4.658 2.976 1.00 95.75 +ATOM 103 CD2 PHE A 9 6.494 -2.385 3.801 1.00 95.75 +ATOM 104 CE1 PHE A 9 7.529 -4.956 3.343 1.00 95.75 +ATOM 105 CE2 PHE A 9 7.817 -2.684 4.169 1.00 95.75 +ATOM 106 CZ PHE A 9 8.334 -3.970 3.939 1.00 95.75 +ATOM 107 H PHE A 9 2.148 -1.930 1.913 1.00 99.99 +ATOM 108 N THR A 10 3.528 -4.316 0.010 1.00 95.85 +ATOM 109 CA THR A 10 3.622 -5.299 -1.079 1.00 95.85 +ATOM 110 C THR A 10 3.835 -4.624 -2.434 1.00 95.85 +ATOM 111 CB THR A 10 2.357 -6.170 -1.140 1.00 95.85 +ATOM 112 O THR A 10 4.733 -5.013 -3.175 1.00 95.85 +ATOM 113 CG2 THR A 10 2.445 -7.255 -2.212 1.00 95.85 +ATOM 114 OG1 THR A 10 2.146 -6.838 0.082 1.00 95.85 +ATOM 115 H THR A 10 2.717 -4.277 0.596 1.00 99.99 +ATOM 116 HG1 THR A 10 1.357 -7.353 -0.022 1.00 99.99 +ATOM 117 N VAL A 11 3.047 -3.592 -2.759 1.00 96.10 +ATOM 118 CA VAL A 11 3.177 -2.859 -4.029 1.00 96.10 +ATOM 119 C VAL A 11 4.548 -2.188 -4.140 1.00 96.10 +ATOM 120 CB VAL A 11 2.034 -1.841 -4.204 1.00 96.10 +ATOM 121 O VAL A 11 5.202 -2.317 -5.174 1.00 96.10 +ATOM 122 CG1 VAL A 11 2.239 -0.928 -5.422 1.00 96.10 +ATOM 123 CG2 VAL A 11 0.692 -2.558 -4.412 1.00 96.10 +ATOM 124 H VAL A 11 2.351 -3.334 -2.088 1.00 99.99 +ATOM 125 N ALA A 12 5.008 -1.520 -3.077 1.00 95.59 +ATOM 126 CA ALA A 12 6.301 -0.839 -3.056 1.00 95.59 +ATOM 127 C ALA A 12 7.470 -1.817 -3.255 1.00 95.59 +ATOM 128 CB ALA A 12 6.430 -0.061 -1.741 1.00 95.59 +ATOM 129 O ALA A 12 8.385 -1.514 -4.019 1.00 95.59 +ATOM 130 H ALA A 12 4.410 -1.508 -2.275 1.00 99.99 +ATOM 131 N THR A 13 7.419 -3.001 -2.635 1.00 94.11 +ATOM 132 CA THR A 13 8.445 -4.039 -2.818 1.00 94.11 +ATOM 133 C THR A 13 8.398 -4.653 -4.216 1.00 94.11 +ATOM 134 CB THR A 13 8.369 -5.131 -1.738 1.00 94.11 +ATOM 135 O THR A 13 9.435 -4.745 -4.854 1.00 94.11 +ATOM 136 CG2 THR A 13 8.715 -4.573 -0.355 1.00 94.11 +ATOM 137 OG1 THR A 13 7.079 -5.693 -1.633 1.00 94.11 +ATOM 138 H THR A 13 6.638 -3.155 -2.029 1.00 99.99 +ATOM 139 HG1 THR A 13 7.123 -6.350 -0.951 1.00 99.99 +ATOM 140 N THR A 14 7.213 -4.968 -4.760 1.00 93.90 +ATOM 141 CA THR A 14 7.092 -5.543 -6.121 1.00 93.90 +ATOM 142 C THR A 14 7.535 -4.612 -7.245 1.00 93.90 +ATOM 143 CB THR A 14 5.651 -5.960 -6.455 1.00 93.90 +ATOM 144 O THR A 14 7.745 -5.072 -8.357 1.00 93.90 +ATOM 145 CG2 THR A 14 5.207 -7.198 -5.681 1.00 93.90 +ATOM 146 OG1 THR A 14 4.714 -4.932 -6.199 1.00 93.90 +ATOM 147 H THR A 14 6.400 -4.797 -4.202 1.00 99.99 +ATOM 148 HG1 THR A 14 3.862 -5.276 -6.432 1.00 99.99 +ATOM 149 N SER A 15 7.585 -3.303 -6.989 1.00 91.22 +ATOM 150 CA SER A 15 8.091 -2.321 -7.953 1.00 91.22 +ATOM 151 C SER A 15 9.588 -2.045 -7.815 1.00 91.22 +ATOM 152 CB SER A 15 7.310 -1.014 -7.807 1.00 91.22 +ATOM 153 O SER A 15 10.166 -1.402 -8.689 1.00 91.22 +ATOM 154 OG SER A 15 7.463 -0.403 -6.533 1.00 91.22 +ATOM 155 H SER A 15 7.254 -3.014 -6.090 1.00 99.99 +ATOM 156 HG SER A 15 6.945 0.391 -6.547 1.00 99.99 +ATOM 157 N LEU A 16 10.169 -2.425 -6.673 1.00 85.38 +ATOM 158 CA LEU A 16 11.572 -2.198 -6.342 1.00 85.38 +ATOM 159 C LEU A 16 12.447 -3.386 -6.770 1.00 85.38 +ATOM 160 H LEU A 16 9.561 -2.894 -6.031 1.00 99.99 +TER 161 LEU A 16 +ATOM 162 CB LEU B 16 11.669 -1.927 -4.830 1.00 85.38 +ATOM 163 O LEU B 16 13.583 -3.156 -7.181 1.00 85.38 +ATOM 164 CG LEU B 16 13.080 -1.545 -4.347 1.00 85.38 +ATOM 165 CD1 LEU B 16 13.490 -0.148 -4.827 1.00 85.38 +ATOM 166 CD2 LEU B 16 13.114 -1.545 -2.817 1.00 85.38 +ATOM 167 N ASP B 17 11.914 -4.607 -6.661 1.00 73.64 +ATOM 168 CA ASP B 17 12.422 -5.816 -7.331 1.00 73.64 +ATOM 169 C ASP B 17 12.082 -5.810 -8.833 1.00 73.64 +ATOM 170 CB ASP B 17 11.841 -7.073 -6.638 1.00 73.64 +ATOM 171 O ASP B 17 12.961 -6.201 -9.638 1.00 73.64 +ATOM 172 CG ASP B 17 12.463 -7.412 -5.269 1.00 73.64 +ATOM 173 OD1 ASP B 17 13.655 -7.806 -5.237 1.00 73.64 +ATOM 174 OD2 ASP B 17 11.729 -7.366 -4.249 1.00 73.64 +ATOM 175 OXT ASP B 17 10.937 -5.420 -9.159 1.00 73.64 +TER 176 ASP B 17 +REMARK Source: tests/data/6325_Ethylene.sdf +REMARK Docked ligand coordinates... +REMARK Solution 1, from model "6325_Ethylene", ID: 0040000b03710053 +REMARK Energy -7.055624e+01, RMS -1.00 +REMARK Overlap Volume 0.0, Clash Volume 0.0 +REMARK Box_min: -10.654 -8.723 -9.638 +REMARK Box_max: 13.655 4.141 13.473 +REMARK Cube_min: -10.654 -14.446 -10.237 +REMARK Cube_max: 13.655 9.863 14.072 +REMARK Symmetry Type: Default +REMARK Symmetry Matrix: 0 +ATOM 177 C SDF A 1 -0.275 -0.845 -2.305 1.00 99.99 +ATOM 178 C SDF A 1 0.981 -0.395 -2.301 1.00 99.99 +ATOM 179 H SDF A 1 -1.106 -0.166 -2.458 1.00 99.99 +ATOM 180 H SDF A 1 -0.487 -1.898 -2.156 1.00 99.99 +ATOM 181 H SDF A 1 1.813 -1.073 -2.148 1.00 99.99 +ATOM 182 H SDF A 1 1.193 0.658 -2.449 1.00 99.99 diff --git a/tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb b/tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb new file mode 100644 index 0000000..b4b564a --- /dev/null +++ b/tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb @@ -0,0 +1,205 @@ +REMARK File generated by Hex 8.0.0 on Mon Feb 19 14:02:49 2024. +REMARK Source: tests/data/AF2_AT9G99999_monomer.pdb +REMARK Docked receptor coordinates... +REMARK Solution 1, from model "AF2_AT9G99999_monomer", ID: 004a000c059c0018 +REMARK Energy -7.034417e+01, RMS -1.00 +REMARK Overlap Volume 0.0, Clash Volume 0.0 +REMARK Box_min: -10.654 -8.723 -9.638 +REMARK Box_max: 13.655 4.141 13.473 +REMARK Cube_min: -10.654 -14.446 -10.237 +REMARK Cube_max: 13.655 9.863 14.072 +REMARK Symmetry Type: Default +REMARK Symmetry Matrix: 0 +ATOM 1 N MET A 1 -7.410 -1.552 9.979 1.00 71.91 +ATOM 2 CA MET A 1 -6.641 -0.817 8.946 1.00 71.91 +ATOM 3 C MET A 1 -5.303 -1.485 8.564 1.00 71.91 +ATOM 4 CB MET A 1 -6.474 0.646 9.390 1.00 71.91 +ATOM 5 O MET A 1 -4.645 -0.990 7.666 1.00 71.91 +ATOM 6 CG MET A 1 -6.092 1.613 8.265 1.00 71.91 +ATOM 7 SD MET A 1 -6.114 3.332 8.818 1.00 71.91 +ATOM 8 CE MET A 1 -5.405 4.141 7.358 1.00 71.91 +ATOM 9 1H MET A 1 -8.294 -1.042 10.181 1.00 99.99 +ATOM 10 2H MET A 1 -7.633 -2.507 9.631 1.00 99.99 +ATOM 11 3H MET A 1 -6.843 -1.622 10.848 1.00 99.99 +ATOM 12 N PHE A 2 -4.916 -2.639 9.137 1.00 82.96 +ATOM 13 CA PHE A 2 -3.656 -3.336 8.797 1.00 82.96 +ATOM 14 C PHE A 2 -3.544 -3.808 7.338 1.00 82.96 +ATOM 15 CB PHE A 2 -3.477 -4.529 9.748 1.00 82.96 +ATOM 16 O PHE A 2 -2.492 -3.654 6.732 1.00 82.96 +ATOM 17 CG PHE A 2 -2.837 -4.145 11.064 1.00 82.96 +ATOM 18 CD1 PHE A 2 -1.434 -4.085 11.145 1.00 82.96 +ATOM 19 CD2 PHE A 2 -3.618 -3.837 12.195 1.00 82.96 +ATOM 20 CE1 PHE A 2 -0.812 -3.714 12.349 1.00 82.96 +ATOM 21 CE2 PHE A 2 -2.994 -3.457 13.397 1.00 82.96 +ATOM 22 CZ PHE A 2 -1.591 -3.397 13.473 1.00 82.96 +ATOM 23 H PHE A 2 -5.539 -3.014 9.824 1.00 99.99 +ATOM 24 N ARG A 3 -4.644 -4.300 6.744 1.00 88.23 +ATOM 25 CA ARG A 3 -4.655 -4.785 5.350 1.00 88.23 +ATOM 26 C ARG A 3 -4.257 -3.729 4.317 1.00 88.23 +ATOM 27 CB ARG A 3 -6.038 -5.349 4.983 1.00 88.23 +ATOM 28 O ARG A 3 -3.766 -4.087 3.258 1.00 88.23 +ATOM 29 CG ARG A 3 -6.239 -6.783 5.494 1.00 88.23 +ATOM 30 CD ARG A 3 -7.610 -7.350 5.094 1.00 88.23 +ATOM 31 NE ARG A 3 -7.758 -7.481 3.629 1.00 88.23 +ATOM 32 NH1 ARG A 3 -9.874 -8.376 3.605 1.00 88.23 +ATOM 33 NH2 ARG A 3 -8.815 -7.999 1.679 1.00 88.23 +ATOM 34 CZ ARG A 3 -8.811 -7.948 2.980 1.00 88.23 +ATOM 35 H ARG A 3 -5.475 -4.324 7.301 1.00 99.99 +ATOM 36 HE ARG A 3 -6.985 -7.188 3.071 1.00 99.99 +ATOM 37 1HH1 ARG A 3 -10.654 -8.723 3.085 1.00 99.99 +ATOM 38 2HH1 ARG A 3 -9.908 -8.357 4.604 1.00 99.99 +ATOM 39 1HH2 ARG A 3 -9.615 -8.353 1.195 1.00 99.99 +ATOM 40 2HH2 ARG A 3 -8.018 -7.684 1.164 1.00 99.99 +ATOM 41 N PHE A 4 -4.500 -2.449 4.602 1.00 95.39 +ATOM 42 CA PHE A 4 -4.141 -1.385 3.667 1.00 95.39 +ATOM 43 C PHE A 4 -2.630 -1.112 3.666 1.00 95.39 +ATOM 44 CB PHE A 4 -4.950 -0.122 3.981 1.00 95.39 +ATOM 45 O PHE A 4 -2.042 -0.961 2.601 1.00 95.39 +ATOM 46 CG PHE A 4 -4.637 1.004 3.018 1.00 95.39 +ATOM 47 CD1 PHE A 4 -3.744 2.026 3.392 1.00 95.39 +ATOM 48 CD2 PHE A 4 -5.173 0.983 1.716 1.00 95.39 +ATOM 49 CE1 PHE A 4 -3.393 3.025 2.468 1.00 95.39 +ATOM 50 CE2 PHE A 4 -4.820 1.983 0.794 1.00 95.39 +ATOM 51 CZ PHE A 4 -3.931 3.004 1.170 1.00 95.39 +ATOM 52 H PHE A 4 -4.937 -2.236 5.477 1.00 99.99 +ATOM 53 N LEU A 5 -1.999 -1.093 4.848 1.00 95.07 +ATOM 54 CA LEU A 5 -0.549 -0.898 4.972 1.00 95.07 +ATOM 55 C LEU A 5 0.230 -2.068 4.366 1.00 95.07 +ATOM 56 CB LEU A 5 -0.169 -0.721 6.454 1.00 95.07 +ATOM 57 O LEU A 5 1.200 -1.845 3.652 1.00 95.07 +ATOM 58 CG LEU A 5 -0.676 0.581 7.097 1.00 95.07 +ATOM 59 CD1 LEU A 5 -0.344 0.575 8.590 1.00 95.07 +ATOM 60 CD2 LEU A 5 -0.045 1.824 6.468 1.00 95.07 +ATOM 61 H LEU A 5 -2.570 -1.220 5.660 1.00 99.99 +ATOM 62 N ASP A 6 -0.243 -3.291 4.604 1.00 94.98 +ATOM 63 CA ASP A 6 0.310 -4.514 4.017 1.00 94.98 +ATOM 64 C ASP A 6 0.268 -4.491 2.476 1.00 94.98 +ATOM 65 CB ASP A 6 -0.502 -5.675 4.596 1.00 94.98 +ATOM 66 O ASP A 6 1.259 -4.793 1.812 1.00 94.98 +ATOM 67 CG ASP A 6 0.008 -7.032 4.128 1.00 94.98 +ATOM 68 OD1 ASP A 6 -0.791 -7.719 3.454 1.00 94.98 +ATOM 69 OD2 ASP A 6 1.140 -7.372 4.527 1.00 94.98 +ATOM 70 H ASP A 6 -1.025 -3.331 5.228 1.00 99.99 +ATOM 71 N TRP A 7 -0.843 -4.021 1.895 1.00 94.94 +ATOM 72 CA TRP A 7 -0.972 -3.855 0.445 1.00 94.94 +ATOM 73 C TRP A 7 0.004 -2.814 -0.129 1.00 94.94 +ATOM 74 CB TRP A 7 -2.424 -3.518 0.091 1.00 94.94 +ATOM 75 O TRP A 7 0.686 -3.117 -1.103 1.00 94.94 +ATOM 76 CG TRP A 7 -2.650 -3.240 -1.363 1.00 94.94 +ATOM 77 CD1 TRP A 7 -2.691 -4.169 -2.344 1.00 94.94 +ATOM 78 CD2 TRP A 7 -2.756 -1.947 -2.035 1.00 94.94 +ATOM 79 CE2 TRP A 7 -2.861 -2.176 -3.439 1.00 94.94 +ATOM 80 CE3 TRP A 7 -2.755 -0.604 -1.602 1.00 94.94 +ATOM 81 NE1 TRP A 7 -2.829 -3.547 -3.569 1.00 94.94 +ATOM 82 CH2 TRP A 7 -2.931 0.196 -3.906 1.00 94.94 +ATOM 83 CZ2 TRP A 7 -2.944 -1.130 -4.369 1.00 94.94 +ATOM 84 CZ3 TRP A 7 -2.849 0.455 -2.526 1.00 94.94 +ATOM 85 H TRP A 7 -1.598 -3.783 2.507 1.00 99.99 +ATOM 86 HE1 TRP A 7 -2.899 -3.980 -4.446 1.00 99.99 +ATOM 87 N ILE A 8 0.122 -1.619 0.473 1.00 95.58 +ATOM 88 CA ILE A 8 1.094 -0.599 0.022 1.00 95.58 +ATOM 89 C ILE A 8 2.526 -1.128 0.124 1.00 95.58 +ATOM 90 CB ILE A 8 0.955 0.719 0.824 1.00 95.58 +ATOM 91 O ILE A 8 3.316 -0.913 -0.792 1.00 95.58 +ATOM 92 CG1 ILE A 8 -0.332 1.498 0.479 1.00 95.58 +ATOM 93 CG2 ILE A 8 2.174 1.654 0.660 1.00 95.58 +ATOM 94 CD1 ILE A 8 -0.352 2.171 -0.903 1.00 95.58 +ATOM 95 H ILE A 8 -0.482 -1.442 1.251 1.00 99.99 +ATOM 96 N PHE A 9 2.857 -1.824 1.215 1.00 95.75 +ATOM 97 CA PHE A 9 4.181 -2.409 1.400 1.00 95.75 +ATOM 98 C PHE A 9 4.484 -3.426 0.296 1.00 95.75 +ATOM 99 CB PHE A 9 4.269 -3.029 2.799 1.00 95.75 +ATOM 100 O PHE A 9 5.521 -3.325 -0.350 1.00 95.75 +ATOM 101 CG PHE A 9 5.685 -3.372 3.208 1.00 95.75 +ATOM 102 CD1 PHE A 9 6.205 -4.658 2.976 1.00 95.75 +ATOM 103 CD2 PHE A 9 6.494 -2.385 3.801 1.00 95.75 +ATOM 104 CE1 PHE A 9 7.529 -4.956 3.343 1.00 95.75 +ATOM 105 CE2 PHE A 9 7.817 -2.684 4.169 1.00 95.75 +ATOM 106 CZ PHE A 9 8.334 -3.970 3.939 1.00 95.75 +ATOM 107 H PHE A 9 2.148 -1.930 1.913 1.00 99.99 +ATOM 108 N THR A 10 3.528 -4.316 0.010 1.00 95.85 +ATOM 109 CA THR A 10 3.622 -5.299 -1.079 1.00 95.85 +ATOM 110 C THR A 10 3.835 -4.624 -2.434 1.00 95.85 +ATOM 111 CB THR A 10 2.357 -6.170 -1.140 1.00 95.85 +ATOM 112 O THR A 10 4.733 -5.013 -3.175 1.00 95.85 +ATOM 113 CG2 THR A 10 2.445 -7.255 -2.212 1.00 95.85 +ATOM 114 OG1 THR A 10 2.146 -6.838 0.082 1.00 95.85 +ATOM 115 H THR A 10 2.717 -4.277 0.596 1.00 99.99 +ATOM 116 HG1 THR A 10 1.357 -7.353 -0.022 1.00 99.99 +ATOM 117 N VAL A 11 3.047 -3.592 -2.759 1.00 96.10 +ATOM 118 CA VAL A 11 3.177 -2.859 -4.029 1.00 96.10 +ATOM 119 C VAL A 11 4.548 -2.188 -4.140 1.00 96.10 +ATOM 120 CB VAL A 11 2.034 -1.841 -4.204 1.00 96.10 +ATOM 121 O VAL A 11 5.202 -2.317 -5.174 1.00 96.10 +ATOM 122 CG1 VAL A 11 2.239 -0.928 -5.422 1.00 96.10 +ATOM 123 CG2 VAL A 11 0.692 -2.558 -4.412 1.00 96.10 +ATOM 124 H VAL A 11 2.351 -3.334 -2.088 1.00 99.99 +ATOM 125 N ALA A 12 5.008 -1.520 -3.077 1.00 95.59 +ATOM 126 CA ALA A 12 6.301 -0.839 -3.056 1.00 95.59 +ATOM 127 C ALA A 12 7.470 -1.817 -3.255 1.00 95.59 +ATOM 128 CB ALA A 12 6.430 -0.061 -1.741 1.00 95.59 +ATOM 129 O ALA A 12 8.385 -1.514 -4.019 1.00 95.59 +ATOM 130 H ALA A 12 4.410 -1.508 -2.275 1.00 99.99 +ATOM 131 N THR A 13 7.419 -3.001 -2.635 1.00 94.11 +ATOM 132 CA THR A 13 8.445 -4.039 -2.818 1.00 94.11 +ATOM 133 C THR A 13 8.398 -4.653 -4.216 1.00 94.11 +ATOM 134 CB THR A 13 8.369 -5.131 -1.738 1.00 94.11 +ATOM 135 O THR A 13 9.435 -4.745 -4.854 1.00 94.11 +ATOM 136 CG2 THR A 13 8.715 -4.573 -0.355 1.00 94.11 +ATOM 137 OG1 THR A 13 7.079 -5.693 -1.633 1.00 94.11 +ATOM 138 H THR A 13 6.638 -3.155 -2.029 1.00 99.99 +ATOM 139 HG1 THR A 13 7.123 -6.350 -0.951 1.00 99.99 +ATOM 140 N THR A 14 7.213 -4.968 -4.760 1.00 93.90 +ATOM 141 CA THR A 14 7.092 -5.543 -6.121 1.00 93.90 +ATOM 142 C THR A 14 7.535 -4.612 -7.245 1.00 93.90 +ATOM 143 CB THR A 14 5.651 -5.960 -6.455 1.00 93.90 +ATOM 144 O THR A 14 7.745 -5.072 -8.357 1.00 93.90 +ATOM 145 CG2 THR A 14 5.207 -7.198 -5.681 1.00 93.90 +ATOM 146 OG1 THR A 14 4.714 -4.932 -6.199 1.00 93.90 +ATOM 147 H THR A 14 6.400 -4.797 -4.202 1.00 99.99 +ATOM 148 HG1 THR A 14 3.862 -5.276 -6.432 1.00 99.99 +ATOM 149 N SER A 15 7.585 -3.303 -6.989 1.00 91.22 +ATOM 150 CA SER A 15 8.091 -2.321 -7.953 1.00 91.22 +ATOM 151 C SER A 15 9.588 -2.045 -7.815 1.00 91.22 +ATOM 152 CB SER A 15 7.310 -1.014 -7.807 1.00 91.22 +ATOM 153 O SER A 15 10.166 -1.402 -8.689 1.00 91.22 +ATOM 154 OG SER A 15 7.463 -0.403 -6.533 1.00 91.22 +ATOM 155 H SER A 15 7.254 -3.014 -6.090 1.00 99.99 +ATOM 156 HG SER A 15 6.945 0.391 -6.547 1.00 99.99 +ATOM 157 N LEU A 16 10.169 -2.425 -6.673 1.00 85.38 +ATOM 158 CA LEU A 16 11.572 -2.198 -6.342 1.00 85.38 +ATOM 159 C LEU A 16 12.447 -3.386 -6.770 1.00 85.38 +ATOM 160 CB LEU A 16 11.669 -1.927 -4.830 1.00 85.38 +ATOM 161 O LEU A 16 13.583 -3.156 -7.181 1.00 85.38 +ATOM 162 CG LEU A 16 13.080 -1.545 -4.347 1.00 85.38 +ATOM 163 CD1 LEU A 16 13.490 -0.148 -4.827 1.00 85.38 +ATOM 164 CD2 LEU A 16 13.114 -1.545 -2.817 1.00 85.38 +ATOM 165 H LEU A 16 9.561 -2.894 -6.031 1.00 99.99 +ATOM 166 N ASP A 17 11.914 -4.607 -6.661 1.00 73.64 +ATOM 167 CA ASP A 17 12.422 -5.816 -7.331 1.00 73.64 +ATOM 168 C ASP A 17 12.082 -5.810 -8.833 1.00 73.64 +ATOM 169 CB ASP A 17 11.841 -7.073 -6.638 1.00 73.64 +ATOM 170 O ASP A 17 12.961 -6.201 -9.638 1.00 73.64 +ATOM 171 CG ASP A 17 12.463 -7.412 -5.269 1.00 73.64 +ATOM 172 OD1 ASP A 17 13.655 -7.806 -5.237 1.00 73.64 +ATOM 173 OD2 ASP A 17 11.729 -7.366 -4.249 1.00 73.64 +ATOM 174 OXT ASP A 17 10.937 -5.420 -9.159 1.00 73.64 +ATOM 175 H ASP A 17 11.110 -4.650 -6.067 1.00 99.99 +TER 176 ASP A 17 +REMARK Source: tests/data/6325_Ethylene.sdf +REMARK Docked ligand coordinates... +REMARK Solution 1, from model "6325_Ethylene", ID: 004a000c059c0018 +REMARK Energy -7.034417e+01, RMS -1.00 +REMARK Overlap Volume 0.0, Clash Volume 0.0 +REMARK Box_min: -10.654 -8.723 -9.638 +REMARK Box_max: 13.655 4.141 13.473 +REMARK Cube_min: -10.654 -14.446 -10.237 +REMARK Cube_max: 13.655 9.863 14.072 +REMARK Symmetry Type: Default +REMARK Symmetry Matrix: 0 +ATOM 177 C SDF A 1 0.249 0.367 -3.298 1.00 99.99 +ATOM 178 C SDF A 1 -0.044 -0.621 -2.450 1.00 99.99 +ATOM 179 H SDF A 1 -0.193 0.398 -4.287 1.00 99.99 +ATOM 180 H SDF A 1 0.935 1.156 -3.013 1.00 99.99 +ATOM 181 H SDF A 1 0.399 -0.652 -1.461 1.00 99.99 +ATOM 182 H SDF A 1 -0.730 -1.410 -2.735 1.00 99.99 diff --git a/tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt b/tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt new file mode 100644 index 0000000..b48ca48 --- /dev/null +++ b/tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt @@ -0,0 +1,218 @@ +Hex 8.0.0 starting at Mon Feb 19 14:02:25 2024 on host bar. + +Hostname: bar +Main memory: 128813 Mb +CPUs online: 32 +GPUs online: 0 +Compute threads: 24 + +Sizeof(short): 2 +Sizeof(int): 4 +Sizeof(long): 8 +Sizeof(long int):8 +Sizeof(float): 4 +Sizeof(double): 8 +Sizeof(octa): 8 +Sizeof(int *): 8 +Sizeof(long *): 8 +Sizeof(void *): 8 + +Default path: /home/dnguyen/BAR_API_forked/BAR_API +PATH: /home/dnguyen/hex/exe:/home/dnguyen/hex/bin:/home/dnguyen/BAR_API_forked/BAR_API/venv/bin:/home/dnguyen/.vscode-server/bin/8b3775030ed1a69b13e4f4c628c612102e30a681/bin/remote-cli:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/ho +Hex program: /home/dnguyen/hex/exe/hex8.0.0-nogui.x64 +HEX_ROOT: /home/dnguyen/hex +HEX_STRIDE: stride.x64 +HEX_PDB: NULL +HEX_DATA: NULL +HEX_MACROS: NULL +HEX_COLOURS: NULL +HEX_STARTUP: NULL +HEX_FONTFILE: NULL +HEX_PIPE: NULL +HEX_CACHE: /home/dnguyen/hex_cache +HEX_LOG: NULL +HEX_CPUS: NULL +HEX_GPUS: NULL +HEX_FIRST_GPU: NULL +HEX_GTO_SCALE: NULL +HEX_ETO_SCALE: NULL + +CUDA enabled: No + + + +Running HEX_STARTUP file: /home/dnguyen/hex/data/startup_v5.mac +Disc Cache enabled. Using directory: /home/dnguyen/hex_cache + + +Reading commands from stdin ... +>> open_receptor tests/data/AF2_AT9G99999_monomer.pdb +Assuming tests/data/AF2_AT9G99999_monomer.pdb is a PDB file... + +Opened PDB file: tests/data/AF2_AT9G99999_monomer.pdb, ID = AF2_AT9G99999_monomer +Loaded PDB file: tests/data/AF2_AT9G99999_monomer.pdb, (18 residues, 176 atoms, 1 models) +Counted 1 +ve and 2 -ve formal charged residues: Net formal charge: -1 +>AF2_AT9G99999_monomer A +MFRFLDWIFTVATTSLD +>> open_ligand tests/data/6325_Ethylene.sdf +Assuming tests/data/6325_Ethylene.sdf is an SDF file... + +Opened SDF file: tests/data/6325_Ethylene.sdf, ID = 6325_Ethylene +>> docking_correlation 1 +Docking with shape+electrostatics. +>> docking_score_threshold 0 +>> max_docking_solutions 25 +>> docking_receptor_stepsize 5.50 +Receptor step size: 5.50 deg. B=32, M=64, T=1692. +>> docking_ligand_stepsize 5.50 +Ligand step size: 5.50 deg. B=32, M=64, T=1692. +>> docking_alpha_stepsize 2.80 +Twist step size: 2.80 deg. B=64, M=128. +>> docking_main_scan 16 +>> receptor_origin C-825:VAL-O +*Warning* No Match: C-825:VAL-O -> C-825:VAL-O (Chain-ResidueID:ResidueName-AtomName) +>> commit_edits +>> activate_docking + +Contouring surface for molecule AF2_AT9G99999_monomer. +Polar probe = 1.40A, Apolar probe = 1.40A +Gaussian sampling over 146 atoms done in 0.03 seconds. +Contoured 37544 triangles (18774 vertices) in 0.03 seconds. +Surface traversal done in 0.01 seconds - Found 1 surface segments. +Primary surface: Area = 1893.22, Volume = 5111.06. +Culled 0 small segments in 0.01 seconds. +Total contouring time: 0.05 seconds. + + +Contouring surface for molecule 6325_Ethylene. +Polar probe = 1.40A, Apolar probe = 1.40A +Gaussian sampling over 2 atoms done in 0.00 seconds. +Contoured 3244 triangles (1624 vertices) in 0.00 seconds. +Surface traversal done in 0.00 seconds - Found 1 surface segments. +Primary surface: Area = 164.05, Volume = 195.88. +Culled 0 small segments in 0.00 seconds. +Total contouring time: 0.00 seconds. + +Sampling surface and interior volumes for molecule AF2_AT9G99999_monomer. +Generated 14696 exterior and 5220 interior skin grid cells. +Exterior skin volume = 3174.34; interior skin volume = 1127.52. +Volume sampling done in 0.04 seconds. +Sampling surface and interior volumes for molecule 6325_Ethylene. +Generated 966 exterior and 123 interior skin grid cells. +Exterior skin volume = 208.66; interior skin volume = 26.57. +Volume sampling done in 0.00 seconds. + +Calculating potential to N = 25 (5525 coefficients) using 24 Tasks ... +Grid: 74x74x74 = 405224 cells (20309 non-zero) of 0.60 Angstroms. +Done integration over 20309 cells in 0.06s (321208/s). + +Calculating electrostatics for molecule AF2_AT9G99999_monomer. +Charge density for molecule AF2_AT9G99999_monomer to N = 25: 173 atoms done in 0.00 seconds. +Potential for molecule AF2_AT9G99999_monomer to N = 25 done in 0.02 seconds. +Calculating electrostatics for molecule 6325_Ethylene. +Charge density for molecule 6325_Ethylene to N = 25: 0 atoms done in 0.00 seconds. +Potential for molecule 6325_Ethylene to N = 25 done in 0.01 seconds. + +------------------------------------------------------------------------------ +Docking will output a maximum of 25 solutions per pair... + +------------------------------------------------------------------------------ +Docking 1 pair of starting orientations... + +Docking receptor: AF2_AT9G99999_monomer and ligand: 6325_Ethylene... + +Receptor AF2_AT9G99999_monomer: Tag = AF2_AT9G99999_monomer +Ligand 6325_Ethylene: Tag = 6325_Ethylene + +Setting up shape + electrostatics correlation. + +Starting SPF search. +Setting docking_score threshold = 0.0 +Setting 30 distance samples from 0.00 to 23.20, with steps of 0.80. + + +Total 6D space: Iterate[30,1692,1] x FFT[128,32,64] = 13306429440. +Initial rotational increments (N=16) Receptor: 1692 (39Mb), Ligand: 1692 (39Mb) +Applying 1692+1692 coefficient rotations on 24 CPUs for N=16. +Done 3384 rotations in a total of 0.08s (44932/s). + +Starting 3D FFT search using 24 CPUs and 0 GPUs with N=16, Nalpha=128/128. +Estart = 68.63. +Done 13306429440 orientations in 23.31s (570939787/s). +Found 51678702/13306429440 within score threshold = 0.0 NOT including start guess. + +Time spent culling 42*1600000 solutions = 7.90s. +Starting guess not found in top 1278702 solutions. +Emin = -68.28, Emax = -0.00 + +Re-sampling top 40000 orientations -> top 28433 retained. +Surviving rotational steps (N=25) Receptor: 48 (9Mb), Ligand: 1441 (243Mb) +Applying 48+1441 coefficient rotations on 24 CPUs for N=25. +Done 1489 rotations in a total of 0.11s (12949/s). + +Starting 1D FFT refinement using 24 CPUs and 0 GPUs with N=25, Nalpha=128/128. +Estart = 59.61. +Done 3639424 orientations in 0.24s (15429264/s). +Found 70787/3639424 within score threshold = 0.0 NOT including start guess. + + +Solution buffer reached 70787/200000 = 35.4% occupancy with no culling. +Starting guess not found in top 70787 solutions. +Emin = -70.34, Emax = -6.16 + +Docking correlation summary by RMS deviation and steric clashes +------------------------------------------------------------------------- + Soln Etotal Eshape Eforce Eair RMS Bumps + ---- --------- --------- --------- --------- ---------------- ----- + + +Docked structures AF2_AT9G99999_monomer:6325_Ethylene in a total of 0 min, 24 sec. + + +------------------------------------------------------------------------------ +Saving top 25 orientations. + +Docking done in a total of 0 min, 24 sec. + + +------------------------------------------------------------------------------ + +No AIRs enabled or defined. Skipping restraint checks. +Clustering found 1 clusters from 25 docking solutions in 0.00 seconds. + +---- ---- ------- ------- ------- ------- ------- --- ----- +Clst Soln Models Etotal Eshape Eforce Eair Bmp RMS +---- ---- ------- ------- ------- ------- ------- --- ----- + 1 1 001:001 -70.3 -70.3 0.0 0.0 -1 -1.00 + 1 2 001:001 -70.2 -70.2 0.0 0.0 -1 -1.00 + 1 3 001:001 -70.2 -70.2 0.0 0.0 -1 -1.00 + 1 4 001:001 -70.1 -70.1 0.0 0.0 -1 -1.00 + 1 5 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 6 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 7 001:001 -70.0 -70.0 0.0 0.0 -1 -1.00 + 1 8 001:001 -69.8 -69.8 0.0 0.0 -1 -1.00 + 1 9 001:001 -69.8 -69.8 0.0 0.0 -1 -1.00 + 1 10 001:001 -69.8 -69.8 0.0 0.0 -1 -1.00 + 1 11 001:001 -69.8 -69.8 0.0 0.0 -1 -1.00 + 1 12 001:001 -69.8 -69.8 0.0 0.0 -1 -1.00 + 1 13 001:001 -69.7 -69.7 0.0 0.0 -1 -1.00 + 1 14 001:001 -69.7 -69.7 0.0 0.0 -1 -1.00 + 1 15 001:001 -69.6 -69.6 0.0 0.0 -1 -1.00 + 1 16 001:001 -69.6 -69.6 0.0 0.0 -1 -1.00 + 1 17 001:001 -69.6 -69.6 0.0 0.0 -1 -1.00 + 1 18 001:001 -69.6 -69.6 0.0 0.0 -1 -1.00 + 1 19 001:001 -69.5 -69.5 0.0 0.0 -1 -1.00 + 1 20 001:001 -69.5 -69.5 0.0 0.0 -1 -1.00 + 1 21 001:001 -69.5 -69.5 0.0 0.0 -1 -1.00 + 1 22 001:001 -69.5 -69.5 0.0 0.0 -1 -1.00 + 1 23 001:001 -69.5 -69.5 0.0 0.0 -1 -1.00 + 1 24 001:001 -69.5 -69.5 0.0 0.0 -1 -1.00 + 1 25 001:001 -69.5 -69.5 0.0 0.0 -1 -1.00 +------------------------------------------------------------ + 1 1 001:001 -70.3 -70.3 0.0 0.0 -1 -1.00 +>> save_range 1 100 tests/data/AT9G99999_monomer_6325_Ethylene/ AT9G99999_monomer_6325_Ethylene pdb +Saving orientation 1 (solution 1) to tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb + +Max application memory used: 384.17 MB virtual + 0.00 KB shared. + +Hex stopping: Mon Feb 19 14:02:49 2024 diff --git a/tests/resources/test_docking_utils.py b/tests/resources/test_docking_utils.py new file mode 100644 index 0000000..a225f8f --- /dev/null +++ b/tests/resources/test_docking_utils.py @@ -0,0 +1,131 @@ +import unittest +from api.utils.docking_utils import Receptor, ComplexReceptor, MonomerReceptor +from api.utils.docking_utils import Ligand +from api.utils.docking_utils import Docker +from api.utils.docking_utils import MonomerDocking, ComplexDocking + + +class TestReceptorClasses(unittest.TestCase): + + def test_monomer_receptor_init(self): + """Test that MonomerReceptor object is correctly instantiated.""" + + monomer_receptor = MonomerReceptor("AT9G99999", "/tests/data/AF2_AT9G99999_monomer.pdb") + self.assertEqual(monomer_receptor.name, "AT9G99999") + self.assertEqual(monomer_receptor.file_path, "/tests/data/AF2_AT9G99999_monomer.pdb") + + def test_complex_receptor_init(self): + """Test that ComplexReceptor object is correctly instantiated. This + function also tests that monomers are separated correctly using + separate_monomers when the object is instantiated. + """ + + monomers_list = ["A", "B"] + complex_receptor = ComplexReceptor("test_complex_receptor", + "tests/data/AF2_AT8G88888_complex.pdb", + monomers_list) + self.assertEqual(complex_receptor.name, "test_complex_receptor") + self.assertEqual(complex_receptor.file_path, "tests/data/AF2_AT8G88888_complex.pdb") + self.assertEqual(complex_receptor.monomers_list, monomers_list) + self.assertEqual(len(complex_receptor.line_numbers), len(monomers_list)) + self.assertEqual(complex_receptor.line_numbers, [[48, 180], [181, 195]]) + + +class TestLigandClass(unittest.TestCase): + + def test_ligand_init(self): + """Test that Ligand object is correctly instantiated.""" + + ligand = Ligand("test_ligand", "tests/data/6325_Ethylene.sdf") + self.assertEqual(ligand.name, "test_ligand") + self.assertEqual(ligand.file_path, "tests/data/6325_Ethylene.sdf") + + +class TestDockerClass(unittest.TestCase): + + def test_create_monomer_receptor(self): + """Test that docker creates a MonomerReceptor object when given a + monomer pdb file.""" + + receptor_name = "AT9G99999_monomer" + receptor_path = "tests/data/AF2_AT9G99999_monomer.pdb" + receptor = Docker.create_receptor(receptor_name, receptor_path) + self.assertEqual(isinstance(receptor, MonomerReceptor), True) + self.assertEqual(receptor.name, receptor_name) + self.assertEqual(receptor.file_path, receptor_path) + + def test_create_complex_receptor(self): + """Test that docker creates a correct ComplexReceptor object when + given a complex pdb file.""" + + receptor_name = "AT8G88888_complex" + receptor_path = "tests/data/AF2_AT8G88888_complex.pdb" + receptor = Docker.create_receptor(receptor_name, receptor_path) + self.assertEqual(isinstance(receptor, Receptor), True) + self.assertEqual(receptor.name, "AT8G88888_complex") + self.assertEqual(receptor.file_path, "tests/data/AF2_AT8G88888_complex.pdb") + self.assertEqual(receptor.monomers_list, ["A", "B"]) + self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]]) + + def test_docking_exists(self): + """Test that Docker.create_docking returns None when the docking + already exists.""" + + receptor_name = "AT8G88888_complex" + ligand_name = "6325_Ethylene" + receptor_name_2 = "AT9G99999_monomer" + results_path = "tests/data/" + docking = Docker.create_docking(receptor_name, ligand_name, results_path) + docking2 = Docker.create_docking(receptor_name_2, ligand_name, results_path) + self.assertEqual(docking, None) + self.assertEqual(docking2, None) + + +class TestDockingClass(unittest.TestCase): + + def test_docking_complex_results(self): + """Test that correct dictionary is created in normalized_results for + complex docking.""" + + receptor_name = "AT8G88888_complex" + receptor_path = "tests/data/AF2_AT8G88888_complex.pdb" + ligand_name = "6325_Ethylene" + ligand_path = "tests/data/6325_Ethylene.sdf" + results_path = "tests/data/AT8G88888_complex_6325_Ethylene/" + receptor = Docker.create_receptor(receptor_name, receptor_path) + ligand = Ligand(ligand_name, ligand_path) + docking = ComplexDocking(receptor, ligand, results_path) + docking.separate_results() + docking.crte_ligand_reserved_attr() + normalized_results = docking.normalize_results(5) + + self.assertIsInstance(normalized_results, dict) + self.assertIsNot(len(normalized_results), 0) + self.assertIn('AT8G88888_complex_A', normalized_results) + self.assertIn('AT8G88888_complex_B', normalized_results) + self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_A']) + self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_B']) + + def test_docking_monomer_results(self): + """Test that correct dictionary is created in normalized_results for + monomer docking.""" + + receptor_name = "AT9G99999_monomer" + receptor_path = "tests/data/AF2_AT9G99999_monomer.pdb" + ligand_name = "6325_Ethylene" + ligand_path = "tests/data/6325_Ethylene.sdf" + results_path = "tests/data/AT9G99999_monomer_6325_Ethylene/" + receptor = Docker.create_receptor(receptor_name, receptor_path) + ligand = Ligand(ligand_name, ligand_path) + docking = MonomerDocking(receptor, ligand, results_path) + docking.crte_ligand_reserved_attr() + normalized_results = docking.normalize_results(5) + + self.assertIsInstance(normalized_results, dict) + self.assertIsNot(len(normalized_results), 0) + self.assertIn('AT9G99999_monomer', normalized_results) + self.assertIn('6325_Ethylene', normalized_results['AT9G99999_monomer']) + + +if __name__ == '__main__': + unittest.main() From 1fdf2f6f37f7009de7fad5ff4181233ee5e68326 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Tue, 27 Feb 2024 16:03:49 -0500 Subject: [PATCH 14/35] Move test_docking_utils.py to tests/utils/ folder --- tests/{resources => utils}/test_docking_utils.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{resources => utils}/test_docking_utils.py (100%) diff --git a/tests/resources/test_docking_utils.py b/tests/utils/test_docking_utils.py similarity index 100% rename from tests/resources/test_docking_utils.py rename to tests/utils/test_docking_utils.py From 4bec80afd8f839dfb4d2a913461bb08898c5c442 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Tue, 19 Mar 2024 14:19:07 -0400 Subject: [PATCH 15/35] Add regex matching to separate coordinates in docking results pdb Reformat output json to include path to results file and the date the docking was performed --- api/utils/docking_utils.py | 95 +++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 32 deletions(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index b0724e5..210e699 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -220,13 +220,22 @@ def result_dict_generator(self, monomer_number, threshold): for line in receptor_file_lines: splitted_line = line.split() if line[0:4] == 'ATOM': - coord = map(float, filter(None, splitted_line[6:9])) # check if chain name and residue are in the same column, e.g. A1000 if re.search(r'\d', splitted_line[4]) is None: residue = splitted_line[5] else: residue = splitted_line[4][1:] + + # Get the coordinates by regex matching, since they are not + # always separated by a space + pattern = r"[-+]?\d+\.\d+" + stripped_coords = line[28:54].strip() + # Find all matches in the input string + matches = re.findall(pattern, stripped_coords) + # Convert the matches to floats + coord = [float(match) for match in matches] + if int(residue) in reference: reference[int(residue)][int(splitted_line[1])] = tuple(coord) else: @@ -358,14 +367,14 @@ def normalize_results(self, threshold): ligand_key = list(results_dict[receptor_key].keys())[0] inside_dict = results_dict[receptor_key][ligand_key] - abs_max = None - abs_min = None + max_energy = None + min_energy = None # To eliminate empty dictionaries that might cause division errors below # normalized_mon_dicitonary calculations if inside_dict != {}: - abs_min = min(inside_dict.values()) - abs_max = max(inside_dict.values()) + min_energy = min(inside_dict.values()) + max_energy = max(inside_dict.values()) all_normalized_results = {} @@ -375,12 +384,12 @@ def normalize_results(self, threshold): # prevent substraction of equal values or values that doesn't make any sense # in terms of accuracy - if abs_min == abs_max: + if min_energy == max_energy: for k, v in inside_dict.items(): normalized_mon_dict[receptor_key][ligand_key][k] = 1 else: for k, v in inside_dict.items(): - normalized_value = (v - abs_min) / (abs_max - abs_min) + normalized_value = (v - min_energy) / (max_energy - min_energy) normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value all_normalized_results.update(normalized_mon_dict) return all_normalized_results @@ -527,9 +536,10 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): ct = datetime.datetime.now() print("Starting the docking process at {}".format(ct)) docking = Docker.create_docking(receptor, ligand, docking_pdb_path) - if docking is None: - receptor = receptor.split('.')[0] - results_path = docking_pdb_path + receptor + '_' + ligand + '/' + if isinstance(docking, list): + # receptor = receptor.split('.')[0] + # results_path = docking_pdb_path + receptor + '_' + ligand + '/' + results_path = docking[1] with open(results_path + "final.json") as json_file: final_json = json.load(json_file) return final_json @@ -538,7 +548,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): elif docking == "Ligand file not found": return "Ligand file not found" - results_path = docking_pdb_path + receptor + '_' + ligand + '/' + results_path = docking_pdb_path + docking.receptor.name + '_' + ligand + '/' # create folder to store docking results os.makedirs(results_path) @@ -548,12 +558,16 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): docking.separate_results() docking.crte_ligand_reserved_attr() normalized_results = docking.normalize_results(5) + final_json = {} + final_json["energies_json"] = normalized_results + final_json["path"] = '//bar.utoronto.ca/HEX_RESULTS/' + docking.receptor.name + '_' + ligand + '/' + final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + '_' + ligand + '0001.pdb' + final_json["date"] = datetime.datetime.now().date().strftime("%Y-%m-%d") new_json = docking.results_path + "final.json" with open(new_json, 'w') as file: - file.write(json.dumps(normalized_results)) - ct = datetime.datetime.now() - print("current time:-", ct) - return normalized_results + file.write(json.dumps(final_json)) + print("current time:-", datetime.datetime.now()) + return final_json def create_receptor(receptor_name: str, receptor_file_path: str): """Return a new receptor with the name receptor_name, by parsing @@ -589,27 +603,42 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): """Return a docking pair, which contains a Receptor and a Ligand, as specified by receptor_name and ligand_name, respectively. """ + # find receptor file and create receptor object + receptor_folder = "/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/" + # check that the docking combination has not been run before # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/' if '.' in receptor_name: receptor_name = receptor_name[:receptor_name.index('.')] + command = ['ls ' + 'AF2_' + receptor_name + '*.pdb'] + completed_process = subprocess.run(command, + shell = True, + cwd = receptor_folder, + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + text = True) + if completed_process.returncode != 0: + print("Receptor file not found") + # return "Receptor file not found" + receptor_file = completed_process.stdout[:-1] + + receptor_file_path = receptor_folder + receptor_file + receptor_name = receptor_file[4:(receptor_file.index('.') + 2)] + results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' print(results_path) + if os.path.exists(results_path): print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name)) - return None - - # find receptor file and create receptor object - receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/' - receptor_file_found = False + return [None, results_path] + receptor = Docker.create_receptor(receptor_name, receptor_file_path) - for receptor_file in os.listdir(receptor_folder): - if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \ - (receptor_name in receptor_file): - receptor_file_found = True - receptor_file_path = receptor_folder + receptor_file - receptor = Docker.create_receptor(receptor_name, receptor_file_path) + # for receptor_file in os.listdir(receptor_folder): + # if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \ + # (receptor_name in receptor_file): + # receptor_file_path = receptor_folder + receptor_file + # receptor = Docker.create_receptor(receptor_name, receptor_file_path) # find ligand file and create ligand object ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/' @@ -622,10 +651,8 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): ligand_file_found = True ligand_file_path = ligand_folder + '/' + ligand_file ligand = Ligand(ligand_name, ligand_file_path) - - if not receptor_file_found: - return "Receptor file not found" - elif not ligand_file_found: + + if not ligand_file_found: return "Ligand file not found" # receptor and ligand objects are created and ready for docking @@ -675,12 +702,13 @@ def create_mapping_filtered(folder_path: str, results_path: str): folder_path: where the sdf files are stored results_path: where the json file should be created """ - mapped_sdf = {} + mapped_sdf = [] sdf_files = os.listdir(folder_path) for file in sdf_files: if file[0] != "." and file[-4:] == ".sdf": name = file[file.index("_") + 1:-4] - mapped_sdf[name] = file + mapped_sdf.append({'value': file, 'text': name}) + # mapped_sdf[name] = file json_file = results_path + "sdf_mapping_filtered.json" with open(json_file, 'w') as file: file.write(json.dumps(mapped_sdf)) @@ -707,3 +735,6 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str): with open(json_file, 'w') as file: file.write(json.dumps(mapped_sdf)) return mapped_sdf + +# if __name__ == "__main__": +# Docker.start("AT3G22150", "801_Auxin", "/DATA/HEX_API/RESULTS/") \ No newline at end of file From a7ad94ef2817678ada80233ac0e9e52a09e9d320 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 21 Mar 2024 17:02:13 -0400 Subject: [PATCH 16/35] Add timestamp to final energies json --- api/utils/docking_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 210e699..112a9b5 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -534,6 +534,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): """ # create docking object ct = datetime.datetime.now() + ct_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print("Starting the docking process at {}".format(ct)) docking = Docker.create_docking(receptor, ligand, docking_pdb_path) if isinstance(docking, list): @@ -562,7 +563,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): final_json["energies_json"] = normalized_results final_json["path"] = '//bar.utoronto.ca/HEX_RESULTS/' + docking.receptor.name + '_' + ligand + '/' final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + '_' + ligand + '0001.pdb' - final_json["date"] = datetime.datetime.now().date().strftime("%Y-%m-%d") + final_json["date"] = ct_string new_json = docking.results_path + "final.json" with open(new_json, 'w') as file: file.write(json.dumps(final_json)) From 35bb983bbcfcf74e3d14f3f40a60a5a034b687af Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 21 Mar 2024 17:11:45 -0400 Subject: [PATCH 17/35] Fix styling issues --- api/utils/docking_utils.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 112a9b5..15329f8 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -613,19 +613,19 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): receptor_name = receptor_name[:receptor_name.index('.')] command = ['ls ' + 'AF2_' + receptor_name + '*.pdb'] completed_process = subprocess.run(command, - shell = True, - cwd = receptor_folder, - stdout = subprocess.PIPE, - stderr = subprocess.PIPE, - text = True) + shell=True, + cwd=receptor_folder, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) if completed_process.returncode != 0: print("Receptor file not found") # return "Receptor file not found" receptor_file = completed_process.stdout[:-1] - + receptor_file_path = receptor_folder + receptor_file receptor_name = receptor_file[4:(receptor_file.index('.') + 2)] - + results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' print(results_path) @@ -635,12 +635,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): return [None, results_path] receptor = Docker.create_receptor(receptor_name, receptor_file_path) - # for receptor_file in os.listdir(receptor_folder): - # if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \ - # (receptor_name in receptor_file): - # receptor_file_path = receptor_folder + receptor_file - # receptor = Docker.create_receptor(receptor_name, receptor_file_path) - # find ligand file and create ligand object ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/' ligand_file_found = False @@ -652,7 +646,7 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): ligand_file_found = True ligand_file_path = ligand_folder + '/' + ligand_file ligand = Ligand(ligand_name, ligand_file_path) - + if not ligand_file_found: return "Ligand file not found" @@ -736,6 +730,3 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str): with open(json_file, 'w') as file: file.write(json.dumps(mapped_sdf)) return mapped_sdf - -# if __name__ == "__main__": -# Docker.start("AT3G22150", "801_Auxin", "/DATA/HEX_API/RESULTS/") \ No newline at end of file From be5d74abd0b0ad1086b83c0b456d7ed7d5d20bd9 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Fri, 22 Mar 2024 14:18:17 -0400 Subject: [PATCH 18/35] Fix styling issues --- api/resources/snps.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/api/resources/snps.py b/api/resources/snps.py index aa5616e..404b2a6 100755 --- a/api/resources/snps.py +++ b/api/resources/snps.py @@ -50,35 +50,23 @@ @snps.route("/docking//") class Docking(Resource): - decorators = [limiter.limit("2/minute")] + decorators = [limiter.limit("2/minute")] @snps.param("receptor", _in="path", default="bri1") @snps.param("ligand", _in="path", default="brass") def get(self, receptor, ligand): receptor = escape(receptor) ligand = escape(ligand) - - # TODO: Clean comments left by metyu before commit - - docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/" docking_pdb_path = "/DATA/HEX_API/RESULTS/" - # TODO: Then add regex check to receptors/ligands (For Arabidopsis genes, simply reuse - # is_arabidopsis_gene_valid; but you will need make regex check for your SDFs) - #Receptors can be adjusted please adjust the file format on the directories as well (sdf vs pdb) - if not BARUtils.is_arabidopsis_gene_valid(receptor): return BARUtils.error_exit("Invalid arapbidopsis pdb gene id"), 400 - + matched = re.search("[a-z]", ligand) if matched is None: return BARUtils.error_exit("Invalid ligand name"), 400 - docking_file_name = receptor.upper() + "-" + ligand.upper() + \ - "-docking0001.pdb " - response = requests.get("https:" + docking_pdb_link + docking_file_name) - - # Importing start function to initiate docking_utils file + # start function to initiate docking_utils file final_json = Docker.start(receptor, ligand, docking_pdb_path) return BARUtils.success_exit(final_json) From 203f5471f085b19f77842008eae1fb920c1d0b6f Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Fri, 22 Mar 2024 17:54:30 -0400 Subject: [PATCH 19/35] Fix test for testing docking that already exists --- tests/data/AT1G66340.1_6325_Ethylene/final.json | 1 + tests/utils/test_docking_utils.py | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) create mode 100644 tests/data/AT1G66340.1_6325_Ethylene/final.json diff --git a/tests/data/AT1G66340.1_6325_Ethylene/final.json b/tests/data/AT1G66340.1_6325_Ethylene/final.json new file mode 100644 index 0000000..e872899 --- /dev/null +++ b/tests/data/AT1G66340.1_6325_Ethylene/final.json @@ -0,0 +1 @@ +{"dummyjson": "true"} \ No newline at end of file diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index a225f8f..0a6ce2d 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -71,14 +71,11 @@ def test_docking_exists(self): """Test that Docker.create_docking returns None when the docking already exists.""" - receptor_name = "AT8G88888_complex" + receptor_name = "AT1G66340" ligand_name = "6325_Ethylene" - receptor_name_2 = "AT9G99999_monomer" results_path = "tests/data/" docking = Docker.create_docking(receptor_name, ligand_name, results_path) - docking2 = Docker.create_docking(receptor_name_2, ligand_name, results_path) - self.assertEqual(docking, None) - self.assertEqual(docking2, None) + self.assertEqual(docking[0], None) class TestDockingClass(unittest.TestCase): From 0e4f104adab2f342e6339347534ee286c6784daf Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 28 Mar 2024 13:41:25 -0400 Subject: [PATCH 20/35] Make changes to test file to skip in gitbuh environment Change json results when receptor or ligand not found --- api/resources/snps.py | 7 ++++++- api/utils/docking_utils.py | 4 ++-- tests/utils/test_docking_utils.py | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/api/resources/snps.py b/api/resources/snps.py index 404b2a6..112c11d 100755 --- a/api/resources/snps.py +++ b/api/resources/snps.py @@ -69,7 +69,12 @@ def get(self, receptor, ligand): # start function to initiate docking_utils file final_json = Docker.start(receptor, ligand, docking_pdb_path) - return BARUtils.success_exit(final_json) + if final_json == "Receptor file not found": + return BARUtils.error_exit("There are no data found for the given gene"), 400 + elif final_json == "Ligand file not found": + return BARUtils.error_exit("There are no data found for the given ligand"), 400 + else: + return BARUtils.success_exit(final_json) @snps.route("/phenix//") diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 15329f8..3f64782 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -618,9 +618,9 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + print("return code" + str(completed_process.returncode)) if completed_process.returncode != 0: - print("Receptor file not found") - # return "Receptor file not found" + return "Receptor file not found" receptor_file = completed_process.stdout[:-1] receptor_file_path = receptor_folder + receptor_file diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 0a6ce2d..0bc10e0 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -1,4 +1,5 @@ import unittest +import pytest from api.utils.docking_utils import Receptor, ComplexReceptor, MonomerReceptor from api.utils.docking_utils import Ligand from api.utils.docking_utils import Docker @@ -67,6 +68,7 @@ def test_create_complex_receptor(self): self.assertEqual(receptor.monomers_list, ["A", "B"]) self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]]) + @pytest.mark.integration def test_docking_exists(self): """Test that Docker.create_docking returns None when the docking already exists.""" From 88b92f0e94e101650c05ef25c4451826dd613793 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 28 Mar 2024 17:37:18 -0400 Subject: [PATCH 21/35] Add code to skip test in CI --- tests/utils/test_docking_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 0bc10e0..105bb68 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -4,7 +4,9 @@ from api.utils.docking_utils import Ligand from api.utils.docking_utils import Docker from api.utils.docking_utils import MonomerDocking, ComplexDocking +import os +IN_CI = os.getenv("CI") == "true" class TestReceptorClasses(unittest.TestCase): @@ -68,7 +70,7 @@ def test_create_complex_receptor(self): self.assertEqual(receptor.monomers_list, ["A", "B"]) self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]]) - @pytest.mark.integration + @pytest.mark.skipif(IN_CI, reason = "Doesn't work in Github CI") def test_docking_exists(self): """Test that Docker.create_docking returns None when the docking already exists.""" From 1a20055fc1a0e6fcce3035d63ba2f2341e67093b Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 28 Mar 2024 17:42:46 -0400 Subject: [PATCH 22/35] Fix code to skip test if not running on BAR --- tests/utils/test_docking_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 105bb68..9b97d89 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -6,7 +6,7 @@ from api.utils.docking_utils import MonomerDocking, ComplexDocking import os -IN_CI = os.getenv("CI") == "true" +NOT_IN_BAR = not os.environ.get("BAR") == "true" class TestReceptorClasses(unittest.TestCase): @@ -70,7 +70,7 @@ def test_create_complex_receptor(self): self.assertEqual(receptor.monomers_list, ["A", "B"]) self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]]) - @pytest.mark.skipif(IN_CI, reason = "Doesn't work in Github CI") + @pytest.mark.skipif(NOT_IN_BAR, reason = "Only works on BAR") def test_docking_exists(self): """Test that Docker.create_docking returns None when the docking already exists.""" From 6853a159c6b4a23a6368dba4146d4951baadd109 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 28 Mar 2024 18:00:07 -0400 Subject: [PATCH 23/35] Fix styling --- tests/utils/test_docking_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 9b97d89..610e457 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -8,6 +8,7 @@ NOT_IN_BAR = not os.environ.get("BAR") == "true" + class TestReceptorClasses(unittest.TestCase): def test_monomer_receptor_init(self): @@ -70,7 +71,7 @@ def test_create_complex_receptor(self): self.assertEqual(receptor.monomers_list, ["A", "B"]) self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]]) - @pytest.mark.skipif(NOT_IN_BAR, reason = "Only works on BAR") + @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR") def test_docking_exists(self): """Test that Docker.create_docking returns None when the docking already exists.""" From dbd55fcf703a5fdafa83775651a4fdb83c11f089 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 4 Apr 2024 09:56:50 -0400 Subject: [PATCH 24/35] Add tests for SDFMapping --- api/utils/docking_utils.py | 7 +- .../filtered/443453_Gibberellin_A15.sdf | 266 ++++++++++++++ .../filtered/5984_D-(-)-Fructose.sdf | 297 ++++++++++++++++ .../filtered/73672_isoxaben.sdf | 332 ++++++++++++++++++ .../sample_ligands/filtered/801_Auxin.sdf | 183 ++++++++++ .../sample_ligands/unfiltered/103061392.sdf | 36 ++ .../sample_ligands/unfiltered/134970870.sdf | 107 ++++++ .../sample_ligands/unfiltered/135191341.sdf | 105 ++++++ .../sample_ligands/unfiltered/135355153.sdf | 41 +++ tests/utils/test_docking_utils.py | 61 +++- 10 files changed, 1429 insertions(+), 6 deletions(-) create mode 100644 tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf create mode 100644 tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf create mode 100644 tests/data/sample_ligands/filtered/73672_isoxaben.sdf create mode 100644 tests/data/sample_ligands/filtered/801_Auxin.sdf create mode 100644 tests/data/sample_ligands/unfiltered/103061392.sdf create mode 100644 tests/data/sample_ligands/unfiltered/134970870.sdf create mode 100644 tests/data/sample_ligands/unfiltered/135191341.sdf create mode 100644 tests/data/sample_ligands/unfiltered/135355153.sdf diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 3f64782..e2c6f4e 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -703,7 +703,6 @@ def create_mapping_filtered(folder_path: str, results_path: str): if file[0] != "." and file[-4:] == ".sdf": name = file[file.index("_") + 1:-4] mapped_sdf.append({'value': file, 'text': name}) - # mapped_sdf[name] = file json_file = results_path + "sdf_mapping_filtered.json" with open(json_file, 'w') as file: file.write(json.dumps(mapped_sdf)) @@ -719,13 +718,13 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str): folder_path: where the sdf files are stored results_path: where the json file should be created """ - mapped_sdf = {} + mapped_sdf = [] sdf_files = os.listdir(folder_path) for file in sdf_files: if file[0] != "." and file[-4:] == ".sdf": names = self.get_substance_name(file, folder_path) - sdf_number = file.split(".")[0] - mapped_sdf[sdf_number] = ",".join(names) + all_names = ",".join(names) + mapped_sdf.append({'value': file, 'text': all_names}) json_file = results_path + "sdf_mapping_unfiltered.json" with open(json_file, 'w') as file: file.write(json.dumps(mapped_sdf)) diff --git a/tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf b/tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf new file mode 100644 index 0000000..1cede6e --- /dev/null +++ b/tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf @@ -0,0 +1,266 @@ +443453 + -OEChem-03192020593D + + 53 56 0 1 0 0 0 0 0999 V2000 + 0.1236 3.0990 0.8730 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3854 -3.4237 0.8105 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.1809 -2.9472 -1.3454 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3584 -0.9329 1.9903 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7768 0.7463 1.3864 O 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3538 -0.4319 0.0963 C 0 0 1 0 0 0 0 0 0 0 0 0 + -1.0105 0.8566 -0.7318 C 0 0 2 0 0 0 0 0 0 0 0 0 + 0.4869 1.1322 -0.4269 C 0 0 1 0 0 0 0 0 0 0 0 0 + 1.0193 -0.3273 -0.5291 C 0 0 2 0 0 0 0 0 0 0 0 0 + 0.0074 -1.1685 0.2825 C 0 0 2 0 0 0 0 0 0 0 0 0 + -2.0293 -0.1097 1.4402 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5154 -1.1885 -0.5954 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5430 -0.4670 -0.3113 C 0 0 2 0 0 0 0 0 0 0 0 0 + -3.3341 0.5290 0.9785 C 0 0 1 0 0 0 0 0 0 0 0 0 + -2.0059 2.0256 -0.6338 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.1925 1.9736 -1.5082 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.0181 1.9563 0.5166 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.7200 -0.3852 -0.1626 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.6814 1.8077 0.9486 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.2622 0.4602 -1.3279 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7218 1.8959 -1.3938 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0462 -2.5866 -0.2001 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.0264 -1.9089 -0.5428 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.9767 -0.1289 1.0910 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.9386 -0.4761 -0.7046 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0369 0.5646 -1.7951 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8790 -0.6259 -1.5832 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2566 -1.1727 1.3441 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.4833 0.5460 2.1147 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.2272 -1.0338 2.0020 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6237 -2.2082 -0.2092 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.4083 -1.2280 -1.6838 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.0782 0.5310 1.7815 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6097 1.9810 -1.5537 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.5388 3.0110 -0.7017 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8704 3.0201 -1.5008 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9153 1.5956 -2.5018 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6222 2.5239 1.3681 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.9411 2.4664 0.2142 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.7267 1.9624 1.2042 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2548 1.2701 1.7884 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1601 0.0269 -2.3328 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3408 0.4975 -1.1273 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.0569 2.4587 -0.5161 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1681 2.4021 -2.2586 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6031 -2.6245 0.1690 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7809 -2.2519 -1.5539 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.1167 -1.9748 -0.4362 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.7590 0.1327 -0.3404 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.1367 -1.1604 -1.5223 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.7990 3.7041 0.5229 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4280 -4.3576 0.5136 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6260 -0.7265 2.9110 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 19 1 0 0 0 0 + 1 51 1 0 0 0 0 + 2 22 1 0 0 0 0 + 2 52 1 0 0 0 0 + 3 22 2 0 0 0 0 + 4 24 1 0 0 0 0 + 4 53 1 0 0 0 0 + 5 24 2 0 0 0 0 + 6 7 1 0 0 0 0 + 6 10 1 0 0 0 0 + 6 11 1 0 0 0 0 + 6 12 1 0 0 0 0 + 7 8 1 0 0 0 0 + 7 15 1 0 0 0 0 + 7 26 1 0 0 0 0 + 8 9 1 0 0 0 0 + 8 16 1 0 0 0 0 + 8 19 1 0 0 0 0 + 9 10 1 0 0 0 0 + 9 13 1 0 0 0 0 + 9 27 1 0 0 0 0 + 10 22 1 0 0 0 0 + 10 28 1 0 0 0 0 + 11 14 1 0 0 0 0 + 11 29 1 0 0 0 0 + 11 30 1 0 0 0 0 + 12 18 1 0 0 0 0 + 12 31 1 0 0 0 0 + 12 32 1 0 0 0 0 + 13 20 1 0 0 0 0 + 13 23 1 0 0 0 0 + 13 24 1 0 0 0 0 + 14 17 1 0 0 0 0 + 14 18 1 0 0 0 0 + 14 33 1 0 0 0 0 + 15 17 1 0 0 0 0 + 15 34 1 0 0 0 0 + 15 35 1 0 0 0 0 + 16 21 1 0 0 0 0 + 16 36 1 0 0 0 0 + 16 37 1 0 0 0 0 + 17 38 1 0 0 0 0 + 17 39 1 0 0 0 0 + 18 25 2 0 0 0 0 + 19 40 1 0 0 0 0 + 19 41 1 0 0 0 0 + 20 21 1 0 0 0 0 + 20 42 1 0 0 0 0 + 20 43 1 0 0 0 0 + 21 44 1 0 0 0 0 + 21 45 1 0 0 0 0 + 23 46 1 0 0 0 0 + 23 47 1 0 0 0 0 + 23 48 1 0 0 0 0 + 25 49 1 0 0 0 0 + 25 50 1 0 0 0 0 +M END +> +443453 + +> +0.8 + +> +1 + +> +19 +1 -0.68 +10 0.06 +12 0.14 +13 0.06 +14 0.14 +18 -0.28 +19 0.28 +2 -0.65 +22 0.66 +24 0.66 +25 -0.3 +3 -0.57 +4 -0.65 +49 0.15 +5 -0.57 +50 0.15 +51 0.4 +52 0.5 +53 0.5 + +> +4.8 + +> +11 +1 1 acceptor +1 1 donor +1 2 acceptor +1 3 acceptor +1 4 acceptor +1 5 acceptor +3 2 3 22 anion +3 4 5 24 anion +5 6 7 8 9 10 rings +6 8 9 13 16 20 21 rings +8 6 7 11 12 14 15 17 18 rings + +> +25 + +> +7 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +0006C43D00000001 + +> +109.8608 + +> +56.05 + +> +10863032 1 17775002341051126289 +10967382 1 18337399321749125231 +11132069 177 18410571760632776393 +11578080 2 17701800121260520545 +12011746 2 18271238417445912848 +12553582 1 18340217378952578125 +12592029 89 18260545624169624827 +12633257 1 18125411302951212400 +13140716 1 18267019453643120282 +13172582 1 18408322181248669898 +13224815 77 18408325466861773613 +13538477 17 18042969963765403390 +13583140 156 16988268769054638501 +14178342 30 18194960747293312025 +14787075 74 17773863363032392001 +15309172 13 18335713749033407887 +16752209 62 18408610240252119924 +16945 1 17822014194095494282 +17349148 13 18408877426284028311 +17492 54 18261966265710939357 +1813 80 17895468108494128074 +18186145 218 17632583720907680458 +192875 21 18408877456327615965 +20028762 73 17915180341888784015 +20600515 1 18341608244214032320 +20691752 17 18272943729620618673 +20715895 44 17970892381477415541 +20739085 24 17971504886336515381 +20905425 154 17982735168883945983 +2334 1 18411142424152217716 +23419403 2 17684601663405341276 +23559900 14 18199188395620067732 +2748010 2 17253711577657852757 +3286 77 17560798775942189230 +34934 24 18042688493083725276 +352729 6 18265064698321524087 +394222 165 17896054272508719104 +474 4 18335149661193390307 +484985 159 14682784046149967332 +70251023 43 17909274585694312047 +90525 40 18335427850166549029 + +> +485.14 +6.36 +3.04 +1.47 +2.87 +0.9 +0.26 +-0.26 +-0.16 +-0.37 +-0.24 +-0.77 +-0.18 +-0.53 + +> +1060.249 + +> +262.2 + +> +2 +5 +10 + +$$$$ diff --git a/tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf b/tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf new file mode 100644 index 0000000..46fa7c4 --- /dev/null +++ b/tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf @@ -0,0 +1,297 @@ +5984 + -OEChem-03192014583D + + 24 23 0 1 0 0 0 0 0999 V2000 + -0.3508 0.2852 1.6168 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1229 -0.0967 -1.5767 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.3999 -1.9832 0.0163 O 0 0 0 0 0 0 0 0 0 0 0 0 + -4.2737 0.3477 0.1329 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7917 -1.0026 0.5184 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8346 1.3519 -0.1667 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4980 0.2331 0.2008 C 0 0 2 0 0 0 0 0 0 0 0 0 + -1.9435 -0.1555 -0.1619 C 0 0 1 0 0 0 0 0 0 0 0 0 + 0.5984 -0.6447 -0.4216 C 0 0 1 0 0 0 0 0 0 0 0 0 + -2.9638 0.7835 0.4823 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0330 -0.2421 -0.0847 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4951 1.1234 -0.5560 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3271 1.2533 -0.1665 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1596 -1.1825 0.1530 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5128 -0.6492 -1.5135 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8919 0.7765 1.5733 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8443 1.8081 0.1148 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7717 1.1026 1.9320 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4381 1.1623 -1.6468 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8696 1.9070 -0.1228 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9237 0.8101 -1.8659 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.4087 -1.9817 0.9890 H 0 0 0 0 0 0 0 0 0 0 0 0 + -4.8996 0.9640 0.5499 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8537 1.3937 0.8047 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 7 1 0 0 0 0 + 1 18 1 0 0 0 0 + 2 8 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 9 1 0 0 0 0 + 3 22 1 0 0 0 0 + 4 10 1 0 0 0 0 + 4 23 1 0 0 0 0 + 5 11 2 0 0 0 0 + 6 12 1 0 0 0 0 + 6 24 1 0 0 0 0 + 7 8 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 13 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 14 1 0 0 0 0 + 9 11 1 0 0 0 0 + 9 15 1 0 0 0 0 + 10 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 11 12 1 0 0 0 0 + 12 19 1 0 0 0 0 + 12 20 1 0 0 0 0 +M END +> +5984 + +> +0.6 + +> +1 +113 +63 +77 +90 +29 +4 +106 +81 +93 +21 +64 +52 +13 +83 +56 +43 +8 +42 +66 +103 +107 +5 +100 +94 +3 +50 +31 +73 +86 +47 +17 +23 +20 +91 +110 +10 +14 +22 +61 +104 +7 +97 +85 +105 +45 +44 +95 +6 +59 +69 +87 +70 +16 +74 +41 +78 +33 +99 +46 +112 +114 +49 +12 +25 +51 +101 +89 +109 +18 +35 +28 +96 +2 +62 +27 +57 +108 +65 +36 +39 +72 +9 +98 +79 +55 +84 +58 +30 +88 +24 +11 +102 +38 +32 +34 +26 +37 +71 +40 +19 +48 +82 +53 +80 +67 +60 +54 +15 +111 +68 +76 +92 +75 + +> +17 +1 -0.68 +10 0.28 +11 0.45 +12 0.34 +18 0.4 +2 -0.68 +21 0.4 +22 0.4 +23 0.4 +24 0.4 +3 -0.68 +4 -0.68 +5 -0.57 +6 -0.68 +7 0.28 +8 0.28 +9 0.34 + +> +5 + +> +11 +1 1 acceptor +1 1 donor +1 2 acceptor +1 2 donor +1 3 acceptor +1 3 donor +1 4 acceptor +1 4 donor +1 5 acceptor +1 6 acceptor +1 6 donor + +> +12 + +> +3 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +6 + +> +0000176000000001 + +> +14.8267 + +> +55.858 + +> +10219947 1 18259706679105643821 +10857977 72 15647061456026330781 +12251169 10 14346077581156446260 +12932764 1 17385438795719614274 +14325111 11 18409451396669739967 +15310529 11 18341612560571437217 +15775835 57 18343023310977726820 +170605 34 18341619195779048951 +18186145 218 18411136926699554526 +20645464 45 16660360411259445224 +20645476 183 17703790348838210055 +20711985 344 13253979848264163674 +20871999 31 16271639109051498853 +21119208 17 17060347326849160756 +21293036 1 16917071070392623369 +21499 59 18410854390739347150 +228727 97 17489598835037837440 +23211744 41 17385720270780590995 +23402539 116 16515958268129347903 +23552423 10 17773044050770881839 +5084963 1 18059013874308216274 +528886 8 18411135861458162993 +57812782 119 16515402967153822125 + +> +211.74 +6.02 +1.28 +1.02 +1.45 +0.31 +0 +-2.26 +-0.19 +-0.91 +0 +0.36 +-0.04 +-0.43 + +> +401.786 + +> +129.9 + +> +2 +5 +10 + +$$$$ diff --git a/tests/data/sample_ligands/filtered/73672_isoxaben.sdf b/tests/data/sample_ligands/filtered/73672_isoxaben.sdf new file mode 100644 index 0000000..0cbc2b4 --- /dev/null +++ b/tests/data/sample_ligands/filtered/73672_isoxaben.sdf @@ -0,0 +1,332 @@ +73672 + -OEChem-01292022313D + + 48 49 0 0 0 0 0 0 0999 V2000 + 1.5815 -0.0149 -1.9046 O 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0163 0.0226 1.5313 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8096 -2.4008 0.0430 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7579 2.3983 -0.0157 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.9484 -0.0044 -1.6550 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4548 -0.0353 -0.7579 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3874 -0.0002 0.3050 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2162 -1.2511 -0.1539 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.1981 1.2668 -0.1418 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.9913 -0.0073 -0.3541 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3418 -0.0076 1.8550 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5852 -2.6088 0.1428 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6001 2.6119 0.2617 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.8038 -0.0189 0.3139 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9104 -0.0234 -0.7245 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3267 -0.0121 0.3457 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7765 -0.0013 0.0151 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4417 -1.2039 -0.1184 C 0 0 0 0 0 0 0 0 0 0 0 0 + -3.4156 1.2117 -0.1483 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.8018 -1.1931 -0.4280 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.7756 1.2226 -0.4577 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.4687 0.0202 -0.5976 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7911 -2.9509 1.3584 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7280 2.9798 1.2859 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.3947 -1.1959 -1.2362 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.2082 -1.2246 0.3161 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.2145 1.2125 0.2704 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.3162 1.2638 -1.2335 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7968 0.8551 2.2526 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.3508 0.0103 2.2822 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8332 -0.8963 2.2434 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2276 -3.4060 -0.2462 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.4723 -2.7775 1.2170 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6058 -2.7158 -0.3320 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5901 2.7419 1.3473 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2044 3.4239 -0.1564 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5802 2.7318 -0.1145 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5877 -0.0248 1.3710 H 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8640 -0.0470 -1.6888 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.3482 -2.1257 -0.5393 H 0 0 0 0 0 0 0 0 0 0 0 0 + -5.3019 2.1637 -0.5921 H 0 0 0 0 0 0 0 0 0 0 0 0 + -6.5275 0.0286 -0.8389 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.1678 -3.8491 1.3445 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.8037 -3.2337 1.6632 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3693 -2.2449 2.0797 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.0576 3.8432 1.2592 H 0 0 0 0 0 0 0 0 0 0 0 0 + -3.7287 3.3237 1.5662 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.3565 2.2725 2.0328 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 15 1 0 0 0 0 + 2 16 2 0 0 0 0 + 3 18 1 0 0 0 0 + 3 23 1 0 0 0 0 + 4 19 1 0 0 0 0 + 4 24 1 0 0 0 0 + 5 10 2 0 0 0 0 + 6 15 1 0 0 0 0 + 6 16 1 0 0 0 0 + 6 39 1 0 0 0 0 + 7 8 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 10 1 0 0 0 0 + 7 11 1 0 0 0 0 + 8 12 1 0 0 0 0 + 8 25 1 0 0 0 0 + 8 26 1 0 0 0 0 + 9 13 1 0 0 0 0 + 9 27 1 0 0 0 0 + 9 28 1 0 0 0 0 + 10 14 1 0 0 0 0 + 11 29 1 0 0 0 0 + 11 30 1 0 0 0 0 + 11 31 1 0 0 0 0 + 12 32 1 0 0 0 0 + 12 33 1 0 0 0 0 + 12 34 1 0 0 0 0 + 13 35 1 0 0 0 0 + 13 36 1 0 0 0 0 + 13 37 1 0 0 0 0 + 14 15 2 0 0 0 0 + 14 38 1 0 0 0 0 + 16 17 1 0 0 0 0 + 17 18 2 0 0 0 0 + 17 19 1 0 0 0 0 + 18 20 1 0 0 0 0 + 19 21 2 0 0 0 0 + 20 22 2 0 0 0 0 + 20 40 1 0 0 0 0 + 21 22 1 0 0 0 0 + 21 41 1 0 0 0 0 + 22 42 1 0 0 0 0 + 23 43 1 0 0 0 0 + 23 44 1 0 0 0 0 + 23 45 1 0 0 0 0 + 24 46 1 0 0 0 0 + 24 47 1 0 0 0 0 + 24 48 1 0 0 0 0 +M END +> +73672 + +> +0.8 + +> +1 +14 +60 +56 +31 +22 +58 +64 +63 +7 +13 +42 +27 +9 +57 +29 +12 +51 +61 +66 +33 +59 +39 +23 +25 +36 +32 +62 +28 +10 +18 +41 +65 +26 +24 +15 +35 +11 +5 +8 +45 +30 +47 +44 +52 +21 +4 +16 +20 +3 +46 +48 +54 +2 +55 +49 +6 +50 +19 +38 +53 +68 +17 +37 +43 +34 +70 +67 +69 +40 + +> +24 +1 -0.02 +10 0.11 +14 -0.15 +15 0.2 +16 0.54 +17 0.09 +18 0.08 +19 0.08 +2 -0.57 +20 -0.15 +21 -0.15 +22 -0.15 +23 0.28 +24 0.28 +3 -0.36 +38 0.15 +39 0.37 +4 -0.36 +40 0.15 +41 0.15 +42 0.15 +5 -0.41 +6 -0.49 +7 0.18 + +> +8 + +> +10 +1 11 hydrophobe +1 12 hydrophobe +1 13 hydrophobe +1 2 acceptor +1 3 acceptor +1 4 acceptor +1 5 acceptor +1 6 donor +5 1 5 10 14 15 rings +6 17 18 19 20 21 22 rings + +> +24 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +4 + +> +00011FC800000001 + +> +85.2359 + +> +50.748 + +> +10366900 7 17846498153403680307 +10595046 47 18412261757249425332 +10670039 82 16845310345148862312 +11405975 8 18339928125900624090 +12107183 9 17762899866583206346 +12166972 35 18114184133342584557 +12236239 1 17748820817921540483 +12596602 18 18113613525498890928 +12670546 56 18260544498576716388 +13167823 11 18410852140451225614 +13224815 77 15913334567591032886 +13533116 47 18409164390134199090 +13583140 156 18131063866327987482 +13911987 19 16988564653377796772 +14251764 38 18272935994669744648 +14341114 176 18410295826116619172 +15788980 27 18187368718197926686 +15961568 22 18338800125861386260 +17349148 13 17603588512614199393 +17844677 252 18341619209117683300 +1813 80 16588026775584437974 +19489759 90 18341610386996167121 +19958102 18 18113889434530828159 +20511986 3 17749933557500052928 +20645477 70 16773525360836679502 +21033648 29 17131255892496812096 +21065198 57 18411138022222234766 +21859007 373 17387112196978238957 +23402539 116 18411975850346435557 +23557571 272 18202289086671170445 +23559900 14 18410292523613517392 +23569943 247 17097762466360446034 +2838139 119 16371273443410758141 +300161 21 18114456842448656190 +3004659 81 18334293141904462754 +34797466 226 18059021690990667228 +351380 180 18413385432136517725 +3633792 109 18115293476867354335 +4073 2 18114185276426065634 +4214541 1 18410855421536876668 +5104073 3 18409732863341281898 +5283173 99 18271242720887508337 +67856867 119 17970350527596444980 +90127 26 18130798858260937044 +9971528 1 17749109980937210316 +9981440 41 18411704245215730995 + +> +460.47 +13.64 +2.76 +1.29 +4.71 +0.05 +0.14 +-0.01 +-0.2 +-0.79 +1.02 +0.5 +-0.02 +0.13 + +> +963.108 + +> +263 + +> +2 +5 +10 + +$$$$ diff --git a/tests/data/sample_ligands/filtered/801_Auxin.sdf b/tests/data/sample_ligands/filtered/801_Auxin.sdf new file mode 100644 index 0000000..ca48c82 --- /dev/null +++ b/tests/data/sample_ligands/filtered/801_Auxin.sdf @@ -0,0 +1,183 @@ +801 + -OEChem-03192020413D + + 21 22 0 0 0 0 0 0 0999 V2000 + -3.1373 -0.4826 -0.9682 O 0 5 0 0 0 0 0 0 0 0 0 0 + -3.0086 1.7026 -0.3061 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8176 -2.0611 -0.1166 N 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5445 0.1149 0.2923 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.6808 -0.5631 0.5447 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4680 -0.8477 -0.1212 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4813 -1.8993 0.2843 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.9454 0.0466 1.0039 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9580 1.4594 0.3762 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7895 -0.5319 -0.4562 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2769 1.7901 0.0441 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.1768 0.8081 -0.3654 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7779 0.4639 -0.2120 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1452 -2.7503 0.3478 H 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5298 -0.6664 1.5980 H 0 0 0 0 0 0 0 0 0 0 0 0 + -1.7520 0.9102 1.6506 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2390 -2.9434 -0.3723 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2693 2.2375 0.6926 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4908 -1.2956 -0.7751 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.6013 2.8254 0.1067 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.1964 1.0872 -0.6186 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 13 1 0 0 0 0 + 2 13 2 0 0 0 0 + 3 6 1 0 0 0 0 + 3 7 1 0 0 0 0 + 3 17 1 0 0 0 0 + 4 5 1 0 0 0 0 + 4 6 1 0 0 0 0 + 4 9 2 0 0 0 0 + 5 7 2 0 0 0 0 + 5 8 1 0 0 0 0 + 6 10 2 0 0 0 0 + 7 14 1 0 0 0 0 + 8 13 1 0 0 0 0 + 8 15 1 0 0 0 0 + 8 16 1 0 0 0 0 + 9 11 1 0 0 0 0 + 9 18 1 0 0 0 0 + 10 12 1 0 0 0 0 + 10 19 1 0 0 0 0 + 11 12 2 0 0 0 0 + 11 20 1 0 0 0 0 + 12 21 1 0 0 0 0 +M CHG 1 1 -1 +M END +> +801 + +> +0.6 + +> +1 +2 + +> +18 +1 -0.9 +10 -0.15 +11 -0.15 +12 -0.15 +13 0.91 +14 0.15 +17 0.27 +18 0.15 +19 0.15 +2 -0.9 +20 0.15 +21 0.15 +3 0.03 +5 -0.18 +6 -0.15 +7 -0.3 +8 0.07 +9 -0.15 + +> +2 + +> +7 +1 1 acceptor +1 2 acceptor +1 3 cation +1 3 donor +3 1 2 13 anion +5 3 4 5 6 7 rings +6 4 6 9 10 11 12 rings + +> +13 + +> +0 + +> +0 + +> +0 + +> +0 + +> +0 + +> +1 + +> +1 + +> +0000032100000001 + +> +19.5653 + +> +35.666 + +> +1 1 18410013217136608694 +10608611 8 18337393858566906048 +11206711 2 17981333596590782830 +11769659 78 12535357753079887780 +124424 183 17967811643942126866 +12654215 9 18263925449534713868 +13380535 76 18339640148232832674 +14325111 11 18410013255712095577 +14911166 2 18340498802568737270 +15279308 100 18336836372128195060 +15775835 57 18413673500199140813 +16945 1 18340218422576708290 +17844478 74 18043270096074572139 +18186145 218 18272662237532389144 +20653085 51 18410860962287236201 +21028194 46 18335426806473554737 +21524375 3 18334289894196932995 +21947302 44 18334855004513486555 +23402655 69 18342171146833090685 +23493267 7 17603883199332416488 +23559900 14 18199773461140915892 +238 59 15804079125244740965 +25 1 18336551503874047873 +2748010 2 18125173864078903938 +528886 8 18411982477037724968 +63268167 104 18342743974667681472 +81228 2 17547010092349083291 + +> +250.81 +4.9 +1.95 +0.82 +1.27 +0.2 +0.03 +-1.84 +-1.19 +-0.26 +0 +0.32 +0 +0.21 + +> +548.117 + +> +137.2 + +> +2 +5 +10 + +$$$$ diff --git a/tests/data/sample_ligands/unfiltered/103061392.sdf b/tests/data/sample_ligands/unfiltered/103061392.sdf new file mode 100644 index 0000000..6be5427 --- /dev/null +++ b/tests/data/sample_ligands/unfiltered/103061392.sdf @@ -0,0 +1,36 @@ +103061392 + -OEChem-02242004582D + + 0 0 0 0 0 0 0 0 0999 V2000 +M END +> +103061392 + +> +1 + +> +KEGG + +> +C18210 + +> +polypeptide placental hormone + +> +C18210 +Chorionic somatomammotropin hormone +PL +Placental lactogen + +> +C18210 + +> +http://www.genome.jp/kegg/ + +> +http://www.genome.jp/dbget-bin/www_bget?cpd+C18210 + +$$$$ diff --git a/tests/data/sample_ligands/unfiltered/134970870.sdf b/tests/data/sample_ligands/unfiltered/134970870.sdf new file mode 100644 index 0000000..5d961b2 --- /dev/null +++ b/tests/data/sample_ligands/unfiltered/134970870.sdf @@ -0,0 +1,107 @@ +134970870 + -OEChem-02242004282D + + 12 11 0 1 0 0 0 0 0999 V2000 + 0.2500 -1.3250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.4625 0.7375 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9625 0.7375 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1750 -1.3250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3875 -0.0875 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + -2.6000 -0.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2500 -0.5000 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0 + -0.4625 -0.0875 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0 + 0.9625 -0.0875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.1750 -0.5000 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0 + 1.6750 -0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.8875 -0.0875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7 1 1 1 0 0 0 + 8 2 1 1 0 0 0 + 3 9 2 0 0 0 0 + 10 4 1 6 0 0 0 + 5 11 1 0 0 0 0 + 6 12 1 0 0 0 0 + 7 8 1 0 0 0 0 + 7 9 1 0 0 0 0 + 8 10 1 0 0 0 0 + 9 11 1 0 0 0 0 + 10 12 1 0 0 0 0 +M END +> +0 + +> +0 + +> +134970870 + +> +1 + +> +ChemIDplus + +> +0000057487 + +> +Sweetening Agents + +> +10597-68-9 +149014-33-5 +196419-06-4 +3812-57-5 +57-48-7 +69-67-0 +AI3-23514 +Advantose FS 95 +CCRIS 3335 +D-(-)-Fructose +D-(-)-Levulose +D-Fructose +EINECS 200-333-3 +Fructose +Fructose solution +Fructose, D- +Fructose, pure +Fruit sugar +Furucton +Hi-Fructo 970 +Krystar 300 +Levulose +Nevulose +Sugar, fruit +UNII-6YSS42VSEV +arabino-Hexulose + +> +10597-68-9 +149014-33-5 +196419-06-4 +3812-57-5 +57-48-7 +69-67-0 + +> +0000057487 + +> +http://chem.sis.nlm.nih.gov/chemidplus/ + +> +http://chem.sis.nlm.nih.gov/chemidplus/direct.jsp?result=advanced®no=0000057487 + +> +5984 1 + +> +1 +3 + +> +7 1 5 +8 2 5 +10 4 6 + +$$$$ diff --git a/tests/data/sample_ligands/unfiltered/135191341.sdf b/tests/data/sample_ligands/unfiltered/135191341.sdf new file mode 100644 index 0000000..cf0ee8d --- /dev/null +++ b/tests/data/sample_ligands/unfiltered/135191341.sdf @@ -0,0 +1,105 @@ +135191341 + -OEChem-02242004592D + + 23 23 0 1 0 0 0 0 0999 V2000 + 6.7508 3.8281 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4207 2.5781 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 9.6951 -0.6918 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5032 4.4531 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3382 -1.7969 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6683 1.9531 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 7.9640 0.7100 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5032 0.7031 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5032 -1.7969 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 7.8334 1.9531 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0 + 6.7508 2.5781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5858 2.5781 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0 + 3.5032 1.9531 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 9.1867 0.4501 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 8.9753 2.4615 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4207 0.0781 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0 + 9.8117 1.5326 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5858 3.8281 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4207 -1.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3382 0.7031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2556 0.0781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.2556 -1.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8269 0.7031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 11 2 0 0 0 0 + 2 13 2 0 0 0 0 + 3 14 2 0 0 0 0 + 4 18 1 0 0 0 0 + 5 19 2 0 0 0 0 + 6 11 1 0 0 0 0 + 12 6 1 6 0 0 0 + 7 10 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 13 1 0 0 0 0 + 16 8 1 6 0 0 0 + 9 19 1 0 0 0 0 + 10 11 1 6 0 0 0 + 10 15 1 0 0 0 0 + 12 13 1 0 0 0 0 + 12 18 1 0 0 0 0 + 14 17 1 0 0 0 0 + 15 17 1 0 0 0 0 + 16 19 1 0 0 0 0 + 16 20 1 0 0 0 0 + 20 21 1 0 0 0 0 + 21 22 1 0 0 0 0 + 21 23 1 0 0 0 0 +M END +> +0 + +> +0 + +> +135191341 + +> +1 + +> +ChemIDplus + +> +0073684807 + +> +73684-80-7 +L-Leucinamide, 5-oxo-L-prolyl-L-seryl- +Pyr-ser-leu-NH2 +Pyro-gln-ser-leu-amide +Pyroglutamine-serine-leucinamide +Pyroglutaminyl-seryl-leucinamide +Pyroglutamylserylleucinamide +Thyrotropin releasing hormone-AN +Trh-AN + +> +73684-80-7 + +> +0073684807 + +> +http://chem.sis.nlm.nih.gov/chemidplus/ + +> +http://chem.sis.nlm.nih.gov/chemidplus/direct.jsp?result=advanced®no=0073684807 + +> +173203 1 + +> +1 +3 + +> +10 11 6 +12 6 6 +16 8 6 + +$$$$ diff --git a/tests/data/sample_ligands/unfiltered/135355153.sdf b/tests/data/sample_ligands/unfiltered/135355153.sdf new file mode 100644 index 0000000..83712c0 --- /dev/null +++ b/tests/data/sample_ligands/unfiltered/135355153.sdf @@ -0,0 +1,41 @@ +135355153 + -OEChem-02242004292D + + 0 0 0 0 0 0 0 0 0999 V2000 +M END +> +0 + +> +Deposited Substance is allowed to be autogenerated + +> +135355153 + +> +1 + +> +ChemIDplus + +> +LK41100000 + +> +Natural Product + +> +F II (sugar fraction) +LK41100000 +NIOSH/LK4110000 + +> +LK41100000 + +> +http://chem.sis.nlm.nih.gov/chemidplus/ + +> +http://chem.sis.nlm.nih.gov/chemidplus/direct.jsp?result=advanced®no=LK41100000 + +$$$$ diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 610e457..4e16e9e 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -3,10 +3,15 @@ from api.utils.docking_utils import Receptor, ComplexReceptor, MonomerReceptor from api.utils.docking_utils import Ligand from api.utils.docking_utils import Docker -from api.utils.docking_utils import MonomerDocking, ComplexDocking +from api.utils.docking_utils import Docking, MonomerDocking, ComplexDocking +from api.utils.docking_utils import SDFMapping import os -NOT_IN_BAR = not os.environ.get("BAR") == "true" + +if os.environ.get("BAR") == "None": + NOT_IN_BAR = True +else: + NOT_IN_BAR = False class TestReceptorClasses(unittest.TestCase): @@ -71,6 +76,35 @@ def test_create_complex_receptor(self): self.assertEqual(receptor.monomers_list, ["A", "B"]) self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]]) + @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR") + def test_create_valid_docking(self): + """Test that the Docking instance is correct.""" + + receptor = "AT4G36360" + ligand = "443454_Gibberellin_A24" + docking = Docker.create_docking(receptor, ligand, "tests/data/") + + self.assertIsInstance(docking, Docking) + + @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR") + def test_create_docking_invalid_receptor(self): + """Test that invalid receptor returns an error message.""" + + receptor = "AT9G99999" + ligand = "443454_Gibberellin_A24" + docking = Docker.create_docking(receptor, ligand, "tests/data/") + + self.assertEqual(docking, "Receptor file not found") + + @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR") + def test_create_docking_invalid_ligand(self): + """Test that invalid ligand returns an error message""" + receptor = "AT4G36360" + ligand = "ABCD" + docking = Docker.create_docking(receptor, ligand, "tests/data/") + + self.assertEqual(docking, "Ligand file not found") + @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR") def test_docking_exists(self): """Test that Docker.create_docking returns None when the docking @@ -129,5 +163,28 @@ def test_docking_monomer_results(self): self.assertIn('6325_Ethylene', normalized_results['AT9G99999_monomer']) +class TestSDFMappingClass(unittest.TestCase): + + def test_create_mapping_filtered(self): + """Test that the correct mapping is returned""" + + mapping_results = SDFMapping.create_mapping_filtered("tests/data/sample_ligands/filtered/", "tests/data/") + correct_mapping = [{"value": "443453_Gibberellin_A15.sdf", "text": "Gibberellin_A15"}, {"value": "5984_D-(-)-Fructose.sdf", "text": "D-(-)-Fructose"}, {"value": "801_Auxin.sdf", "text": "Auxin"}, {"value": "73672_isoxaben.sdf", "text": "isoxaben"}] + self.assertEqual(mapping_results, correct_mapping) + self.assertTrue(os.path.exists("tests/data/sdf_mapping_filtered.json")) + if os.path.exists("tests/data/sdf_mapping_filtered.json"): + os.remove("tests/data/sdf_mapping_filtered.json") + + def test_create_mapping_unfiltered(self): + """Test that the correct mapping is returned""" + mapping = SDFMapping() + mapping_results = mapping.create_mapping_unfiltered("tests/data/sample_ligands/unfiltered/", "tests/data/") + correct_mapping = [{"value": "135355153.sdf", "text": "F II (sugar fraction),LK41100000,NIOSH/LK4110000"}, {"value": "134970870.sdf", "text": "10597-68-9,149014-33-5,196419-06-4,3812-57-5,57-48-7,69-67-0,AI3-23514,Advantose FS 95,CCRIS 3335,D-(-)-Fructose,D-(-)-Levulose,D-Fructose,EINECS 200-333-3,Fructose,Fructose solution,Fructose, D-,Fructose, pure,Fruit sugar,Furucton,Hi-Fructo 970,Krystar 300,Levulose,Nevulose,Sugar, fruit,UNII-6YSS42VSEV,arabino-Hexulose"}, {"value": "103061392.sdf", "text": "C18210,Chorionic somatomammotropin hormone,PL,Placental lactogen"}, {"value": "135191341.sdf", "text": "73684-80-7,L-Leucinamide, 5-oxo-L-prolyl-L-seryl-,Pyr-ser-leu-NH2,Pyro-gln-ser-leu-amide,Pyroglutamine-serine-leucinamide,Pyroglutaminyl-seryl-leucinamide,Pyroglutamylserylleucinamide,Thyrotropin releasing hormone-AN,Trh-AN"}] + self.assertEqual(mapping_results, correct_mapping) + self.assertTrue(os.path.exists("tests/data/sdf_mapping_unfiltered.json")) + if os.path.exists("tests/data/sdf_mapping_unfiltered.json"): + os.remove("tests/data/sdf_mapping_unfiltered.json") + + if __name__ == '__main__': unittest.main() From d7483d3b9a5e3e55fcaa8bdfd43c6c76d10f7f5c Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 4 Apr 2024 13:48:18 -0400 Subject: [PATCH 25/35] Fix tests to skip when not running on the BAR --- tests/utils/test_docking_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 4e16e9e..983f32f 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -8,10 +8,7 @@ import os -if os.environ.get("BAR") == "None": - NOT_IN_BAR = True -else: - NOT_IN_BAR = False +NOT_IN_BAR = not os.environ.get("BAR") == "true" class TestReceptorClasses(unittest.TestCase): From bfea1ac8f639b231d446c898befea07ff0cf5c77 Mon Sep 17 00:00:00 2001 From: Dien Nguyen Date: Thu, 4 Apr 2024 18:04:28 -0400 Subject: [PATCH 26/35] Add skip tags for tests and remove print statements --- api/utils/docking_utils.py | 4 ---- tests/utils/test_docking_utils.py | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index e2c6f4e..c128aed 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -533,9 +533,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): normalized residue-energyy dictionary. """ # create docking object - ct = datetime.datetime.now() ct_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print("Starting the docking process at {}".format(ct)) docking = Docker.create_docking(receptor, ligand, docking_pdb_path) if isinstance(docking, list): # receptor = receptor.split('.')[0] @@ -618,7 +616,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - print("return code" + str(completed_process.returncode)) if completed_process.returncode != 0: return "Receptor file not found" receptor_file = completed_process.stdout[:-1] @@ -627,7 +624,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): receptor_name = receptor_file[4:(receptor_file.index('.') + 2)] results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' - print(results_path) if os.path.exists(results_path): print("The docking between {0} and {1} has already been done.".format(receptor_name, diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 983f32f..d5e0d6c 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -162,6 +162,7 @@ def test_docking_monomer_results(self): class TestSDFMappingClass(unittest.TestCase): + @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR") def test_create_mapping_filtered(self): """Test that the correct mapping is returned""" @@ -172,6 +173,7 @@ def test_create_mapping_filtered(self): if os.path.exists("tests/data/sdf_mapping_filtered.json"): os.remove("tests/data/sdf_mapping_filtered.json") + @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR") def test_create_mapping_unfiltered(self): """Test that the correct mapping is returned""" mapping = SDFMapping() From c2ec1cd9ec6a19a13af6cfc01d9bf222d9c244f5 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 19:54:52 -0400 Subject: [PATCH 27/35] Linting... --- api/resources/fastpheno.py | 1 + api/resources/sequence.py | 1 + api/utils/docking_utils.py | 161 ++++++++++++++++-------------- tests/utils/test_docking_utils.py | 40 +++++--- 4 files changed, 116 insertions(+), 87 deletions(-) diff --git a/api/resources/fastpheno.py b/api/resources/fastpheno.py index 81701f5..7a9b892 100644 --- a/api/resources/fastpheno.py +++ b/api/resources/fastpheno.py @@ -3,6 +3,7 @@ Author: Vince L Fastpheno endpoint for retrieving tree data """ + from flask_restx import Namespace, Resource from api import db from api.models.fastpheno import Sites, Trees, Band, Height diff --git a/api/resources/sequence.py b/api/resources/sequence.py index 8496669..9b802f5 100644 --- a/api/resources/sequence.py +++ b/api/resources/sequence.py @@ -4,6 +4,7 @@ Sequence endpoint that returns the amino acid sequence of a given protein, with additional options for predicted sequences (Phyre2) that we host """ + from flask_restx import Namespace, Resource from api.utils.bar_utils import BARUtils from markupsafe import escape diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index c128aed..f8cc951 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -8,7 +8,7 @@ import json import datetime -HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex' +HEX_BIN_PATH = "/usr/local/bin/hex/bin/hex" class Receptor(ABC): @@ -18,6 +18,7 @@ class Receptor(ABC): name (str): the name of the receptor file_path (str): the relative path to the receptors pdb file """ + @abstractmethod def __init__(self, name: str, file_path: str): self.name = name @@ -25,13 +26,14 @@ def __init__(self, name: str, file_path: str): class MonomerReceptor(Receptor): - """ A class that represents a receptor that is a monomer, meaning it consists + """A class that represents a receptor that is a monomer, meaning it consists of only one chain. --- Attributes --- name (str): the name of the receptor file_path (str): the relative path to the receptors pdb file """ + name: str file_path: str @@ -40,7 +42,7 @@ def __init__(self, name, file_path): class ComplexReceptor(Receptor): - """ A class that represents a receptor that is a complex, meaning it consists + """A class that represents a receptor that is a complex, meaning it consists of more than one chain. --- Attributes --- @@ -49,6 +51,7 @@ class ComplexReceptor(Receptor): monomer_list (List[str]): the list of monomers that make up the complex line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]] """ + def __init__(self, name: str, file_path: str, monomers_list: List[str]): super().__init__(name, file_path) self.monomers_list = monomers_list @@ -65,12 +68,12 @@ def separate_monomers(self): line = file.readline() prev = None curr_line = 0 - while line != '': + while line != "": # the first line of the first monomer if line[:12] == "ATOM 1 ": prev = curr_line - 1 # the last line of a monomer - elif line[:3] == 'TER': + elif line[:3] == "TER": # line_numbers.append(curr_line) line_numbers.append([prev + 1, curr_line]) prev = curr_line @@ -87,6 +90,7 @@ class Ligand: name (str): the name of the receptor file_path (str): the relative path to the receptors pdb file """ + def __init__(self, name: str, file_path: str): self.name = name self.file_path = file_path @@ -112,15 +116,19 @@ def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str): self.ligand_reserved_list = [] def hex_docking(self): - """Run hex docking using the command line. - """ - hex_output_file = open(self.results_path + 'hex_output.txt', "w") - - # Function to call Hex, including hard coded settings - - # max_docking_solutions set at 5 for testing - hex_command = """ open_receptor """ + self.receptor.file_path + """ - open_ligand """ + self.ligand.file_path + """ + """Run hex docking using the command line.""" + hex_output_file = open(self.results_path + "hex_output.txt", "w") + + # Function to call Hex, including hard coded settings + + # max_docking_solutions set at 5 for testing + hex_command = ( + """ open_receptor """ + + self.receptor.file_path + + """ + open_ligand """ + + self.ligand.file_path + + """ docking_correlation 1 docking_score_threshold 0 max_docking_solutions 25 @@ -131,12 +139,13 @@ def hex_docking(self): receptor_origin C-825:VAL-O commit_edits activate_docking - save_range 1 100 """ \ - + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name) - subprocess.Popen(HEX_BIN_PATH, - stdin=subprocess.PIPE, - stderr=subprocess.STDOUT, - stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8'))) + save_range 1 100 """ + + self.results_path + + """ %s pdb""" % (self.receptor.name + "_" + self.ligand.name) + ) + subprocess.Popen( + HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=hex_output_file + ).communicate(bytes(hex_command.encode("utf-8"))) hex_output_file.close() ct = datetime.datetime.now() print("current time:-", ct) @@ -152,7 +161,7 @@ def crte_ligand_reserved_attr(self): """ line_numbers = [] for filename in os.listdir(self.results_path): - if filename[-3:] == 'pdb': + if filename[-3:] == "pdb": file = open(self.results_path + filename, "r") lines = file.readlines() for i in range(len(lines)): @@ -167,7 +176,7 @@ def parse_hex_output(self): where its value is the total number of solutions. For example: {num_soln : 5, 1 : [2, 4], 2 : [1, 3, 5]} """ - hex_output = open(self.results_path + 'hex_output.txt', "r") + hex_output = open(self.results_path + "hex_output.txt", "r") lines = hex_output.readlines() # line number where the clustering starts and ends result_start = 0 @@ -219,10 +228,10 @@ def result_dict_generator(self, monomer_number, threshold): reference = {} for line in receptor_file_lines: splitted_line = line.split() - if line[0:4] == 'ATOM': + if line[0:4] == "ATOM": # check if chain name and residue are in the same column, e.g. A1000 - if re.search(r'\d', splitted_line[4]) is None: + if re.search(r"\d", splitted_line[4]) is None: residue = splitted_line[5] else: residue = splitted_line[4][1:] @@ -239,7 +248,7 @@ def result_dict_generator(self, monomer_number, threshold): if int(residue) in reference: reference[int(residue)][int(splitted_line[1])] = tuple(coord) else: - reference[int(residue)] = {int(splitted_line[1]) : tuple(coord)} + reference[int(residue)] = {int(splitted_line[1]): tuple(coord)} # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}}, @@ -247,7 +256,7 @@ def result_dict_generator(self, monomer_number, threshold): ac = {} result_list = [] for filename in os.listdir(self.results_path): - if filename[-3:] == 'pdb': + if filename[-3:] == "pdb": result_list.append(filename) lowest_en = None # to keep track of lowest energy @@ -256,10 +265,10 @@ def result_dict_generator(self, monomer_number, threshold): cluster_dict = self.parse_hex_output() for i in range(len(result_list)): - energy = '' + energy = "" # get the ligand_reserved section of the result file - file = open(self.results_path + result_list[i], 'r') + file = open(self.results_path + result_list[i], "r") ligand_reserved_start = self.ligand_reserved_list[i] ligand_reserved_section = file.readlines()[ligand_reserved_start:] @@ -267,18 +276,18 @@ def result_dict_generator(self, monomer_number, threshold): residue_set = set() coor = [] for line in ligand_reserved_section: - if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '): + if "REMARK" in line.split(" ") and "Energy" in line.split(" "): cluster_size = len(cluster_dict[i + 1]) - total_solutions = cluster_dict['num_soln'] + total_solutions = cluster_dict["num_soln"] # energy is weighed according to the number of solutions # in that cluster - energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size + energy = ((float(line.split(" ")[6][:-1])) / total_solutions) * cluster_size # record values if lowest energy if lowest_en is None or energy < lowest_en: lowest_en = energy - elif line[:4] == 'ATOM': + elif line[:4] == "ATOM": # coordinates of one atom coordinates = tuple(map(float, filter(None, line.split()[6:9]))) coor.append(coordinates) @@ -292,9 +301,15 @@ def result_dict_generator(self, monomer_number, threshold): for aa in reference[res].keys(): # for each atom of that amino acid # check if the distance between atoms of the ligands # and of the amino acid are lower than chosen threshold (5) - distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2, - (reference[res][aa][1] - atom[1]) ** 2, - (reference[res][aa][2] - atom[2]) ** 2])) + distance = math.sqrt( + sum( + [ + (reference[res][aa][0] - atom[0]) ** 2, + (reference[res][aa][1] - atom[1]) ** 2, + (reference[res][aa][2] - atom[2]) ** 2, + ] + ) + ) distances.append(distance) @@ -347,7 +362,7 @@ def best_result(self): pass def crte_receptor_dict(self, threshold): - """"Return a dictionary that contains the residue-energy + """ "Return a dictionary that contains the residue-energy dictionary of the monomer. This is not necessary, but maintains consistency between monomer and complex receptor dictionaries. """ @@ -435,14 +450,14 @@ def separate_results(self): line = result_file.readline() curr_line = 0 prev = None - while line != '': + while line != "": # the start of the first chain if line.split()[0] == "ATOM" and line.split()[1] == "1": # if line.startswith('ATOM 1 '): prev = curr_line - 1 # the end of a chain - elif line[0:3] == 'TER': + elif line[0:3] == "TER": line_numbers.append([prev + 1, curr_line]) prev = curr_line @@ -462,7 +477,7 @@ def crte_receptor_dict(self, threshold): ligand_res = {} res_dict = self.result_dict_generator(i, threshold) ligand_res[self.ligand.name] = res_dict - all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res}) + all_monomers.append({self.receptor.name + "_" + self.receptor.monomers_list[i]: ligand_res}) return all_monomers def normalize_results(self, threshold): @@ -547,7 +562,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): elif docking == "Ligand file not found": return "Ligand file not found" - results_path = docking_pdb_path + docking.receptor.name + '_' + ligand + '/' + results_path = docking_pdb_path + docking.receptor.name + "_" + ligand + "/" # create folder to store docking results os.makedirs(results_path) @@ -559,11 +574,11 @@ def start(receptor: str, ligand: str, docking_pdb_path: str): normalized_results = docking.normalize_results(5) final_json = {} final_json["energies_json"] = normalized_results - final_json["path"] = '//bar.utoronto.ca/HEX_RESULTS/' + docking.receptor.name + '_' + ligand + '/' - final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + '_' + ligand + '0001.pdb' + final_json["path"] = "//bar.utoronto.ca/HEX_RESULTS/" + docking.receptor.name + "_" + ligand + "/" + final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + "_" + ligand + "0001.pdb" final_json["date"] = ct_string new_json = docking.results_path + "final.json" - with open(new_json, 'w') as file: + with open(new_json, "w") as file: file.write(json.dumps(final_json)) print("current time:-", datetime.datetime.now()) return final_json @@ -575,27 +590,24 @@ def create_receptor(receptor_name: str, receptor_file_path: str): with open(receptor_file_path) as f: is_monomer = True for line in f.readlines(): - if re.match(r'COMPND \d CHAIN: \w, \w*', line) is not None: + if re.match(r"COMPND \d CHAIN: \w, \w*", line) is not None: is_monomer = False # if the receptor would be a monomer the regex would be # r'COMPND \d CHAIN: \w;' # To make a list of the monomers' labels - print(receptor_name + ' identified as a protein complex') - if line[11:16] == 'CHAIN': - monomers_list = line.split(': ')[-1].split(', ') + print(receptor_name + " identified as a protein complex") + if line[11:16] == "CHAIN": + monomers_list = line.split(": ")[-1].split(", ") # The COMPND line ends with ';' therefore it needs to be # removed from the last label monomers_list[-1] = monomers_list[-1][0] - new_receptor = ComplexReceptor(receptor_name, - receptor_file_path, - monomers_list) + new_receptor = ComplexReceptor(receptor_name, receptor_file_path, monomers_list) return new_receptor print("Unknown pdb structure, need further investigation") if is_monomer: - new_receptor = MonomerReceptor(receptor_name, - receptor_file_path) + new_receptor = MonomerReceptor(receptor_name, receptor_file_path) return new_receptor def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): @@ -607,40 +619,39 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str): # check that the docking combination has not been run before # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/' - if '.' in receptor_name: - receptor_name = receptor_name[:receptor_name.index('.')] - command = ['ls ' + 'AF2_' + receptor_name + '*.pdb'] - completed_process = subprocess.run(command, - shell=True, - cwd=receptor_folder, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + if "." in receptor_name: + receptor_name = receptor_name[: receptor_name.index(".")] + command = ["ls " + "AF2_" + receptor_name + "*.pdb"] + completed_process = subprocess.run( + command, shell=True, cwd=receptor_folder, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) if completed_process.returncode != 0: return "Receptor file not found" receptor_file = completed_process.stdout[:-1] receptor_file_path = receptor_folder + receptor_file - receptor_name = receptor_file[4:(receptor_file.index('.') + 2)] + receptor_name = receptor_file[4 : (receptor_file.index(".") + 2)] - results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/' + results_path = docking_pdb_path + receptor_name + "_" + ligand_name + "/" if os.path.exists(results_path): - print("The docking between {0} and {1} has already been done.".format(receptor_name, - ligand_name)) + print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name)) return [None, results_path] receptor = Docker.create_receptor(receptor_name, receptor_file_path) # find ligand file and create ligand object - ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/' + ligand_folder = "/DATA/HEX_API/HEX_SELECTED_LIGANDS/" ligand_file_found = False for ligand_file in os.listdir(ligand_folder): - if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \ - ligand_file.split('.')[1] == 'sdf' and \ - ligand_file[:-4].lower() == ligand_name.lower(): + if ( + ligand_file[0] != "." + and len(ligand_file.split(".")) == 2 + and ligand_file.split(".")[1] == "sdf" + and ligand_file[:-4].lower() == ligand_name.lower() + ): ligand_file_found = True - ligand_file_path = ligand_folder + '/' + ligand_file + ligand_file_path = ligand_folder + "/" + ligand_file ligand = Ligand(ligand_name, ligand_file_path) if not ligand_file_found: @@ -697,10 +708,10 @@ def create_mapping_filtered(folder_path: str, results_path: str): sdf_files = os.listdir(folder_path) for file in sdf_files: if file[0] != "." and file[-4:] == ".sdf": - name = file[file.index("_") + 1:-4] - mapped_sdf.append({'value': file, 'text': name}) + name = file[file.index("_") + 1 : -4] + mapped_sdf.append({"value": file, "text": name}) json_file = results_path + "sdf_mapping_filtered.json" - with open(json_file, 'w') as file: + with open(json_file, "w") as file: file.write(json.dumps(mapped_sdf)) return mapped_sdf @@ -720,8 +731,8 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str): if file[0] != "." and file[-4:] == ".sdf": names = self.get_substance_name(file, folder_path) all_names = ",".join(names) - mapped_sdf.append({'value': file, 'text': all_names}) + mapped_sdf.append({"value": file, "text": all_names}) json_file = results_path + "sdf_mapping_unfiltered.json" - with open(json_file, 'w') as file: + with open(json_file, "w") as file: file.write(json.dumps(mapped_sdf)) return mapped_sdf diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index d5e0d6c..7caf703 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -27,9 +27,9 @@ def test_complex_receptor_init(self): """ monomers_list = ["A", "B"] - complex_receptor = ComplexReceptor("test_complex_receptor", - "tests/data/AF2_AT8G88888_complex.pdb", - monomers_list) + complex_receptor = ComplexReceptor( + "test_complex_receptor", "tests/data/AF2_AT8G88888_complex.pdb", monomers_list + ) self.assertEqual(complex_receptor.name, "test_complex_receptor") self.assertEqual(complex_receptor.file_path, "tests/data/AF2_AT8G88888_complex.pdb") self.assertEqual(complex_receptor.monomers_list, monomers_list) @@ -134,10 +134,10 @@ def test_docking_complex_results(self): self.assertIsInstance(normalized_results, dict) self.assertIsNot(len(normalized_results), 0) - self.assertIn('AT8G88888_complex_A', normalized_results) - self.assertIn('AT8G88888_complex_B', normalized_results) - self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_A']) - self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_B']) + self.assertIn("AT8G88888_complex_A", normalized_results) + self.assertIn("AT8G88888_complex_B", normalized_results) + self.assertIn("6325_Ethylene", normalized_results["AT8G88888_complex_A"]) + self.assertIn("6325_Ethylene", normalized_results["AT8G88888_complex_B"]) def test_docking_monomer_results(self): """Test that correct dictionary is created in normalized_results for @@ -156,8 +156,8 @@ def test_docking_monomer_results(self): self.assertIsInstance(normalized_results, dict) self.assertIsNot(len(normalized_results), 0) - self.assertIn('AT9G99999_monomer', normalized_results) - self.assertIn('6325_Ethylene', normalized_results['AT9G99999_monomer']) + self.assertIn("AT9G99999_monomer", normalized_results) + self.assertIn("6325_Ethylene", normalized_results["AT9G99999_monomer"]) class TestSDFMappingClass(unittest.TestCase): @@ -167,7 +167,12 @@ def test_create_mapping_filtered(self): """Test that the correct mapping is returned""" mapping_results = SDFMapping.create_mapping_filtered("tests/data/sample_ligands/filtered/", "tests/data/") - correct_mapping = [{"value": "443453_Gibberellin_A15.sdf", "text": "Gibberellin_A15"}, {"value": "5984_D-(-)-Fructose.sdf", "text": "D-(-)-Fructose"}, {"value": "801_Auxin.sdf", "text": "Auxin"}, {"value": "73672_isoxaben.sdf", "text": "isoxaben"}] + correct_mapping = [ + {"value": "443453_Gibberellin_A15.sdf", "text": "Gibberellin_A15"}, + {"value": "5984_D-(-)-Fructose.sdf", "text": "D-(-)-Fructose"}, + {"value": "801_Auxin.sdf", "text": "Auxin"}, + {"value": "73672_isoxaben.sdf", "text": "isoxaben"}, + ] self.assertEqual(mapping_results, correct_mapping) self.assertTrue(os.path.exists("tests/data/sdf_mapping_filtered.json")) if os.path.exists("tests/data/sdf_mapping_filtered.json"): @@ -178,12 +183,23 @@ def test_create_mapping_unfiltered(self): """Test that the correct mapping is returned""" mapping = SDFMapping() mapping_results = mapping.create_mapping_unfiltered("tests/data/sample_ligands/unfiltered/", "tests/data/") - correct_mapping = [{"value": "135355153.sdf", "text": "F II (sugar fraction),LK41100000,NIOSH/LK4110000"}, {"value": "134970870.sdf", "text": "10597-68-9,149014-33-5,196419-06-4,3812-57-5,57-48-7,69-67-0,AI3-23514,Advantose FS 95,CCRIS 3335,D-(-)-Fructose,D-(-)-Levulose,D-Fructose,EINECS 200-333-3,Fructose,Fructose solution,Fructose, D-,Fructose, pure,Fruit sugar,Furucton,Hi-Fructo 970,Krystar 300,Levulose,Nevulose,Sugar, fruit,UNII-6YSS42VSEV,arabino-Hexulose"}, {"value": "103061392.sdf", "text": "C18210,Chorionic somatomammotropin hormone,PL,Placental lactogen"}, {"value": "135191341.sdf", "text": "73684-80-7,L-Leucinamide, 5-oxo-L-prolyl-L-seryl-,Pyr-ser-leu-NH2,Pyro-gln-ser-leu-amide,Pyroglutamine-serine-leucinamide,Pyroglutaminyl-seryl-leucinamide,Pyroglutamylserylleucinamide,Thyrotropin releasing hormone-AN,Trh-AN"}] + correct_mapping = [ + {"value": "135355153.sdf", "text": "F II (sugar fraction),LK41100000,NIOSH/LK4110000"}, + { + "value": "134970870.sdf", + "text": "10597-68-9,149014-33-5,196419-06-4,3812-57-5,57-48-7,69-67-0,AI3-23514,Advantose FS 95,CCRIS 3335,D-(-)-Fructose,D-(-)-Levulose,D-Fructose,EINECS 200-333-3,Fructose,Fructose solution,Fructose, D-,Fructose, pure,Fruit sugar,Furucton,Hi-Fructo 970,Krystar 300,Levulose,Nevulose,Sugar, fruit,UNII-6YSS42VSEV,arabino-Hexulose", + }, + {"value": "103061392.sdf", "text": "C18210,Chorionic somatomammotropin hormone,PL,Placental lactogen"}, + { + "value": "135191341.sdf", + "text": "73684-80-7,L-Leucinamide, 5-oxo-L-prolyl-L-seryl-,Pyr-ser-leu-NH2,Pyro-gln-ser-leu-amide,Pyroglutamine-serine-leucinamide,Pyroglutaminyl-seryl-leucinamide,Pyroglutamylserylleucinamide,Thyrotropin releasing hormone-AN,Trh-AN", + }, + ] self.assertEqual(mapping_results, correct_mapping) self.assertTrue(os.path.exists("tests/data/sdf_mapping_unfiltered.json")) if os.path.exists("tests/data/sdf_mapping_unfiltered.json"): os.remove("tests/data/sdf_mapping_unfiltered.json") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() From e2b8045dce61b14b8887c1ebb361e819915533b4 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 20:13:21 -0400 Subject: [PATCH 28/35] Minor updates. --- api/utils/docking_utils.py | 15 ++++++++++++--- tests/utils/test_docking_utils.py | 4 ---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index f8cc951..024be5f 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -148,8 +148,6 @@ def hex_docking(self): ).communicate(bytes(hex_command.encode("utf-8"))) hex_output_file.close() ct = datetime.datetime.now() - print("current time:-", ct) - print("Hex docking completed") def crte_ligand_reserved_attr(self): """This function populates the Docking instance's ligand_reserved_list attribute @@ -160,14 +158,17 @@ def crte_ligand_reserved_attr(self): it begins at line 1499, and so on ... """ line_numbers = [] + for filename in os.listdir(self.results_path): if filename[-3:] == "pdb": file = open(self.results_path + filename, "r") lines = file.readlines() + for i in range(len(lines)): if "Docked ligand coordinates..." in lines[i]: line_numbers.append(i) break + self.ligand_reserved_list = line_numbers def parse_hex_output(self): @@ -181,28 +182,35 @@ def parse_hex_output(self): # line number where the clustering starts and ends result_start = 0 result_end = 0 + for i in range(len(lines)): splitted_line = lines[i].split(" ") if len(splitted_line) > 8 and splitted_line[0] == "Clst": result_start = i + 2 if len(splitted_line) > 2 and "save_range" in splitted_line: result_end = i - 2 + clustering_lines = lines[result_start:result_end] clusters = {} clusters["num_soln"] = len(clustering_lines) + for line in clustering_lines: cleaned_line = line.strip().split(" ") res = [] + # only keep non-blank items in line for ch in cleaned_line: if ch != "": res.append(ch) + clst = int(res[0]) sln = int(res[1]) + if clst not in clusters: clusters[clst] = [sln] else: clusters[clst].append(sln) + return clusters def result_dict_generator(self, monomer_number, threshold): @@ -228,8 +236,8 @@ def result_dict_generator(self, monomer_number, threshold): reference = {} for line in receptor_file_lines: splitted_line = line.split() - if line[0:4] == "ATOM": + if line[0:4] == "ATOM": # check if chain name and residue are in the same column, e.g. A1000 if re.search(r"\d", splitted_line[4]) is None: residue = splitted_line[5] @@ -255,6 +263,7 @@ def result_dict_generator(self, monomer_number, threshold): # The energy for each reference element will be stored in dictionary 'ac' ac = {} result_list = [] + for filename in os.listdir(self.results_path): if filename[-3:] == "pdb": result_list.append(filename) diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py index 7caf703..22b8f8d 100644 --- a/tests/utils/test_docking_utils.py +++ b/tests/utils/test_docking_utils.py @@ -199,7 +199,3 @@ def test_create_mapping_unfiltered(self): self.assertTrue(os.path.exists("tests/data/sdf_mapping_unfiltered.json")) if os.path.exists("tests/data/sdf_mapping_unfiltered.json"): os.remove("tests/data/sdf_mapping_unfiltered.json") - - -if __name__ == "__main__": - unittest.main() From 2c4bace4a7ce5706b9241f3a6d43116279f42b99 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 20:20:00 -0400 Subject: [PATCH 29/35] Should work this time. --- api/utils/docking_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py index 024be5f..68987ff 100755 --- a/api/utils/docking_utils.py +++ b/api/utils/docking_utils.py @@ -148,6 +148,8 @@ def hex_docking(self): ).communicate(bytes(hex_command.encode("utf-8"))) hex_output_file.close() ct = datetime.datetime.now() + print("current time:-", ct) + print("Hex docking completed") def crte_ligand_reserved_attr(self): """This function populates the Docking instance's ligand_reserved_list attribute From 37a3a040b75dee39e2c0f069c370b3a65690d80f Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 20:26:03 -0400 Subject: [PATCH 30/35] Updated dependancies. --- requirements.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2af6bf4..3de5643 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,31 +1,31 @@ aniso8601==9.0.1 async-timeout==4.0.3 attrs==23.2.0 -black==24.3.0 +black==24.4.1 blinker==1.7.0 cachelib==0.9.0 certifi==2024.2.2 charset-normalizer==3.3.2 click==8.1.7 -coverage==7.4.4 +coverage==7.5.0 Deprecated==1.2.14 flake8==7.0.0 -Flask==3.0.2 +Flask==3.0.3 Flask-Caching==2.1.0 Flask-Cors==4.0.0 -Flask-Limiter==3.5.1 -flask-marshmallow==1.2.0 +Flask-Limiter==3.6.0 +flask-marshmallow==1.2.1 flask-restx==1.3.0 Flask-SQLAlchemy==3.1.1 greenlet==3.0.3 -idna==3.6 -importlib_resources==6.3.1 +idna==3.7 +importlib_resources==6.4.0 iniconfig==2.0.0 -itsdangerous==2.1.2 +itsdangerous==2.2.0 Jinja2==3.1.3 jsonschema==4.21.1 jsonschema-specifications==2023.12.1 -limits==3.10.1 +limits==3.11.0 markdown-it-py==3.0.0 MarkupSafe==2.1.5 marshmallow==3.21.1 @@ -36,8 +36,8 @@ mysqlclient==2.2.4 ordered-set==4.1.0 packaging==24.0 pathspec==0.12.1 -platformdirs==4.2.0 -pluggy==1.4.0 +platformdirs==4.2.1 +pluggy==1.5.0 pycodestyle==2.11.1 pyflakes==3.2.0 Pygments==2.17.2 @@ -45,14 +45,14 @@ pyrsistent==0.20.0 pytest==8.1.1 python-dateutil==2.9.0.post0 pytz==2024.1 -redis==5.0.3 -referencing==0.34.0 +redis==5.0.4 +referencing==0.35.0 requests==2.31.0 rich==13.7.1 rpds-py==0.18.0 six==1.16.0 -SQLAlchemy==2.0.28 -typing_extensions==4.10.0 +SQLAlchemy==2.0.29 +typing_extensions==4.11.0 urllib3==2.2.1 -Werkzeug==3.0.1 +Werkzeug==3.0.2 wrapt==1.16.0 From 114b5e43331cd8d3feff50d416d213ad277b9e9d Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 20:31:56 -0400 Subject: [PATCH 31/35] Update GitHub Actions and Docker stuff (not tested). --- .github/workflows/bar-api.yml | 2 +- docker-compose.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/bar-api.yml b/.github/workflows/bar-api.yml index abd298e..ddc8dfb 100644 --- a/.github/workflows/bar-api.yml +++ b/.github/workflows/bar-api.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - python-version: [3.8, 3.9, 3.10.13, 3.11, 3.12] + python-version: [3.8, 3.9, 3.10.14, 3.11, 3.12] services: redis: diff --git a/docker-compose.yml b/docker-compose.yml index a2fea40..e723e92 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ version: "3.7" services: mysqldb: - image: mysql:8.1.0 + image: mysql:8.3.0 container_name: BAR_mysqldb # Must use this for mariadb client to connect command: --default-authentication-plugin=mysql_native_password @@ -12,7 +12,7 @@ services: - MYSQL_ROOT_PASSWORD=root redis: - image: redis:7.2.1 + image: redis:7.2.4 container_name: BAR_redis restart: always ports: From 02e644696bfba477ab4ea9e844da3808a0cfc339 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 20:46:07 -0400 Subject: [PATCH 32/35] Working on readthedocs. --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 96e422b..735e5c9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- project = "BAR API" -copyright = "2023, BAR Developers" +copyright = "2024, BAR Developers" author = "BAR Developers" # The full version, including alpha/beta/rc tags From f16eb4e7cb8540bb0e96fc5540564d5e016afdb3 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 20:47:21 -0400 Subject: [PATCH 33/35] Added readthedocs.yaml --- .readthedocs.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..789ff29 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,32 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +formats: + - pdf + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt + - method: pip + path: . From 50bacc68eaa26fc86a4af5747ce505fdb05c9f85 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 20:59:07 -0400 Subject: [PATCH 34/35] Working on documentation. --- .readthedocs.yaml | 3 +-- docs/requirements.txt | 42 +++++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 789ff29..98f605d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -21,6 +21,7 @@ sphinx: # Optionally build your docs in additional formats such as PDF and ePub formats: - pdf + - epub # Optional but recommended, declare the Python requirements required # to build your documentation @@ -28,5 +29,3 @@ formats: python: install: - requirements: docs/requirements.txt - - method: pip - path: . diff --git a/docs/requirements.txt b/docs/requirements.txt index 91f7292..8b37131 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,27 +1,27 @@ -alabaster==0.7.13 -Babel==2.12.1 -beautifulsoup4==4.12.2 -certifi==2023.7.22 -charset-normalizer==3.2.0 -docutils==0.20.1 -furo==2023.8.19 -idna==3.4 +alabaster==0.7.16 +Babel==2.14.0 +beautifulsoup4==4.12.3 +certifi==2024.2.2 +charset-normalizer==3.3.2 +docutils==0.21.2 +furo==2024.1.29 +idna==3.7 imagesize==1.4.1 -Jinja2==3.1.2 -MarkupSafe==2.1.3 -packaging==23.1 -Pygments==2.16.1 -pytz==2023.3 +Jinja2==3.1.3 +MarkupSafe==2.1.5 +packaging==24.0 +Pygments==2.17.2 +pytz==2024.1 requests==2.31.0 snowballstemmer==2.2.0 -soupsieve==2.4.1 -Sphinx==7.2.4 +soupsieve==2.5 +Sphinx==7.3.7 sphinx-basic-ng==1.0.0b1 sphinx-copybutton==0.5.2 -sphinxcontrib-applehelp==1.0.7 -sphinxcontrib-devhelp==1.0.5 -sphinxcontrib-htmlhelp==2.0.4 +sphinxcontrib-applehelp==1.0.8 +sphinxcontrib-devhelp==1.0.6 +sphinxcontrib-htmlhelp==2.0.5 sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.6 -sphinxcontrib-serializinghtml==1.1.9 -urllib3==2.0.4 +sphinxcontrib-qthelp==1.0.7 +sphinxcontrib-serializinghtml==1.1.10 +urllib3==2.2.1 From 1a9ac7c556955d7e4972de8b8b39581db9db2246 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Thu, 25 Apr 2024 21:01:16 -0400 Subject: [PATCH 35/35] Don't fail on warning. --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 98f605d..a7ebce6 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -16,7 +16,7 @@ sphinx: # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs # builder: "dirhtml" # Fail on all warnings to avoid broken references - fail_on_warning: true + # fail_on_warning: true # Optionally build your docs in additional formats such as PDF and ePub formats: