From ce0435525bf4cbe2c35935a64a00957956350d53 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <diennguyen@DienNguyen>
Date: Thu, 26 Oct 2023 12:36:16 -0400
Subject: [PATCH 01/35] original docking_utils.py file

---
 api/utils/docking_utils.py | 497 +++++++++++++++++++++++++++++++++++++
 1 file changed, 497 insertions(+)
 create mode 100755 api/utils/docking_utils.py

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
new file mode 100755
index 0000000..755007f
--- /dev/null
+++ b/api/utils/docking_utils.py
@@ -0,0 +1,497 @@
+from flask import Flask, flash, request, redirect, url_for, send_from_directory
+from flask_restx import Api
+from flask.templating import render_template
+from werkzeug.utils import secure_filename
+import re
+import os
+import math
+import shutil
+import json
+import subprocess
+import random
+import sys
+from datetime import date
+
+
+def hex_docking(rec_lig,rec_lig2,receptor, ligand, date,docking_pdb_path):
+
+# Function to call Hex, including hard coded settings
+
+
+	code = """ open_receptor  """ + docking_pdb_path + """results/receptor_to_dock/""" + receptor + """.pdb
+open_ligand  """ + docking_pdb_path +"""results/ligand_to_dock/""" + ligand + """.pdb
+docking_correlation 1
+docking_score_threshold 0
+max_docking_solutions 50
+docking_receptor_stepsize 5.50
+docking_ligand_stepsize 5.50
+docking_alpha_stepsize 2.80
+docking_main_scan 16
+receptor_origin C-825:VAL-O
+commit_edits
+activate_docking
+save_range 1 100 """ + docking_pdb_path + """results/%s_folder_%s/%s/result %s pdb""" % (rec_lig, date, rec_lig2, rec_lig)
+	subprocess.Popen('/home/yyu/hex/bin/hex', stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8')))
+
+
+
+
+def best_result(file_name, monomer, rec_lig, receptor, ligand):
+
+	# Function to generate the "best docking results", being the result with the best score and with the residue with the best contact frequency
+
+	file_name_dir = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_'  + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results
+	file_name_path = str(file_name_dir + file_name[:-20] + '.pdb') #directory for the result, identifies as the best result
+	des1 = file_name_dir + 'best_docking_results_for_'+ file_name[:-24] + '.pdb' #destination directory for the best_docking_result file
+	shutil.copyfile(file_name_path,des1)
+
+	#Same thing done with the ligand file only
+	ori2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name
+	des2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb'
+	shutil.copyfile(ori2,des2)
+
+
+	# This is to create a copy of that file with 'Z' as the name of the chain in the ligand,
+	# it is important for the 3dsjmol visualization
+
+	with open(str(file_name_dir + 'best_docking_results_for_' + file_name[:-24] + '.pdb'), 'r') as file: #to not modify the chain name for the protein chains
+		lines = file.readlines()
+		subpart1 = lines[:lines.index(
+			'REMARK    Docked ligand coordinates...\n')]  #subpart 1 is from start to 1st line in ligand coordinates
+		subpart2 = lines[lines.index(
+			'REMARK    Docked ligand coordinates...\n'):] #subpart 2 from 1st line in ligand coordinates to end of file
+	with open(str(file_name_dir + 'best_docking_results_for_' + receptor + '_' + monomer + '_' + ligand + '.pdb'), 'w') as file:
+		for l in subpart1:
+			file.write(l)
+		for line in subpart2:
+			if line[0:4] == 'ATOM' or line[:6] == 'HETATM' or line[:3] == 'TER':
+				newline = line[:21] + 'Z' + line[22:]
+				file.write(newline)
+			else:
+				file.write(line)
+	print('best docking result file is generated for ' + file_name[:-24])
+
+
+
+
+def separate_results(monomer, file_dir, first_file_name, dir_final, monomers_list):
+
+	# Function to separate the multimer file into its monomers for every result file created by hex
+
+	ends = [] #this list will be modified with the indices of every monomer's terminal line + the first coordinate's line index
+	# Open the .pdb file to separate
+	with open (file_dir + first_file_name, 'r+') as r:
+		lines = r.readlines()
+		for l in lines:
+			if l.startswith('ATOM      1  '):
+				ends.append(lines.index(l)) #and save the index of the first coordinate's line in the list ends
+
+		# Searches the .pdb files for the lines that indicate the end of a chain
+		for l in lines:
+			if l[0:3] == 'TER':
+				ends.append(lines.index(l)) #and add their indexes in the ends list
+
+		if os.path.isdir(dir_final) == False: #create folder to dump the new monomer file or files
+			os.makedirs(dir_final)
+
+		# LOGIC:The end of the previous chain is the start of the current one,
+		start_pos = ends[monomers_list.index(monomer)]
+		end_pos = ends[monomers_list.index(monomer)+1]
+
+	# It copies every line that is not referencing an atom coordinates
+	# or that it is in the range of the monomer we want to isolate
+	file_list = os.listdir(file_dir)
+	for r in file_list: #for every result file:
+		file_path = str(file_dir + '/' + r)
+		new_file_path = str(dir_final + r[:-4] + '_' + monomer + '.pdb') #create a new result file which will include only one protein chain, not all
+		with open(file_path, 'r') as file:
+			lines = [line for line in file.readlines()]
+			# Dump in the new file everything before the first coordinate line + between the lines that contain
+			# the monomer coordinates + after the last receptor's coordinates
+			lines = lines[:ends[1]] + lines[start_pos:end_pos] + lines[ends[-1]:]
+		with open(new_file_path, 'w') as file:
+			file.writelines(lines)
+
+
+
+
+def separate_monomers(monomer, file_dir, file_name, dir_final, monomers_list):
+
+	# Function to separate the original protein pdb file in its monomers
+
+	# Open the .pdb file to separate
+	with open (file_dir + '/' + file_name + '.pdb', 'r+') as r:
+		lines = r.readlines()
+		ends = [0]
+
+		# Searches the .pdb files for the lines that indicate the end of a chain
+		for l in lines:
+			if l[0:3] == 'TER':
+				ends.append(lines.index(l))
+		if os.path.isdir(dir_final) == False:
+			os.makedirs(dir_final)
+		monomer_pdb = open(dir_final + '/' + file_name + '_' + monomer + '.pdb', 'a+')
+
+
+		# The end of the previous chain is the start of the current one,
+		# 0 was previously included in the list ends to be the start of the first chain
+		start_pos = ends[monomers_list.index(monomer)]
+		end_pos = ends[monomers_list.index(monomer)+1]
+
+		# It copies every line that is not referencing an atom coordinates
+		# or that it is in the range of the monomer we want to isolate
+		for l in lines:
+			if l[0:4] != 'ATOM' or lines.index(l) in range(start_pos, end_pos):
+				monomer_pdb.write(l)
+			# It needs to copy also the ligand data (if there is any) which is labeled with SDF
+			elif l[17:20] == 'SDF':
+				monomer_pdb.write(l)
+
+
+
+
+
+def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path):
+
+	# Function to separate the ligand coordinates of every solution, it's useful to simply the calculation of the contact frequencies
+
+	dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory
+	print('Isolating ' + rec_lig + '_' + monomer)
+
+	os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory
+	file_list = os.listdir(dir_path)
+	result_list = []
+
+	# Some operative system will create hidden files, the script consider .pdb files only
+	for i in file_list:
+		if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb':
+			result_list.append(i)
+	for r in result_list:
+		file_path = str(dir_path + '/' + r)
+		ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb')
+		with open(file_path, 'r') as file:
+			lines = [line for line in file.readlines()]
+			# Everything below the line 'REMARK    Docked ligand coordinates...' is data of the ligand
+			lines = lines[lines.index('REMARK    Docked ligand coordinates...\n'):]
+		with open(ligand_reserved_file_path, 'w') as file:
+			file.writelines(lines)
+
+
+
+
+
+def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand):
+
+	# Function to calculate the contact frequencies of every amino acid
+
+	result_dir_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ rec_lig + '_folder_'+ str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function!
+	receptor_file_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file
+
+	# Store every receptor's atom coordinates information as a nested dictionary called 'reference'
+	with open(receptor_file_path, 'r') as file:
+		reference = {}
+		for line in file.readlines():
+			if line[0:4] == 'ATOM':
+				if int(line[22:27]) in reference:
+					reference[int(line[22:27])][int(line[6:11])] = tuple(map(float, filter(None, line[31:54].split(' '))))
+				else:
+					reference[int(line[22:27])] = {int(line[6:11]) : tuple(map(float, filter(None, line[31:54].split(' '))))}
+
+	#so the reference is {residue: {atom :(x, y, z)}}
+
+	# The energy for each reference element will be stored in dictionary 'ac'
+	ac = {}
+	file_list = os.listdir(result_dir_path)
+	result_list = []
+
+	# Generate the list for all .pdb names in the directory
+	for i in file_list:
+		if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb':
+			result_list.append(i)
+
+	en_list = [] #future list of energies
+	file_names = [] #future list of file names
+	resi_list = [] #future list of aa
+
+	#reading the first file and saving its lines will make things much quicker for the rest of them
+	first_file_path = str(result_dir_path + receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb')
+	z=open(first_file_path)
+	lines_first=z.readlines()
+	x=lines_first[2]
+	print (x)
+
+
+	# Store energy values for each ligand_reserved file
+	for r in result_list:
+		print('current file:' + r)
+		energy = ''
+		file_path = str(result_dir_path + r)
+
+		with open(file_path) as file:
+			lines = file.readlines()
+			for l in lines:
+				if 'REMARK' in l.split(' ') and 'Energy' in l.split(' '):
+					# The energy is divided by the number of results to
+					# later obtain an average energy when we will sum the
+					energy = (float(l.split(' ')[6][:-1]))/(len(result_list))
+					# Generate file and energy list by order
+					file_names.append(str(r))
+					en_list.append(energy)
+
+			# Go over every coordinate of atoms in the ligand_reserved file and store into coor
+			coor = [tuple(map(float, filter(None, line[31:54].split(' '))))
+					for line in lines if line[0:4] == 'ATOM']
+			lst = []
+
+			for res in reference.keys(): # for each amino acid in the receptor file:
+				distances = []
+
+				for atom in coor: # for each atom of the ligand
+
+					for aa in reference[res].keys(): # for each atom of that amino acid
+						# check if the distance between atoms of the ligands
+						# and of the amino acid are lower than chosen threshold (5)
+						distances.append(math.sqrt((reference[res][aa][0] - atom[0]) ** 2 + (reference[res][aa][1] - atom[1])** 2
+									 + (reference[res][aa][2] - atom[2]) ** 2))
+
+				if all(d >= threshold for d in distances): #if none of the distances is lower than the threshold, skip
+					continue
+
+				else: # if at least one distance is lower then add this aminoacid to the ac dict
+					if res in ac.keys():
+						ac[res] += energy	# adding energy (previosly divided by the number of results) more times if
+					else:				 	# found multiple times, that way you would have an average
+						ac[res] = energy
+
+					# Store the resi number into lst
+				if res not in lst:
+						lst.append(res)
+			# Store rei_num for one file into resi_list as a list
+			resi_list.append(lst)
+
+
+
+	best_result_name = ''
+	# Find the resi number with the lowest energy
+	red_resi = ''
+	for k, v in ac.items():
+		if v == min(ac.values()):
+			red_resi = k
+	print('best_residue: ' + str(red_resi))
+
+	# Find the file that both satisfies the lowest energy and containing the lowest energy resi
+	max_en = 0
+	for f in file_names:
+		if en_list[file_names.index(f)] <= max_en:
+			temp = resi_list[file_names.index(f)]
+			for i in temp:
+				if i == red_resi:
+					best_result_name = f
+
+
+	res_dict_path = result_dir_path + 'res_dict.json'
+
+	# Use the result file from /result/, change the name to best docking result, and convert it into chain Z
+	try:
+		best_result(best_result_name, monomer, rec_lig, receptor, ligand)
+	# sometimes the simulations results are not good enough to satisfy both requirements,
+	# it's common especially when one monomer is never close to the ligand.
+	# Not including this line would stop an otherwise useful simulation
+	except FileNotFoundError:
+		f_file = receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb'
+		best_result(f_file, monomer, rec_lig, receptor, ligand)
+
+	print(ac)
+
+	with open(res_dict_path, 'w') as file:
+		file.write(json.dumps(ac))
+	print('res_dict.json is generated')
+	return ac
+
+
+
+
+
+def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path):
+
+	# Function to create the nested dictionary with every monomer as key with value a dictionary with its amino acids as keys and contact frequencies as values
+
+	result_dict = {} #this will be the dictionary
+
+	folder_name = str(receptor + '_' + monomer + '_' + ligand)
+
+	if receptor + '_' + monomer not in result_dict.keys():
+		result_dict[receptor + '_' + monomer] = {}
+	if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False:
+		result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand)
+	else:
+		result_dict[receptor+ '_' + monomer][ligand] = eval(
+			open(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read())
+		print('res_dict.json previously exists and has read')
+
+	resultjson_path = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/results.json'
+
+	# Initialize results.json
+	ini = {}
+	with open(resultjson_path, 'w') as file:
+		file.write(json.dumps(ini))
+	results = {}
+	for r in result_dict: #result_dict is where we have our contact freuquencies
+		if r in results.keys():
+			for v in result_dict[r]:
+				results[r][v] == result_dict[r][v]
+		else:
+			results[r] = result_dict[r]
+	with open(resultjson_path, 'w') as file:
+		file.write(json.dumps(results))
+	print('result.json is finished')
+
+
+
+
+
+def pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list, docking_pdb_path):
+
+	print('Current pair:' + rec_lig)
+
+	today_dir = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/'
+
+	datetoday = str(date.today())
+
+	results_dir = today_dir + rec_lig + '/result/'
+	os.makedirs(results_dir)
+
+	hex_docking(rec_lig, rec_lig, receptor, ligand, datetoday,docking_pdb_path) # CALL HEX
+
+	results_list = os.listdir(results_dir)
+	first_file_name = str(receptor + '_' + ligand + '0001.pdb')
+
+
+	# Repeats the analysis for every monomer in the receptor file
+	for monomer in monomers_list:
+		dir_final = today_dir + receptor + '_' + monomer + '_' + ligand + '/result/'
+		print('plotting monomer: ' + monomer + ' with the ligand: ' + ligand)
+		separate_results(monomer, results_dir, first_file_name, dir_final, monomers_list)
+		ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path)
+		print('Ligands are now reserved in docking results.')
+		color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path)
+		#plot_frequencies(monomer)
+
+
+
+class Protein_Docking:
+	@staticmethod
+	def start(receptor,ligand,docking_pdb_path):
+
+		# Check if the receptor is a monomer or a complex and save the receptor and ligand names as variables
+
+		receptor_folder =  docking_pdb_path + '/results/receptor_to_dock'
+		receptor_folder_list = os.listdir(receptor_folder)
+		ligand_folder = os.listdir(docking_pdb_path + '/results/ligand_to_dock')
+
+		for rec in receptor_folder_list:
+			# There could be hidden files in the receptor or ligand directory so only consider pdb files
+			if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb':
+				receptor = rec[:-4]
+
+				# To check if the receptor is a monomer or not, the script will search the .pdb file
+				# for the line that indicated the presence of multiple chains,
+				with open(receptor_folder + '/' + rec, 'r+') as f:
+					is_monomer = True
+					for x in f.readlines():
+						if re.match(r'COMPND   \d CHAIN: \w, \w*', x) != None:
+							is_monomer = False
+							#if the receptor would be a monomer the regex would be r'COMPND   \d CHAIN: \w;'
+
+							# To make a list of the monomers' labels
+							print(receptor + ' identified as a protein complex')
+							if x[11:16] == 'CHAIN':
+								monomers_list = x.split(': ')[-1].split(', ')
+								# The COMPND line ends with ';' therefore it needs to be removed from the last label
+								monomers_list[-1] = monomers_list[-1][0]
+
+		for lig in ligand_folder:
+			if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb':
+			#DO NOT USE PDB FOR LIGAND FILES, it is possible but it can lead to errors due to the missing hydrogens
+				ligand = lig[:-4]
+
+		rec_lig = receptor + '_' + ligand
+
+		# To save the terminal output later (very important)
+		stdoutOrigin=sys.stdout
+		sys.stdout = open(docking_pdb_path + 'results/Terminal_recordings/' + receptor + '_' + ligand + '_' + str(date.today()) + '.txt' , "w")
+
+		# Call to the pipeline with different parameters whether the receptor is a monomer or a complex
+		if is_monomer == False:
+			dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
+			for monomer in monomers_list:
+				print('separating monomer: ' + monomer)
+				separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file
+
+			pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
+		else:
+			dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
+			monomers_list = ['monomer']
+			separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it.
+																							# It allows to use the same functions in both cases
+			pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
+
+		#To put together the json files with all the data from all monomers
+		new_json = docking_pdb_path + '/results/'+ rec_lig + '_folder_'  + str(date.today()) + '/' + '/final.json'
+		final_json = {}
+		min_values = []
+		max_values = []
+		abs_max = None
+		abs_min = None
+
+		for monomer in monomers_list:
+			monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
+			with open(monomer_json, 'r') as file:
+				monomer_dict = json.load(file)
+
+				monomer_key = list(monomer_dict.keys())[0]
+				ligand_key = list(monomer_dict[monomer_key].keys())[0]
+
+				inside_dict = monomer_dict[monomer_key][ligand_key]
+
+			# To eliminate empty dictionaries that might cause division errors below  normalized_mon_dicitonary calculations
+				if  inside_dict == {}:
+					continue
+				else:
+					mini = min(inside_dict.values())
+					maxi = max(inside_dict.values())
+
+				min_values.append(mini)
+				max_values.append(maxi)
+
+				abs_max = max(max_values)
+				abs_min = min(min_values)
+
+				print("This is the maximum value: ",abs_max, file=sys.stderr)
+				print("This is the minimum value: ",abs_min, file=sys.stderr)
+
+		#Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum.
+		for monomer in monomers_list:
+			monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
+			with open(monomer_json, 'r') as file:
+				monomer_dict = json.load(file)
+
+				monomer_key = list(monomer_dict.keys())[0]
+				ligand_key = list(monomer_dict[monomer_key].keys())[0]
+
+				inside_dict = monomer_dict[monomer_key][ligand_key]
+
+				# It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+
+				if abs_min == abs_max :
+					normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}}
+					final_json.update(normalized_mon_dict)
+				else:
+					normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}}
+					final_json.update(normalized_mon_dict)
+		#Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values
+
+		with open(new_json,'w') as file:
+			file.write(json.dumps(final_json))
+		print('Final json is finished')
+		print(new_json, file=sys.stderr)

From ffe6b47805325ea90cdb78034c6c9c09f082cedf Mon Sep 17 00:00:00 2001
From: Dien Nguyen <diennguyen@DienNguyen>
Date: Thu, 26 Oct 2023 12:43:54 -0400
Subject: [PATCH 02/35] Add constant for hex execution path; add
 docking_pdb_path parameter to best_result function to avoid hard-coding file
 paths; modify snps.py to pass in receptor and ligand of interest through
 endpoint; change naming conventions to not include the date, and only run hex
 if the protein-ligand pair has not been run before; add a block in
 docking_utils.py to search for the receptor and ligand indicated by endpoints

---
 api/resources/snps.py      |  49 +++++++++
 api/utils/docking_utils.py | 213 ++++++++++++++++++++-----------------
 2 files changed, 164 insertions(+), 98 deletions(-)

diff --git a/api/resources/snps.py b/api/resources/snps.py
index da1860f..fc9c0a4 100644
--- a/api/resources/snps.py
+++ b/api/resources/snps.py
@@ -26,6 +26,7 @@
 from api.utils.hotspot_utils import HotspotUtils
 import sys
 from api import db, cache, limiter
+from api.utils.docking_utils import Protein_Docking
 
 
 snps = Namespace("SNPs", description="Information about SNPs", path="/snps")
@@ -46,6 +47,54 @@
     default="None",
 )
 
+@snps.route("/docking/<receptor>/<ligand>")
+class Docking(Resource):
+    @snps.param("receptor", _in="path", default="bri1")
+    @snps.param("ligand", _in="path", default="brass")
+    def get(self, receptor, ligand):
+        # receptor= escape(receptor)
+        # ligand = escape(ligand)
+
+        #arabidopsis_pdb_path = "/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_"
+        #poplar_pdb_path = "/var/www/html/eplant_poplar/pdb/"
+        #tomato_pdb_path = "/var/www/html/eplant_tomato/pdbc/"
+        #docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/"
+        #docking_pdb_path = "/var/www/html/docking-pdbs/"
+        #arabidopsis_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock"
+        #poplar_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock"
+        #tomato_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock"
+        docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/"
+        docking_pdb_path = "/home/diennguyen/BAR_API/docking_test_pdbs"
+
+        #Receptors can be adjusted please adjust the file format on the directories as well (sdf vs pdb)
+        # receptor = "3riz"
+        # ligand = "TDR"
+        # receptor = "5gij_ATOM"
+        # ligand = "TDIF"
+
+        # if BARUtils.is_arabidopsis_gene_valid(receptor_pdb):
+        #     receptor_pdb_path = arabidopsis_pdb_path + \
+        #                         receptor_pdb.upper() + ".pdb"
+        # elif BARUtils.is_poplar_gene_valid(receptor_pdb):
+        #     receptor_pdb_path = (
+        #             poplar_pdb_path + BARUtils.format_poplar(
+        #         receptor_pdb) + ".pdb"
+        #     )
+        # elif BARUtils.is_tomato_gene_valid(receptor_pdb, True):
+        #     receptor_pdb_path = tomato_pdb_path + receptor_pdb.capitalize() + ".pdb"
+        # else:
+        #     return BARUtils.error_exit("Invalid receptor pdb gene id"), 400
+
+        #ligand_sdf_path = "/home/yyu/public_html/library" + ligand + ".pdb"
+
+        docking_file_name = receptor.upper() + "-" + ligand.upper() + \
+                "-docking0001.pdb "
+        response = requests.get("https:" + docking_pdb_link + docking_file_name)
+
+        # Importing start function to initiate docking_utils  file
+
+        Protein_Docking.start(receptor,ligand,docking_pdb_path)
+
 
 @snps.route("/phenix/<fixed_pdb>/<moving_pdb>")
 class Phenix(Resource):
diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 755007f..0772f38 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -12,17 +12,18 @@
 import sys
 from datetime import date
 
+HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex'
 
-def hex_docking(rec_lig,rec_lig2,receptor, ligand, date,docking_pdb_path):
+def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path):
 
 # Function to call Hex, including hard coded settings
 
-
-	code = """ open_receptor  """ + docking_pdb_path + """results/receptor_to_dock/""" + receptor + """.pdb
-open_ligand  """ + docking_pdb_path +"""results/ligand_to_dock/""" + ligand + """.pdb
+# max_docking_solutions set at 5 for testing
+	code = """ open_receptor  """ + docking_pdb_path + """/results/receptor_to_dock/""" + receptor + """.pdb
+open_ligand  """ + docking_pdb_path +"""/results/ligand_to_dock/""" + ligand + """.pdb
 docking_correlation 1
 docking_score_threshold 0
-max_docking_solutions 50
+max_docking_solutions 5
 docking_receptor_stepsize 5.50
 docking_ligand_stepsize 5.50
 docking_alpha_stepsize 2.80
@@ -30,24 +31,24 @@ def hex_docking(rec_lig,rec_lig2,receptor, ligand, date,docking_pdb_path):
 receptor_origin C-825:VAL-O
 commit_edits
 activate_docking
-save_range 1 100 """ + docking_pdb_path + """results/%s_folder_%s/%s/result %s pdb""" % (rec_lig, date, rec_lig2, rec_lig)
-	subprocess.Popen('/home/yyu/hex/bin/hex', stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8')))
+save_range 1 100 """ + docking_pdb_path + """/results/%s/%s/result %s pdb""" % (rec_lig, rec_lig2, rec_lig)
+	subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8')))
 
 
 
 
-def best_result(file_name, monomer, rec_lig, receptor, ligand):
+def best_result(file_name, monomer, rec_lig, receptor, ligand, docking_pdb_path):
 
 	# Function to generate the "best docking results", being the result with the best score and with the residue with the best contact frequency
 
-	file_name_dir = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_'  + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results
+	file_name_dir = str(docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results
 	file_name_path = str(file_name_dir + file_name[:-20] + '.pdb') #directory for the result, identifies as the best result
 	des1 = file_name_dir + 'best_docking_results_for_'+ file_name[:-24] + '.pdb' #destination directory for the best_docking_result file
 	shutil.copyfile(file_name_path,des1)
 
 	#Same thing done with the ligand file only
-	ori2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name
-	des2='/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ receptor + '_' + ligand + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb'
+	ori2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name
+	des2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb'
 	shutil.copyfile(ori2,des2)
 
 
@@ -122,7 +123,7 @@ def separate_monomers(monomer, file_dir, file_name, dir_final, monomers_list):
 	# Open the .pdb file to separate
 	with open (file_dir + '/' + file_name + '.pdb', 'r+') as r:
 		lines = r.readlines()
-		ends = [0]
+		ends = [0] # ends contains all line numbers of "TER"
 
 		# Searches the .pdb files for the lines that indicate the end of a chain
 		for l in lines:
@@ -155,10 +156,10 @@ def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path):
 
 	# Function to separate the ligand coordinates of every solution, it's useful to simply the calculation of the contact frequencies
 
-	dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory
+	dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory
 	print('Isolating ' + rec_lig + '_' + monomer)
 
-	os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory
+	os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory
 	file_list = os.listdir(dir_path)
 	result_list = []
 
@@ -168,7 +169,7 @@ def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path):
 			result_list.append(i)
 	for r in result_list:
 		file_path = str(dir_path + '/' + r)
-		ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '_folder_' + str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb')
+		ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb')
 		with open(file_path, 'r') as file:
 			lines = [line for line in file.readlines()]
 			# Everything below the line 'REMARK    Docked ligand coordinates...' is data of the ligand
@@ -180,12 +181,12 @@ def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path):
 
 
 
-def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand):
+def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand, docking_pdb_path):
 
 	# Function to calculate the contact frequencies of every amino acid
 
-	result_dir_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/'+ rec_lig + '_folder_'+ str(date.today()) + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function!
-	receptor_file_path = str('/home/vlau/BAR_API_HEX_Jan_23/docking_test_pdbs/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file
+	result_dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function!
+	receptor_file_path = str(docking_pdb_path + '/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file
 
 	# Store every receptor's atom coordinates information as a nested dictionary called 'reference'
 	with open(receptor_file_path, 'r') as file:
@@ -293,13 +294,13 @@ def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand):
 
 	# Use the result file from /result/, change the name to best docking result, and convert it into chain Z
 	try:
-		best_result(best_result_name, monomer, rec_lig, receptor, ligand)
+		best_result(best_result_name, monomer, rec_lig, receptor, ligand, docking_pdb_path)
 	# sometimes the simulations results are not good enough to satisfy both requirements,
 	# it's common especially when one monomer is never close to the ligand.
 	# Not including this line would stop an otherwise useful simulation
 	except FileNotFoundError:
 		f_file = receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb'
-		best_result(f_file, monomer, rec_lig, receptor, ligand)
+		best_result(f_file, monomer, rec_lig, receptor, ligand, docking_pdb_path)
 
 	print(ac)
 
@@ -322,14 +323,14 @@ def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path):
 
 	if receptor + '_' + monomer not in result_dict.keys():
 		result_dict[receptor + '_' + monomer] = {}
-	if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False:
-		result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand)
+	if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False:
+		result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand, docking_pdb_path)
 	else:
 		result_dict[receptor+ '_' + monomer][ligand] = eval(
-			open(docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read())
+			open(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read())
 		print('res_dict.json previously exists and has read')
 
-	resultjson_path = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/' + folder_name + '/results.json'
+	resultjson_path = docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/results.json'
 
 	# Initialize results.json
 	ini = {}
@@ -354,14 +355,12 @@ def pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list, docking_pdb_p
 
 	print('Current pair:' + rec_lig)
 
-	today_dir = docking_pdb_path + '/results/' + rec_lig + '_folder_' + str(date.today()) + '/'
-
-	datetoday = str(date.today())
+	today_dir = docking_pdb_path + '/results/' + rec_lig + '/'
 
 	results_dir = today_dir + rec_lig + '/result/'
 	os.makedirs(results_dir)
 
-	hex_docking(rec_lig, rec_lig, receptor, ligand, datetoday,docking_pdb_path) # CALL HEX
+	hex_docking(rec_lig, rec_lig, receptor, ligand,docking_pdb_path) # CALL HEX
 
 	results_list = os.listdir(results_dir)
 	first_file_name = str(receptor + '_' + ligand + '0001.pdb')
@@ -389,11 +388,14 @@ def start(receptor,ligand,docking_pdb_path):
 		receptor_folder_list = os.listdir(receptor_folder)
 		ligand_folder = os.listdir(docking_pdb_path + '/results/ligand_to_dock')
 
+		receptor_file_found = False
 		for rec in receptor_folder_list:
+			sys.stdout.write(rec)
 			# There could be hidden files in the receptor or ligand directory so only consider pdb files
-			if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb':
+			if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb'\
+				and rec[:-4].lower() == receptor.lower():
+				receptor_file_found = True
 				receptor = rec[:-4]
-
 				# To check if the receptor is a monomer or not, the script will search the .pdb file
 				# for the line that indicated the presence of multiple chains,
 				with open(receptor_folder + '/' + rec, 'r+') as f:
@@ -409,89 +411,104 @@ def start(receptor,ligand,docking_pdb_path):
 								monomers_list = x.split(': ')[-1].split(', ')
 								# The COMPND line ends with ';' therefore it needs to be removed from the last label
 								monomers_list[-1] = monomers_list[-1][0]
+				break
 
+		ligand_file_found = False
 		for lig in ligand_folder:
-			if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb':
+			sys.stdout.write(lig)
+			if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb'\
+				and lig[:4].lower() == ligand.lower():
+				ligand_file_found = True
 			#DO NOT USE PDB FOR LIGAND FILES, it is possible but it can lead to errors due to the missing hydrogens
 				ligand = lig[:-4]
+				break
+
+		
+		##TODO: Add block to raise error if receptor or ligand files are not found
 
 		rec_lig = receptor + '_' + ligand
 
+		#check if results folder already exists
+		results_path = docking_pdb_path + '/results/' + rec_lig
+		if not os.path.exists(results_path):
 		# To save the terminal output later (very important)
-		stdoutOrigin=sys.stdout
-		sys.stdout = open(docking_pdb_path + 'results/Terminal_recordings/' + receptor + '_' + ligand + '_' + str(date.today()) + '.txt' , "w")
+			stdoutOrigin=sys.stdout
+			sys.stdout = open(docking_pdb_path + '/results/Terminal_recordings/' + rec_lig + '_' + str(date.today()) + '.txt' , "w")
 
-		# Call to the pipeline with different parameters whether the receptor is a monomer or a complex
-		if is_monomer == False:
-			dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
-			for monomer in monomers_list:
-				print('separating monomer: ' + monomer)
-				separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file
+			# Call to the pipeline with different parameters whether the receptor is a monomer or a complex
+			if is_monomer == False:
+				dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
+				for monomer in monomers_list:
+					print('separating monomer: ' + monomer)
+					separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file
 
-			pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
-		else:
-			dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
-			monomers_list = ['monomer']
-			separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it.
-																							# It allows to use the same functions in both cases
-			pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
-
-		#To put together the json files with all the data from all monomers
-		new_json = docking_pdb_path + '/results/'+ rec_lig + '_folder_'  + str(date.today()) + '/' + '/final.json'
-		final_json = {}
-		min_values = []
-		max_values = []
-		abs_max = None
-		abs_min = None
-
-		for monomer in monomers_list:
-			monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
-			with open(monomer_json, 'r') as file:
-				monomer_dict = json.load(file)
-
-				monomer_key = list(monomer_dict.keys())[0]
-				ligand_key = list(monomer_dict[monomer_key].keys())[0]
-
-				inside_dict = monomer_dict[monomer_key][ligand_key]
-
-			# To eliminate empty dictionaries that might cause division errors below  normalized_mon_dicitonary calculations
-				if  inside_dict == {}:
-					continue
-				else:
-					mini = min(inside_dict.values())
-					maxi = max(inside_dict.values())
+				pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
+			else:
+				dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
+				monomers_list = ['monomer']
+				separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it.
+																								# It allows to use the same functions in both cases
+				pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
+
+			#To put together the json files with all the data from all monomers
+			new_json = docking_pdb_path + '/results/'+ rec_lig + '/' + '/final.json'
+			final_json = {}
+			min_values = []
+			max_values = []
+			abs_max = None
+			abs_min = None
 
-				min_values.append(mini)
-				max_values.append(maxi)
+			for monomer in monomers_list:
+				monomer_json = docking_pdb_path + '/results/' + rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
+				with open(monomer_json, 'r') as file:
+					monomer_dict = json.load(file)
 
-				abs_max = max(max_values)
-				abs_min = min(min_values)
+					monomer_key = list(monomer_dict.keys())[0]
+					ligand_key = list(monomer_dict[monomer_key].keys())[0]
 
-				print("This is the maximum value: ",abs_max, file=sys.stderr)
-				print("This is the minimum value: ",abs_min, file=sys.stderr)
+					inside_dict = monomer_dict[monomer_key][ligand_key]
 
-		#Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum.
-		for monomer in monomers_list:
-			monomer_json = docking_pdb_path + '/results/' +rec_lig + '_folder_' + str(date.today()) + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
-			with open(monomer_json, 'r') as file:
-				monomer_dict = json.load(file)
+				# To eliminate empty dictionaries that might cause division errors below  normalized_mon_dicitonary calculations
+					if  inside_dict == {}:
+						continue
+					else:
+						mini = min(inside_dict.values())
+						maxi = max(inside_dict.values())
 
-				monomer_key = list(monomer_dict.keys())[0]
-				ligand_key = list(monomer_dict[monomer_key].keys())[0]
+					min_values.append(mini)
+					max_values.append(maxi)
 
-				inside_dict = monomer_dict[monomer_key][ligand_key]
+					abs_max = max(max_values)
+					abs_min = min(min_values)
 
-				# It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+					print("This is the maximum value: ",abs_max, file=sys.stderr)
+					print("This is the minimum value: ",abs_min, file=sys.stderr)
 
-				if abs_min == abs_max :
-					normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}}
-					final_json.update(normalized_mon_dict)
-				else:
-					normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}}
-					final_json.update(normalized_mon_dict)
-		#Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values
-
-		with open(new_json,'w') as file:
-			file.write(json.dumps(final_json))
-		print('Final json is finished')
-		print(new_json, file=sys.stderr)
+			#Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum.
+			for monomer in monomers_list:
+				monomer_json = docking_pdb_path + '/results/' +rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
+				with open(monomer_json, 'r') as file:
+					monomer_dict = json.load(file)
+
+					monomer_key = list(monomer_dict.keys())[0]
+					ligand_key = list(monomer_dict[monomer_key].keys())[0]
+
+					inside_dict = monomer_dict[monomer_key][ligand_key]
+
+					# It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+
+					if abs_min == abs_max :
+						normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}}
+						final_json.update(normalized_mon_dict)
+					else:
+						normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}}
+						final_json.update(normalized_mon_dict)
+			#Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values
+
+			with open(new_json,'w') as file:
+				file.write(json.dumps(final_json))
+			print('Final json is finished')
+			print(new_json, file=sys.stderr)
+			sys.stdout.close()
+		else:
+			print("Docking has already been done on this protein-ligand.")

From 470fd9632f5e6bf33976b81540da7828e0a38ccf Mon Sep 17 00:00:00 2001
From: Dien Nguyen <diennguyen@DienNguyen>
Date: Thu, 2 Nov 2023 11:39:47 -0400
Subject: [PATCH 03/35] redirect hex output to a text file, add function to
 parse hex output

---
 api/utils/docking_utils.py | 39 +++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 0772f38..cd1d604 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -16,6 +16,9 @@
 
 def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path):
 
+	hex_output = open(docking_pdb_path + "/results/" + rec_lig + 
+				   "/{}_hex_output.txt".format(rec_lig), "w")
+
 # Function to call Hex, including hard coded settings
 
 # max_docking_solutions set at 5 for testing
@@ -23,7 +26,7 @@ def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path):
 open_ligand  """ + docking_pdb_path +"""/results/ligand_to_dock/""" + ligand + """.pdb
 docking_correlation 1
 docking_score_threshold 0
-max_docking_solutions 5
+max_docking_solutions 25
 docking_receptor_stepsize 5.50
 docking_ligand_stepsize 5.50
 docking_alpha_stepsize 2.80
@@ -32,7 +35,8 @@ def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path):
 commit_edits
 activate_docking
 save_range 1 100 """ + docking_pdb_path + """/results/%s/%s/result %s pdb""" % (rec_lig, rec_lig2, rec_lig)
-	subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT).communicate(bytes(code.encode('utf-8')))
+	subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=hex_output).communicate(bytes(code.encode('utf-8')))
+	hex_output.close()
 
 
 
@@ -310,8 +314,34 @@ def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand, docking
 	return ac
 
 
-
-
+def parse_hex_output(rec_lig, docking_pdb_path):
+	hex_output = open(docking_pdb_path + "/results/" + rec_lig + 
+				   "/{}_hex_output.txt".format(rec_lig), "r")
+	lines = hex_output.readlines()
+	result_start = 0
+	result_end = 0
+	for i in range(len(lines)):
+		splitted_line = lines[i].split(" ")
+		if len(splitted_line) > 8 and splitted_line[0] == "Clst":
+			result_start = i + 2
+		if len(splitted_line) > 2 and splitted_line[1] == "save_range":
+			result_end = i - 2
+	clustering_lines = lines[result_start:result_end]
+	clusters = {}
+	for line in clustering_lines:
+		cleaned_line = line.strip().split(" ")
+		res = []
+		for ch in cleaned_line:
+			if ch != "":
+				res.append(ch)
+		clst = int(res[0])
+		sln = int(res[1])
+		if clst not in clusters:
+			clusters[clst] = [sln]
+		else:
+			clusters[clst].append(sln)
+	return(clusters)
+		
 
 def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path):
 
@@ -390,7 +420,6 @@ def start(receptor,ligand,docking_pdb_path):
 
 		receptor_file_found = False
 		for rec in receptor_folder_list:
-			sys.stdout.write(rec)
 			# There could be hidden files in the receptor or ligand directory so only consider pdb files
 			if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb'\
 				and rec[:-4].lower() == receptor.lower():

From ed5a58ef99d6a57432a58fc628ab0b0c5cd0e357 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Tue, 21 Nov 2023 01:02:18 -0500
Subject: [PATCH 04/35] Add classes for refactoring of docking_utils.py file

The following classes were added: Receptor, MonomerReceptor, ComplexReceptor,
Ligand, Docking, MonomerDocking, ComplexDocking, and Docker.
__init__ methods were added for every class except Docker. Methods for
creating receptor, ligand, and docking objects were written in the Docker
class. The hex_docking method was written for the Docking class, which is
inherited by MonomerDocking and ComplexDocking.
---
 api/utils/refactored_docking_utils.py | 285 ++++++++++++++++++++++++++
 1 file changed, 285 insertions(+)
 create mode 100644 api/utils/refactored_docking_utils.py

diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py
new file mode 100644
index 0000000..1c811ce
--- /dev/null
+++ b/api/utils/refactored_docking_utils.py
@@ -0,0 +1,285 @@
+from abc import ABC, abstractmethod
+from typing import List
+import os
+import re
+import subprocess
+
+HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex'
+
+class Receptor(ABC):
+    """An abstract class that represents a receptor
+
+    --- Attributes ---
+    name (str): the name of the receptor
+    file_path (str): the relative path to the receptors pdb file
+    """
+    @abstractmethod
+    def __init__(self, name: str, file_path: str):
+        self.name = name
+        self.file_path = file_path
+
+class MonomerReceptor(Receptor):
+    """ A class that represents a receptor that is a monomer, meaning it consists
+    of only one chain.
+
+    --- Attributes ---
+    name (str): the name of the receptor
+    file_path (str): the relative path to the receptors pdb file
+    """
+    name: str
+    file_path: str
+
+    def __init__(self, name, file_path):
+        super().__init__(name, file_path)
+
+
+class ComplexReceptor(Receptor):
+    """ A class that represents a receptor that is a complex, meaning it consists
+    of more than one chain.
+
+    --- Attributes ---
+    name (str): the name of the receptor
+    file_path (str): the relative path to the receptors pdb file
+    monomer_list (List[str]): the list of monomers that make up the complex
+    line_numbers (List[int]): the list of line numbers that separate the monomers
+    """
+    def __init__(self, name: str, file_path: str, monomers_list: List[str]):
+        super().__init__(name, file_path)
+        self.monomers_list = monomers_list
+        self.line_numbers = []
+
+    def separate_monomers(self):
+        pass
+
+class Ligand:
+    """A class that represents a ligand.
+    
+    --- Attributes ---
+    name (str): the name of the receptor
+    file_path (str): the relative path to the receptors pdb file
+    """
+    def __init__(self, name: str, file_path: str):
+        self.name = name
+        self.file_path = file_path
+
+class Docking(ABC):
+    """An abstract class that represents the docking between a receptor and a
+    ligand.
+
+    --- Attributes ---
+    receptor (Receptor): a Receptor object that represents a receptor
+    ligand (Ligand): a Ligand object that represents a ligand
+    results_path (str): the file path to where the results are stored
+    ligand_reserved_list (List[int]): a list of line numbers, one for each solution,
+    the indicates where the "Docked ligand" section begins
+    """
+
+    @abstractmethod
+    def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str):
+        self.receptor = receptor
+        self.ligand = ligand
+        self.results_path = results_path
+        self.ligand_reserved_list = []
+
+    def hex_docking(self):
+        hex_output_file = open(self.results_path + 'hex_output.txt', "w")
+
+    # Function to call Hex, including hard coded settings
+
+    # max_docking_solutions set at 5 for testing
+        code = """ open_receptor  """ + self.receptor.file_path + """
+    open_ligand  """ + self.ligand.file_path + """
+    docking_correlation 1
+    docking_score_threshold 0
+    max_docking_solutions 5
+    docking_receptor_stepsize 5.50
+    docking_ligand_stepsize 5.50
+    docking_alpha_stepsize 2.80
+    docking_main_scan 16
+    receptor_origin C-825:VAL-O
+    commit_edits
+    activate_docking
+    save_range 1 100 """ + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name)
+        subprocess.Popen(HEX_BIN_PATH, 
+                         stdin=subprocess.PIPE,
+                         stderr=subprocess.STDOUT, 
+                         stdout=hex_output_file).communicate(bytes(code.encode('utf-8')))
+        hex_output_file.close()
+        print("Hex docking completed")
+
+    @abstractmethod
+    def ligand_reserved(self):
+        pass
+
+    @abstractmethod
+    def result_dict_generator(self):
+        pass
+
+    @abstractmethod
+    def best_result(self):
+        pass
+
+    @abstractmethod
+    def color_surfaces(self):
+        pass
+
+class MonomerDocking(Docking):
+    """A class the represents a docking between a monomer receptor and a monomer.
+    
+    --- Attributes ---
+    receptor (MonomerReceptor): a Receptor object that represents a monomer receptor
+    ligand (Ligand): a Ligand object that represents a ligand
+    results_path (str): the file path to where the results are stored
+    ligand_reserved (List[int]): a list of line numbers, one for each solution,
+        the indicates where the "Docked ligand" section begins
+    """
+
+    def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str):
+        super().__init__(receptor, ligand, results_path)
+
+    def ligand_reserved(self):
+        pass
+
+    def result_dict_generator(self):
+        pass
+
+    def best_result(self):
+        pass
+
+    def color_surfaces(self):
+        pass
+
+class ComplexDocking(Docking):
+    """A class that represents a docking between a complex receptor and a ligand.
+    
+        --- Attributes ---
+    receptor (MonomerReceptor): a Receptor object that represents a monomer receptor
+    ligand (Ligand): a Ligand object that represents a ligand
+    results_path (str): the file path to where the results are stored
+    ligand_reserved (List[int]): a list of line numbers, one for each solution,
+        the indicates where the "Docked ligand" section begins
+    split_results (List[List[Tuple[int]]]): a list where each sublist is a chain,
+        which contains a list of tuples. Each tuple indicates the line numbers
+        of the start and end of that chain in a results file.
+    """
+
+    def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str):
+        super().__init__(receptor, ligand, results_path)
+        split_results = []
+
+    def separate_results(self):
+        pass
+
+    def ligand_reserved(self):
+        pass
+
+    def result_dict_generator(self):
+        pass
+
+    def best_result(self):
+        pass
+
+    def color_surfaces(self):
+        pass
+
+class Docker:
+    """A class that represents the controller to create docking pairs and carry
+    out the docking"""
+
+    @staticmethod
+    def start(receptor: str, ligand: str, docking_pdb_path: str):
+        
+        # create docking object
+        docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
+        if docking is None:
+            return
+        
+        docking.hex_docking()
+    
+    def create_receptor(receptor_name: str, receptor_file_path: str):
+        with open(receptor_file_path) as f:
+            is_monomer = True
+            for line in f.readlines():
+                if re.match(r'COMPND   \d CHAIN: \w, \w*', line) != None:
+                    is_monomer = False
+					#if the receptor would be a monomer the regex would be 
+                    # r'COMPND   \d CHAIN: \w;'
+
+					# To make a list of the monomers' labels
+                    print(receptor_name + ' identified as a protein complex')
+                    if line[11:16] == 'CHAIN':
+                        monomers_list = line.split(': ')[-1].split(', ')
+					# The COMPND line ends with ';' therefore it needs to be 
+                    # removed from the last label
+                        monomers_list[-1] = monomers_list[-1][0]
+                        new_receptor = ComplexReceptor(receptor_name, 
+                                                       receptor_file_path, 
+                                                       monomers_list)
+                        return new_receptor
+                    print("Unknown pdb structure, need further investigation")
+
+            if is_monomer:
+                new_receptor = MonomerReceptor(receptor_name,
+                                               receptor_file_path)
+                return new_receptor
+    
+    def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
+        
+        # check that the docking combination has not been run before
+        results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '_testing/'
+        if os.path.exists(results_path):
+            print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name))
+            return None
+        
+        os.makedirs(results_path)
+        
+        # find receptor file and create receptor object
+        receptor_folder =  docking_pdb_path + 'results/receptor_to_dock'
+        receptor_found = False
+
+        for receptor_file in os.listdir(receptor_folder):
+            if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
+            receptor_file.split('.')[1] == 'pdb' and \
+            receptor_file[:-4].lower() == receptor_name.lower():
+                receptor_file_found = True
+                receptor_file_path = receptor_folder + '/' + receptor_file
+                receptor = Docker.create_receptor(receptor_name, receptor_file_path)
+
+        # find ligand file and create ligand object
+        ligand_folder = docking_pdb_path + 'results/ligand_to_dock'
+        ligand_file_found = False
+
+        for ligand_file in os.listdir(ligand_folder):
+            if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \
+            ligand_file.split('.')[1] == 'pdb' and \
+            ligand_file[:-4].lower() == ligand_name.lower():
+                ligand_file_found = True
+                ligand_file_path = ligand_folder + '/' + ligand_file
+                ligand = Ligand(ligand_name, ligand_file_path)
+
+        if not receptor_file_found:
+            print("Receptor file not found")
+            return
+        elif not ligand_file_found:
+            print("Ligand file not found")
+            return 
+        
+        # receptor and ligand objects are created and ready for docking
+        if isinstance(receptor, MonomerReceptor):
+            docking = MonomerDocking(receptor, ligand, results_path)
+        else:
+            docking = ComplexDocking(receptor, ligand, results_path)
+        return docking
+            
+if __name__ == "__main__":
+    # receptor = Docker.create_receptor("5gij_ATOM", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/5gij_ATOM.pdb")
+    # print(receptor.name)
+    # print(receptor.file_path)
+    # receptor2 = Docker.create_receptor("8g2j", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/8g2j.pdb")
+    # print(receptor2.name)
+    # print(receptor2.file_path)
+    # print(receptor2.monomers_list)
+    docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/")
+    print(docking.results_path)
+    print(docking.receptor.file_path)
+    docking.hex_docking()
\ No newline at end of file

From be00a1a5733575f56fa5a06795cd48603b06f003 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Tue, 16 Jan 2024 22:21:03 -0500
Subject: [PATCH 05/35] Add methods for each class; add a controller class
 called Docker, which creates instances of other classes for docking.

---
 api/utils/refactored_docking_utils.py | 450 +++++++++++++++++++++++---
 1 file changed, 400 insertions(+), 50 deletions(-)

diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py
index 1c811ce..32f80b7 100644
--- a/api/utils/refactored_docking_utils.py
+++ b/api/utils/refactored_docking_utils.py
@@ -3,6 +3,11 @@
 import os
 import re
 import subprocess
+import pickle
+import math
+import sys
+import json
+import datetime
 
 HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex'
 
@@ -41,21 +46,39 @@ class ComplexReceptor(Receptor):
     name (str): the name of the receptor
     file_path (str): the relative path to the receptors pdb file
     monomer_list (List[str]): the list of monomers that make up the complex
-    line_numbers (List[int]): the list of line numbers that separate the monomers
+    line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]]
     """
     def __init__(self, name: str, file_path: str, monomers_list: List[str]):
         super().__init__(name, file_path)
         self.monomers_list = monomers_list
-        self.line_numbers = []
+        self.line_numbers = self.separate_monomers()
 
     def separate_monomers(self):
-        pass
+        line_numbers = []
+        file = open(self.file_path, "r")
+        line = file.readline()
+        prev = None
+        curr_line = 0
+        while line != '':
+            # the first line of the first monomer
+            if line[:12] == "ATOM      1 ":
+                prev = curr_line - 1
+            # the last line of a monomer
+            elif line[:3] == 'TER':
+                # line_numbers.append(curr_line)
+                line_numbers.append([prev + 1, curr_line])
+                prev = curr_line
+            curr_line += 1
+            line = file.readline()
+        
+        return line_numbers
+
 
 class Ligand:
     """A class that represents a ligand.
     
     --- Attributes ---
-    name (str): the name of the receptor
+    name (str): the name of the receptor 
     file_path (str): the relative path to the receptors pdb file
     """
     def __init__(self, name: str, file_path: str):
@@ -87,40 +110,194 @@ def hex_docking(self):
     # Function to call Hex, including hard coded settings
 
     # max_docking_solutions set at 5 for testing
-        code = """ open_receptor  """ + self.receptor.file_path + """
-    open_ligand  """ + self.ligand.file_path + """
-    docking_correlation 1
-    docking_score_threshold 0
-    max_docking_solutions 5
-    docking_receptor_stepsize 5.50
-    docking_ligand_stepsize 5.50
-    docking_alpha_stepsize 2.80
-    docking_main_scan 16
-    receptor_origin C-825:VAL-O
-    commit_edits
-    activate_docking
-    save_range 1 100 """ + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name)
+        hex_command = """ open_receptor  """ + self.receptor.file_path + """
+                open_ligand  """ + self.ligand.file_path + """
+                docking_correlation 1
+                docking_score_threshold 0
+                max_docking_solutions 25
+                docking_receptor_stepsize 5.50
+                docking_ligand_stepsize 5.50
+                docking_alpha_stepsize 2.80
+                docking_main_scan 16
+                receptor_origin C-825:VAL-O
+                commit_edits
+                activate_docking
+                save_range 1 100 """ \
+        + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name)
+        print(hex_command)
         subprocess.Popen(HEX_BIN_PATH, 
                          stdin=subprocess.PIPE,
                          stderr=subprocess.STDOUT, 
-                         stdout=hex_output_file).communicate(bytes(code.encode('utf-8')))
+                         stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8')))
         hex_output_file.close()
+        ct = datetime.datetime.now()
+        print("current time:-", ct)
         print("Hex docking completed")
 
-    @abstractmethod
-    def ligand_reserved(self):
-        pass
+    def crte_ligand_reserved_attr(self):
+        """This function populates the Docking instance's ligand_reserved_list attribute
+        with a list of line numbers. Each line number is where the Docked Ligand section
+        begins for each result.
+        For example, [1500, 1499, 1500] means that there are three solutions. In the first
+        solution, the "Docked Ligand" section begins at line 1500. In the second solution,
+        it begins at line 1499, and so on ...
+
+        # TODO: Check if having the same chain name (e.g. C) in BOTH ligand
+        # and receptor will be a problem for execution.
+        """
+        line_numbers = []
+        for filename in os.listdir(self.results_path):
+            if filename[-3:] == 'pdb':
+                with open(self.results_path + filename, "r") as file:
+                    lines = file.readlines()
+                for i in range(len(lines)):
+                    if "Docked ligand coordinates..." in lines[i]:
+                        line_numbers.append(i)
+                        break
+        self.ligand_reserved_list = line_numbers
+    
+    def parse_hex_output(self):
+        hex_output = open(self.results_path + 'hex_output.txt', "r")
+        lines = hex_output.readlines()
+        # line number where the clustering starts and ends
+        result_start = 0
+        result_end = 0
+        for i in range(len(lines)):
+            splitted_line = lines[i].split(" ")
+            if len(splitted_line) > 8 and splitted_line[0] == "Clst":
+                result_start = i + 2
+            if len(splitted_line) > 2 and "save_range" in splitted_line:
+                result_end = i - 2
+        clustering_lines = lines[result_start:result_end]
+        clusters = {}
+        clusters["num_soln"] = len(clustering_lines)
+        for line in clustering_lines:
+            cleaned_line = line.strip().split(" ")
+            res = []
+            # only keep non-blank items in line
+            for ch in cleaned_line:
+                if ch != "":
+                    res.append(ch)
+            clst = int(res[0])
+            sln = int(res[1])
+            if clst not in clusters:
+                clusters[clst] = [sln]
+            else:
+                clusters[clst].append(sln)
+        return(clusters)
+            
 
-    @abstractmethod
-    def result_dict_generator(self):
-        pass
+    def result_dict_generator(self, monomer_number, threshold):
+        receptor_file = open(self.receptor.file_path, "r")
+
+        if monomer_number != -1: # if -1, go to monomer logic
+            # get the start and end line numbers of the monomer in the receptor pdb
+            monomer_start = self.receptor.line_numbers[monomer_number][0]
+            monomer_end = self.receptor.line_numbers[monomer_number][1]
+
+            # get the lines for that receptor only
+            receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end]
+        else: # Monomer logic
+            receptor_file_lines = receptor_file.readlines()
+
+        # Store every receptor's atom coordinates information as a nested 
+        # dictionary called 'reference'
+        reference = {}
+        for line in receptor_file_lines:
+            splitted_line = line.split()
+            if line[0:4] == 'ATOM':
+                coord = map(float, filter(None, splitted_line[6:9]))
+                if int(splitted_line[5]) in reference:
+                    reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord)
+                else:
+                    reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)}
+
+        # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}},
+
+
+        # The energy for each reference element will be stored in dictionary 'ac'
+        ac = {}
+        result_list = []
+        for filename in os.listdir(self.results_path):
+            if filename[-3:] == 'pdb':
+                result_list.append(filename)
+
+        lowest_en = None # to keep track of lowest energy
+        lowest_en_file = None # the file with the lowest energy
+        lowest_residue_list = None # list of residues of file with lowest energy
+        all_residue_list = []
+
+        cluster_dict = self.parse_hex_output()
+
+        for i in range(len(result_list)):
+            print('current file: ' + result_list[i])
+            energy = ''
+
+            # get the ligand_reserved section of the result file
+            file = open(self.results_path + result_list[i], 'r')
+            ligand_reserved_start = self.ligand_reserved_list[i]
+            ligand_reserved_section = file.readlines()[ligand_reserved_start:]
+
+            # go through ligand reserved section to calculate energy
+            residue_set = set()
+            coor = []
+            for line in ligand_reserved_section:
+                if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '):
+                    cluster_size = len(cluster_dict[i + 1])
+                    total_solutions = cluster_dict['num_soln']
+
+                    # energy is weighed according to the number of solutions
+                    # in that cluster
+                    energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size
+
+                    # record values if lowest energy
+                    if lowest_en is None or energy < lowest_en:
+                        lowest_en_file = result_list[i]
+                        lowest_en = energy
+                elif line[:4] == 'ATOM':
+                        # coordinates of one atom
+                        coordinates = tuple(map(float, filter(None, line.split()[6:9])))
+                        coor.append(coordinates)
+            # each atom's coordinates is now stored in the list coordinates
+
+            residue_set = set()
+            for res in reference.keys(): # for each amino acid in the receptor file:
+                distances = []
+                
+                for atom in coor: # for each atom of the ligand
+                    for aa in reference[res].keys(): # for each atom of that amino acid
+						# check if the distance between atoms of the ligands
+						# and of the amino acid are lower than chosen threshold (5)
+                        distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2,
+                                                  (reference[res][aa][1] - atom[1]) ** 2,
+                                                  (reference[res][aa][2] - atom[2]) ** 2]))
+                        
+                        distances.append(distance)
+
+                # if at least one of the distances is lower than the threshold, otherwise skip
+                if all(d >= threshold for d in distances):
+                    continue
+                else:
+                    # adding energy (previously divided by the number of results)
+                    # if found multiple times, we would get an average
+                    if res in ac.keys():
+                        ac[res] += energy
+                    else:
+                        ac[res] = energy
+
+					# Store the resi number into set 
+                residue_set.add(res)
+
+            all_residue_list.append(residue_set)
+
+        return ac
 
     @abstractmethod
     def best_result(self):
         pass
 
     @abstractmethod
-    def color_surfaces(self):
+    def crte_receptor_dict(self):
         pass
 
 class MonomerDocking(Docking):
@@ -130,34 +307,69 @@ class MonomerDocking(Docking):
     receptor (MonomerReceptor): a Receptor object that represents a monomer receptor
     ligand (Ligand): a Ligand object that represents a ligand
     results_path (str): the file path to where the results are stored
-    ligand_reserved (List[int]): a list of line numbers, one for each solution,
+    ligand_reserved_list (List[int]): a list of line numbers, one for each solution,
         the indicates where the "Docked ligand" section begins
     """
 
     def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str):
         super().__init__(receptor, ligand, results_path)
 
-    def ligand_reserved(self):
+    def best_result(self):
         pass
 
-    def result_dict_generator(self):
-        pass
+    def crte_receptor_dict(self, threshold):
+        receptor_res = {}
+        res_dict = self.result_dict_generator(-1, threshold)
+        ligand_res = {}
+        ligand_res[self.ligand.name] = res_dict
+        receptor_res[self.receptor.name] = ligand_res
+        return receptor_res
+    
+    def normalize_results(self, threshold):
+        results_dict = self.crte_receptor_dict(threshold)
+        receptor_key = list(results_dict.keys())[0]
+        ligand_key = list(results_dict[receptor_key].keys())[0]
+
+        inside_dict = results_dict[receptor_key][ligand_key]
+        abs_max = None
+        abs_min = None
+
+        # To eliminate empty dictionaries that might cause division errors below 
+        # normalized_mon_dicitonary calculations
+        if  inside_dict != {}:
+            abs_min = min(inside_dict.values())
+            abs_max = max(inside_dict.values())
+
+            print("This is the maximum value: ",abs_max, file=sys.stderr)
+            print("This is the minimum value: ",abs_min, file=sys.stderr)
+        
+        all_normalized_results = {}
 
-    def best_result(self):
-        pass
+        normalized_mon_dict = {}
+        normalized_mon_dict[receptor_key] = {}
+        normalized_mon_dict[receptor_key][ligand_key] = {}
+
+        # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+        if abs_min == abs_max:
+            for k, v in inside_dict.items():
+                normalized_mon_dict[receptor_key][ligand_key][k] = 1
+        else:
+            for k, v in inside_dict.items():
+                normalized_value = (v - abs_min) / (abs_max - abs_min)
+                normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value
+        all_normalized_results.update(normalized_mon_dict)
+        return all_normalized_results
 
-    def color_surfaces(self):
-        pass
 
 class ComplexDocking(Docking):
     """A class that represents a docking between a complex receptor and a ligand.
     
         --- Attributes ---
-    receptor (MonomerReceptor): a Receptor object that represents a monomer receptor
+    receptor (ComplexReceptor): a Receptor object that represents a monomer receptor
     ligand (Ligand): a Ligand object that represents a ligand
     results_path (str): the file path to where the results are stored
     ligand_reserved (List[int]): a list of line numbers, one for each solution,
-        the indicates where the "Docked ligand" section begins
+        which indicates where the "Docked ligand" section begins
     split_results (List[List[Tuple[int]]]): a list where each sublist is a chain,
         which contains a list of tuples. Each tuple indicates the line numbers
         of the start and end of that chain in a results file.
@@ -165,22 +377,116 @@ class ComplexDocking(Docking):
 
     def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str):
         super().__init__(receptor, ligand, results_path)
-        split_results = []
+        self.split_results = []
 
     def separate_results(self):
-        pass
-
-    def ligand_reserved(self):
-        pass
+        """For each solution, record the start and end line number (0-based) of
+        each chain. Then, populate self.split_results with the final list.
+        
+        Each sublist represents one solution file. Each tuple in the sublist
+        contains the start and end of one chain. The order of the tuples in
+        the sublist is the same as the order of the monomers in the receptor's
+        monomers_list."""
 
-    def result_dict_generator(self):
-        pass
+        results_files = os.listdir(self.results_path)
+        
+        all_chains = []
+
+        # for each solution
+        for file in results_files:
+            if file[-3:] != "pdb":
+                continue  # skip non-result files instead of aborting the scan
+            result_file = open(self.results_path + file)
+
+            # this list contains indices of the start and end of each chain
+            line_numbers = []
+            line = result_file.readline()
+            curr_line = 0
+            prev = None
+            while line != '':
+                # the start of the first chain
+                if line.split()[0] == "ATOM" and line.split()[1] == "1":
+                # if line.startswith('ATOM      1  '):
+                    prev = curr_line - 1
+
+                # the end of a chain
+                elif line[0:3] == 'TER':
+                    line_numbers.append([prev + 1, curr_line])
+                    prev = curr_line
+
+                # read next line
+                line = result_file.readline()
+                curr_line += 1
+
+        # populate split_results attribute
+        self.split_results = line_numbers
 
     def best_result(self):
         pass
 
-    def color_surfaces(self):
-        pass
+    def crte_receptor_dict(self, threshold):
+        all_monomers = []
+        for i in range(len(self.receptor.monomers_list)):
+            ligand_res = {}
+            res_dict = self.result_dict_generator(i, threshold)
+            ligand_res[self.ligand.name] = res_dict
+            all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res})
+        return all_monomers
+    
+    def normalize_results(self, threshold):
+        min_values = []
+        max_values = []
+        abs_max = None
+        abs_min = None
+        all_monomers_dict = self.crte_receptor_dict(threshold)
+        for i in range(len(all_monomers_dict)):
+            monomer_dict = all_monomers_dict[i]
+            monomer_key = list(monomer_dict.keys())[0]
+            ligand_key = list(monomer_dict[monomer_key].keys())[0]
+
+            inside_dict = monomer_dict[monomer_key][ligand_key]
+
+            # To eliminate empty dictionaries that might cause division errors below 
+            # normalized_mon_dicitonary calculations
+            if  inside_dict == {}:
+                continue
+            else:
+                mini = min(inside_dict.values())
+                maxi = max(inside_dict.values())
+                
+                min_values.append(mini)
+                max_values.append(maxi)
+                
+                abs_max = max(max_values)
+                abs_min = min(min_values)
+
+                print("This is the maximum value: ",abs_max, file=sys.stderr)
+                print("This is the minimum value: ",abs_min, file=sys.stderr)
+        
+        # Now looping through every monomer, and calculating every residue energy to be 
+        # normalized by using absolute minimum and maximum.
+        all_normalized_results = {}
+        for i in range(len(all_monomers_dict)):
+            monomer_dict = all_monomers_dict[i]
+            monomer_key = list(monomer_dict.keys())[0]
+            ligand_key = list(monomer_dict[monomer_key].keys())[0]
+
+            inside_dict = monomer_dict[monomer_key][ligand_key]
+
+            normalized_mon_dict = {}
+            normalized_mon_dict[monomer_key] = {}
+            normalized_mon_dict[monomer_key][ligand_key] = {}
+
+            # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+            if abs_min == abs_max:
+                for k, v in inside_dict.items():
+                    normalized_mon_dict[monomer_key][ligand_key][k] = 1
+            else:
+                for k, v in inside_dict.items():
+                    normalized_value = (v - abs_min) / (abs_max - abs_min)
+                    normalized_mon_dict[monomer_key][ligand_key][k] = normalized_value
+            all_normalized_results.update(normalized_mon_dict)
+        return all_normalized_results
 
 class Docker:
     """A class that represents the controller to create docking pairs and carry
@@ -190,11 +496,27 @@ class Docker:
     def start(receptor: str, ligand: str, docking_pdb_path: str):
         
         # create docking object
+        ct = datetime.datetime.now()
+        print("Starting the docking process at {}".format(ct))
         docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
         if docking is None:
-            return
+            results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '_testing/'
+            with open(results_path + "final.json") as json_file:
+                final_json = json.load(json_file)
+            return final_json
         
         docking.hex_docking()
+        if isinstance(docking, ComplexDocking):
+            docking.separate_results()
+        docking.crte_ligand_reserved_attr()
+        normalized_results = docking.normalize_results(5)
+        new_json = docking.results_path + "final.json"
+        with open(new_json,'w') as file:
+            file.write(json.dumps(normalized_results))
+        ct = datetime.datetime.now()
+        print("current time:-", ct)
+        return normalized_results
+
     
     def create_receptor(receptor_name: str, receptor_file_path: str):
         with open(receptor_file_path) as f:
@@ -235,7 +557,7 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
         
         # find receptor file and create receptor object
         receptor_folder =  docking_pdb_path + 'results/receptor_to_dock'
-        receptor_found = False
+        receptor_file_found = False
 
         for receptor_file in os.listdir(receptor_folder):
             if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
@@ -276,10 +598,38 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
     # print(receptor.name)
     # print(receptor.file_path)
     # receptor2 = Docker.create_receptor("8g2j", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/8g2j.pdb")
+    # print(receptor2.line_numbers)
     # print(receptor2.name)
     # print(receptor2.file_path)
     # print(receptor2.monomers_list)
-    docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/")
-    print(docking.results_path)
-    print(docking.receptor.file_path)
-    docking.hex_docking()
\ No newline at end of file
+    # Load the instance from the file
+    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'rb') as file:
+    #     docking = pickle.load(file)
+    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'rb') as file:
+    #     docking = pickle.load(file)
+        
+    # docking.ligand_reserved_list = docking.ligand_reserved()
+
+    # docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/")
+    # print(docking.results_path)
+    # print(docking.receptor.file_path)
+    # docking.hex_docking()
+    # docking.ligand_reserved()
+    # docking.separate_results()
+    # print(docking.split_results)
+    # # Save instance to file
+    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'wb') as file:
+    #     pickle.dump(docking, file)
+
+    # docking = Docker.create_docking("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/")
+    # docking.hex_docking()
+    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'wb') as file:
+    #     pickle.dump(docking, file)
+
+    # print(docking.receptor.monomers_list)
+    # print(docking.receptor.line_numbers)
+    # docking.ligand_reserved()
+    # print(docking.ligand_reserved_list)
+    # print(docking.normalize_results(5))
+    print(Docker.start("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/"))
+    # print(Docker.start("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/"))
\ No newline at end of file

From 9001a046bfffd613b9713710e89cf198ee988a62 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Thu, 18 Jan 2024 14:05:03 -0500
Subject: [PATCH 06/35] Fix styling issues and add more documentation

---
 api/utils/refactored_docking_utils.py | 227 ++++++++++++--------------
 1 file changed, 107 insertions(+), 120 deletions(-)

diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py
index 32f80b7..d7b3176 100644
--- a/api/utils/refactored_docking_utils.py
+++ b/api/utils/refactored_docking_utils.py
@@ -3,7 +3,6 @@
 import os
 import re
 import subprocess
-import pickle
 import math
 import sys
 import json
@@ -11,6 +10,7 @@
 
 HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex'
 
+
 class Receptor(ABC):
     """An abstract class that represents a receptor
 
@@ -23,6 +23,7 @@ def __init__(self, name: str, file_path: str):
         self.name = name
         self.file_path = file_path
 
+
 class MonomerReceptor(Receptor):
     """ A class that represents a receptor that is a monomer, meaning it consists
     of only one chain.
@@ -54,6 +55,11 @@ def __init__(self, name: str, file_path: str, monomers_list: List[str]):
         self.line_numbers = self.separate_monomers()
 
     def separate_monomers(self):
+        """Returns a list of lists, where each sublist contains the line
+        numbers of the start and end of a monomer.
+        For example, receptor X has 3 chains in this order: A, B, C.
+        The method will return [[1, 6], [7, 9], [10, 15]].
+        """
         line_numbers = []
         file = open(self.file_path, "r")
         line = file.readline()
@@ -70,21 +76,22 @@ def separate_monomers(self):
                 prev = curr_line
             curr_line += 1
             line = file.readline()
-        
+
         return line_numbers
 
 
 class Ligand:
     """A class that represents a ligand.
-    
+
     --- Attributes ---
-    name (str): the name of the receptor 
+    name (str): the name of the receptor
     file_path (str): the relative path to the receptors pdb file
     """
     def __init__(self, name: str, file_path: str):
         self.name = name
         self.file_path = file_path
 
+
 class Docking(ABC):
     """An abstract class that represents the docking between a receptor and a
     ligand.
@@ -105,6 +112,8 @@ def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str):
         self.ligand_reserved_list = []
 
     def hex_docking(self):
+        """Run hex docking using the command line.
+        """
         hex_output_file = open(self.results_path + 'hex_output.txt', "w")
 
     # Function to call Hex, including hard coded settings
@@ -124,10 +133,9 @@ def hex_docking(self):
                 activate_docking
                 save_range 1 100 """ \
         + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name)
-        print(hex_command)
-        subprocess.Popen(HEX_BIN_PATH, 
+        subprocess.Popen(HEX_BIN_PATH,
                          stdin=subprocess.PIPE,
-                         stderr=subprocess.STDOUT, 
+                         stderr=subprocess.STDOUT,
                          stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8')))
         hex_output_file.close()
         ct = datetime.datetime.now()
@@ -141,9 +149,6 @@ def crte_ligand_reserved_attr(self):
         For example, [1500, 1499, 1500] means that there are three solutions. In the first
         solution, the "Docked Ligand" section begins at line 1500. In the second solution,
         it begins at line 1499, and so on ...
-
-        # TODO: Check if having the same chain name (e.g. C) in BOTH ligand
-        # and receptor will be a problem for execution.
         """
         line_numbers = []
         for filename in os.listdir(self.results_path):
@@ -155,8 +160,13 @@ def crte_ligand_reserved_attr(self):
                         line_numbers.append(i)
                         break
         self.ligand_reserved_list = line_numbers
-    
+
     def parse_hex_output(self):
+        """Returns a dictionary where the key is the cluster number and the
+        value is a list of solution numbers. One of the keys is "num_soln",
+        where its value is the total number of solutions.
+        For example: {num_soln : 5, 1 : [2, 4], 2 : [1, 3, 5]}
+        """
         hex_output = open(self.results_path + 'hex_output.txt', "r")
         lines = hex_output.readlines()
         # line number where the clustering starts and ends
@@ -184,23 +194,27 @@ def parse_hex_output(self):
                 clusters[clst] = [sln]
             else:
                 clusters[clst].append(sln)
-        return(clusters)
-            
+        return clusters
 
     def result_dict_generator(self, monomer_number, threshold):
+        """Return a dictionary where each key is a residue and each value is
+        the energy. The distance between each residue in the monomer and each
+        atom in the ligand is calculated, and only residues with distances
+        below the threshold are included.
+        """
         receptor_file = open(self.receptor.file_path, "r")
 
-        if monomer_number != -1: # if -1, go to monomer logic
+        if monomer_number != -1:  # if -1, go to monomer logic
             # get the start and end line numbers of the monomer in the receptor pdb
             monomer_start = self.receptor.line_numbers[monomer_number][0]
             monomer_end = self.receptor.line_numbers[monomer_number][1]
 
             # get the lines for that receptor only
             receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end]
-        else: # Monomer logic
+        else:  # Monomer logic
             receptor_file_lines = receptor_file.readlines()
 
-        # Store every receptor's atom coordinates information as a nested 
+        # Store every receptor's atom coordinates information as a nested
         # dictionary called 'reference'
         reference = {}
         for line in receptor_file_lines:
@@ -214,7 +228,6 @@ def result_dict_generator(self, monomer_number, threshold):
 
         # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}},
 
-
         # The energy for each reference element will be stored in dictionary 'ac'
         ac = {}
         result_list = []
@@ -222,15 +235,12 @@ def result_dict_generator(self, monomer_number, threshold):
             if filename[-3:] == 'pdb':
                 result_list.append(filename)
 
-        lowest_en = None # to keep track of lowest energy
-        lowest_en_file = None # the file with the lowest energy
-        lowest_residue_list = None # list of residues of file with lowest energy
+        lowest_en = None  # to keep track of lowest energy
         all_residue_list = []
 
         cluster_dict = self.parse_hex_output()
 
         for i in range(len(result_list)):
-            print('current file: ' + result_list[i])
             energy = ''
 
             # get the ligand_reserved section of the result file
@@ -252,26 +262,25 @@ def result_dict_generator(self, monomer_number, threshold):
 
                     # record values if lowest energy
                     if lowest_en is None or energy < lowest_en:
-                        lowest_en_file = result_list[i]
                         lowest_en = energy
                 elif line[:4] == 'ATOM':
-                        # coordinates of one atom
-                        coordinates = tuple(map(float, filter(None, line.split()[6:9])))
-                        coor.append(coordinates)
+                    # coordinates of one atom
+                    coordinates = tuple(map(float, filter(None, line.split()[6:9])))
+                    coor.append(coordinates)
             # each atom's coordinates is now stored in the list coordinates
 
             residue_set = set()
-            for res in reference.keys(): # for each amino acid in the receptor file:
+            for res in reference.keys():  # for each amino acid in the receptor file:
                 distances = []
-                
-                for atom in coor: # for each atom of the ligand
-                    for aa in reference[res].keys(): # for each atom of that amino acid
-						# check if the distance between atoms of the ligands
-						# and of the amino acid are lower than chosen threshold (5)
+
+                for atom in coor:  # for each atom of the ligand
+                    for aa in reference[res].keys():  # for each atom of that amino acid
+                        # check if the distance between atoms of the ligands
+                        # and of the amino acid are lower than chosen threshold (5)
                         distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2,
                                                   (reference[res][aa][1] - atom[1]) ** 2,
                                                   (reference[res][aa][2] - atom[2]) ** 2]))
-                        
+
                         distances.append(distance)
 
                 # if at least one of the distances is lower than the threshold, otherwise skip
@@ -285,7 +294,7 @@ def result_dict_generator(self, monomer_number, threshold):
                     else:
                         ac[res] = energy
 
-					# Store the resi number into set 
+                # Store the resi number into set
                 residue_set.add(res)
 
             all_residue_list.append(residue_set)
@@ -300,9 +309,14 @@ def best_result(self):
     def crte_receptor_dict(self):
         pass
 
+    @abstractmethod
+    def normalize_results(self, threshold):
+        pass
+
+
 class MonomerDocking(Docking):
     """A class the represents a docking between a monomer receptor and a monomer.
-    
+
     --- Attributes ---
     receptor (MonomerReceptor): a Receptor object that represents a monomer receptor
     ligand (Ligand): a Ligand object that represents a ligand
@@ -318,14 +332,21 @@ def best_result(self):
         pass
 
     def crte_receptor_dict(self, threshold):
+        """Return a dictionary that contains the residue-energy
+        dictionary of the monomer. This is not necessary, but maintains
+        consistency between monomer and complex receptor dictionaries.
+        """
         receptor_res = {}
         res_dict = self.result_dict_generator(-1, threshold)
         ligand_res = {}
         ligand_res[self.ligand.name] = res_dict
         receptor_res[self.receptor.name] = ligand_res
         return receptor_res
-    
+
     def normalize_results(self, threshold):
+        """Return normalized residue-energy dictionaries for the
+        receptor.
+        """
         results_dict = self.crte_receptor_dict(threshold)
         receptor_key = list(results_dict.keys())[0]
         ligand_key = list(results_dict[receptor_key].keys())[0]
@@ -334,15 +355,12 @@ def normalize_results(self, threshold):
         abs_max = None
         abs_min = None
 
-        # To eliminate empty dictionaries that might cause division errors below 
+        # To eliminate empty dictionaries that might cause division errors below
         # normalized_mon_dicitonary calculations
-        if  inside_dict != {}:
+        if inside_dict != {}:
             abs_min = min(inside_dict.values())
             abs_max = max(inside_dict.values())
 
-            print("This is the maximum value: ",abs_max, file=sys.stderr)
-            print("This is the minimum value: ",abs_min, file=sys.stderr)
-        
         all_normalized_results = {}
 
         normalized_mon_dict = {}
@@ -363,7 +381,7 @@ def normalize_results(self, threshold):
 
 class ComplexDocking(Docking):
     """A class that represents a docking between a complex receptor and a ligand.
-    
+
         --- Attributes ---
     receptor (ComplexReceptor): a Receptor object that represents a monomer receptor
     ligand (Ligand): a Ligand object that represents a ligand
@@ -382,15 +400,13 @@ def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str)
     def separate_results(self):
         """For each solution, record the start and end line number (0-based) of
         each chain. Then, populate self.split_results with the final list.
-        
+
         Each sublist represents one solution file. Each tuple in the sublist
         contains the start and end of one chain. The order of the tuples in
         the sublist is the same as the order of the monomers in the receptor's
-        monomers_list."""
-
+        monomers_list.
+        """
         results_files = os.listdir(self.results_path)
-        
-        all_chains = []
 
         # for each solution
         for file in results_files:
@@ -406,7 +422,7 @@ def separate_results(self):
             while line != '':
                 # the start of the first chain
                 if line.split()[0] == "ATOM" and line.split()[1] == "1":
-                # if line.startswith('ATOM      1  '):
+                    # if line.startswith('ATOM      1  '):
                     prev = curr_line - 1
 
                 # the end of a chain
@@ -432,7 +448,7 @@ def crte_receptor_dict(self, threshold):
             ligand_res[self.ligand.name] = res_dict
             all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res})
         return all_monomers
-    
+
     def normalize_results(self, threshold):
         min_values = []
         max_values = []
@@ -446,24 +462,24 @@ def normalize_results(self, threshold):
 
             inside_dict = monomer_dict[monomer_key][ligand_key]
 
-            # To eliminate empty dictionaries that might cause division errors below 
+            # To eliminate empty dictionaries that might cause division errors below
             # normalized_mon_dicitonary calculations
-            if  inside_dict == {}:
+            if inside_dict == {}:
                 continue
             else:
                 mini = min(inside_dict.values())
                 maxi = max(inside_dict.values())
-                
+
                 min_values.append(mini)
                 max_values.append(maxi)
-                
+
                 abs_max = max(max_values)
                 abs_min = min(min_values)
 
-                print("This is the maximum value: ",abs_max, file=sys.stderr)
-                print("This is the minimum value: ",abs_min, file=sys.stderr)
-        
-        # Now looping through every monomer, and calculating every residue energy to be 
+                print("This is the maximum value: ", abs_max, file=sys.stderr)
+                print("This is the minimum value: ", abs_min, file=sys.stderr)
+
+        # Now looping through every monomer, and calculating every residue energy to be
         # normalized by using absolute minimum and maximum.
         all_normalized_results = {}
         for i in range(len(all_monomers_dict)):
@@ -488,54 +504,60 @@ def normalize_results(self, threshold):
             all_normalized_results.update(normalized_mon_dict)
         return all_normalized_results
 
+
 class Docker:
     """A class that represents the controller to create docking pairs and carry
-    out the docking"""
+    out the docking.
+    """
 
     @staticmethod
     def start(receptor: str, ligand: str, docking_pdb_path: str):
-        
+        """Start the docking process and analyze results. Return the
+        normalized residue-energy dictionary.
+        """
         # create docking object
         ct = datetime.datetime.now()
         print("Starting the docking process at {}".format(ct))
         docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
         if docking is None:
-            results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '_testing/'
+            results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '/'
             with open(results_path + "final.json") as json_file:
                 final_json = json.load(json_file)
             return final_json
-        
+
         docking.hex_docking()
         if isinstance(docking, ComplexDocking):
             docking.separate_results()
         docking.crte_ligand_reserved_attr()
         normalized_results = docking.normalize_results(5)
         new_json = docking.results_path + "final.json"
-        with open(new_json,'w') as file:
+        with open(new_json, 'w') as file:
             file.write(json.dumps(normalized_results))
         ct = datetime.datetime.now()
         print("current time:-", ct)
         return normalized_results
 
-    
     def create_receptor(receptor_name: str, receptor_file_path: str):
+        """Return a new receptor with the name receptor_name, by parsing
+        the file at receptor_file_path.
+        """
         with open(receptor_file_path) as f:
             is_monomer = True
             for line in f.readlines():
-                if re.match(r'COMPND   \d CHAIN: \w, \w*', line) != None:
+                if re.match(r'COMPND   \d CHAIN: \w, \w*', line) is not None:
                     is_monomer = False
-					#if the receptor would be a monomer the regex would be 
+                    # if the receptor would be a monomer the regex would be
                     # r'COMPND   \d CHAIN: \w;'
 
-					# To make a list of the monomers' labels
+                    # To make a list of the monomers' labels
                     print(receptor_name + ' identified as a protein complex')
                     if line[11:16] == 'CHAIN':
                         monomers_list = line.split(': ')[-1].split(', ')
-					# The COMPND line ends with ';' therefore it needs to be 
-                    # removed from the last label
+                        # The COMPND line ends with ';' therefore it needs to be
+                        # removed from the last label
                         monomers_list[-1] = monomers_list[-1][0]
-                        new_receptor = ComplexReceptor(receptor_name, 
-                                                       receptor_file_path, 
+                        new_receptor = ComplexReceptor(receptor_name,
+                                                       receptor_file_path,
                                                        monomers_list)
                         return new_receptor
                     print("Unknown pdb structure, need further investigation")
@@ -544,25 +566,27 @@ def create_receptor(receptor_name: str, receptor_file_path: str):
                 new_receptor = MonomerReceptor(receptor_name,
                                                receptor_file_path)
                 return new_receptor
-    
+
     def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
-        
+        """Return a docking pair, which contains a Receptor and a Ligand, as
+        specified by receptor_name and ligand_name, respectively.
+        """
         # check that the docking combination has not been run before
-        results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '_testing/'
+        results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '/'
         if os.path.exists(results_path):
             print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name))
             return None
-        
+
         os.makedirs(results_path)
-        
+
         # find receptor file and create receptor object
-        receptor_folder =  docking_pdb_path + 'results/receptor_to_dock'
+        receptor_folder = docking_pdb_path + 'results/receptor_to_dock'
         receptor_file_found = False
 
         for receptor_file in os.listdir(receptor_folder):
             if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
-            receptor_file.split('.')[1] == 'pdb' and \
-            receptor_file[:-4].lower() == receptor_name.lower():
+                receptor_file.split('.')[1] == 'pdb' and \
+                    receptor_file[:-4].lower() == receptor_name.lower():
                 receptor_file_found = True
                 receptor_file_path = receptor_folder + '/' + receptor_file
                 receptor = Docker.create_receptor(receptor_name, receptor_file_path)
@@ -573,8 +597,8 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
 
         for ligand_file in os.listdir(ligand_folder):
             if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \
-            ligand_file.split('.')[1] == 'pdb' and \
-            ligand_file[:-4].lower() == ligand_name.lower():
+                ligand_file.split('.')[1] == 'pdb' and \
+                    ligand_file[:-4].lower() == ligand_name.lower():
                 ligand_file_found = True
                 ligand_file_path = ligand_folder + '/' + ligand_file
                 ligand = Ligand(ligand_name, ligand_file_path)
@@ -584,52 +608,15 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
             return
         elif not ligand_file_found:
             print("Ligand file not found")
-            return 
-        
+            return
+
         # receptor and ligand objects are created and ready for docking
         if isinstance(receptor, MonomerReceptor):
             docking = MonomerDocking(receptor, ligand, results_path)
         else:
             docking = ComplexDocking(receptor, ligand, results_path)
         return docking
-            
+
+
 if __name__ == "__main__":
-    # receptor = Docker.create_receptor("5gij_ATOM", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/5gij_ATOM.pdb")
-    # print(receptor.name)
-    # print(receptor.file_path)
-    # receptor2 = Docker.create_receptor("8g2j", "/home/diennguyen/BAR_API/docking_test_pdbs/results/receptor_to_dock/8g2j.pdb")
-    # print(receptor2.line_numbers)
-    # print(receptor2.name)
-    # print(receptor2.file_path)
-    # print(receptor2.monomers_list)
-    # Load the instance from the file
-    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'rb') as file:
-    #     docking = pickle.load(file)
-    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'rb') as file:
-    #     docking = pickle.load(file)
-        
-    # docking.ligand_reserved_list = docking.ligand_reserved()
-
-    # docking = Docker.create_docking("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/")
-    # print(docking.results_path)
-    # print(docking.receptor.file_path)
-    # docking.hex_docking()
-    # docking.ligand_reserved()
-    # docking.separate_results()
-    # print(docking.split_results)
-    # # Save instance to file
-    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/docking_test.pkl', 'wb') as file:
-    #     pickle.dump(docking, file)
-
-    # docking = Docker.create_docking("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/")
-    # docking.hex_docking()
-    # with open('/home/diennguyen/BAR_API/docking_test_pdbs/results/5gij_TDIF_docking_test.pkl', 'wb') as file:
-    #     pickle.dump(docking, file)
-
-    # print(docking.receptor.monomers_list)
-    # print(docking.receptor.line_numbers)
-    # docking.ligand_reserved()
-    # print(docking.ligand_reserved_list)
-    # print(docking.normalize_results(5))
     print(Docker.start("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/"))
-    # print(Docker.start("5gij_ATOM", "TDIF", "/home/diennguyen/BAR_API/docking_test_pdbs/"))
\ No newline at end of file

From 4195dc1f327bd71cd65383fc709cd82b5267d1e7 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Thu, 1 Feb 2024 15:57:55 -0500
Subject: [PATCH 07/35] change paths to match the BAR's file structure

---
 api/resources/snps.py                 | 54 +++++++++++----------------
 api/utils/refactored_docking_utils.py | 50 ++++++++++++++++---------
 2 files changed, 54 insertions(+), 50 deletions(-)
 mode change 100644 => 100755 api/resources/snps.py
 mode change 100644 => 100755 api/utils/refactored_docking_utils.py

diff --git a/api/resources/snps.py b/api/resources/snps.py
old mode 100644
new mode 100755
index fc9c0a4..d721be0
--- a/api/resources/snps.py
+++ b/api/resources/snps.py
@@ -26,7 +26,7 @@
 from api.utils.hotspot_utils import HotspotUtils
 import sys
 from api import db, cache, limiter
-from api.utils.docking_utils import Protein_Docking
+from api.utils.refactored_docking_utils import Docker
 
 
 snps = Namespace("SNPs", description="Information about SNPs", path="/snps")
@@ -47,45 +47,32 @@
     default="None",
 )
 
+
 @snps.route("/docking/<receptor>/<ligand>")
 class Docking(Resource):
+    decorators = [limiter.limit("2/minute")]    
+
     @snps.param("receptor", _in="path", default="bri1")
     @snps.param("ligand", _in="path", default="brass")
     def get(self, receptor, ligand):
-        # receptor= escape(receptor)
-        # ligand = escape(ligand)
-
-        #arabidopsis_pdb_path = "/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_"
-        #poplar_pdb_path = "/var/www/html/eplant_poplar/pdb/"
-        #tomato_pdb_path = "/var/www/html/eplant_tomato/pdbc/"
-        #docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/"
-        #docking_pdb_path = "/var/www/html/docking-pdbs/"
-        #arabidopsis_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock"
-        #poplar_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock"
-        #tomato_pdb_path = "/home/metyumelkonyan/BCB330/results/receptor_to_dock"
+        receptor = escape(receptor)
+        ligand = escape(ligand)
+
+        # TODO: Clean comments left by metyu before commit
+    
         docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/"
-        docking_pdb_path = "/home/diennguyen/BAR_API/docking_test_pdbs"
+        docking_pdb_path = "/DATA/HEX_API/RESULTS/"
 
+        # TODO: Then add regex check to receptors/ligands (For Arabidopsis genes, simply reuse 
+        # is_arabidopsis_gene_valid; but you will need make regex check for your SDFs)
         #Receptors can be adjusted please adjust the file format on the directories as well (sdf vs pdb)
-        # receptor = "3riz"
-        # ligand = "TDR"
-        # receptor = "5gij_ATOM"
-        # ligand = "TDIF"
-
-        # if BARUtils.is_arabidopsis_gene_valid(receptor_pdb):
-        #     receptor_pdb_path = arabidopsis_pdb_path + \
-        #                         receptor_pdb.upper() + ".pdb"
-        # elif BARUtils.is_poplar_gene_valid(receptor_pdb):
-        #     receptor_pdb_path = (
-        #             poplar_pdb_path + BARUtils.format_poplar(
-        #         receptor_pdb) + ".pdb"
-        #     )
-        # elif BARUtils.is_tomato_gene_valid(receptor_pdb, True):
-        #     receptor_pdb_path = tomato_pdb_path + receptor_pdb.capitalize() + ".pdb"
-        # else:
-        #     return BARUtils.error_exit("Invalid receptor pdb gene id"), 400
-
-        #ligand_sdf_path = "/home/yyu/public_html/library" + ligand + ".pdb"
+
+        if not BARUtils.is_arabidopsis_gene_valid(receptor):
+            return BARUtils.error_exit("Invalid arapbidopsis pdb gene id"), 400
+        
+        matched = re.search("[a-z]", ligand)
+        if matched is None:
+            return BARUtils.error_exit("Invalid ligand name"), 400
 
         docking_file_name = receptor.upper() + "-" + ligand.upper() + \
                 "-docking0001.pdb "
@@ -93,7 +80,8 @@ def get(self, receptor, ligand):
 
         # Importing start function to initiate docking_utils  file
 
-        Protein_Docking.start(receptor,ligand,docking_pdb_path)
+        final_json = Docker.start(receptor, ligand, docking_pdb_path)
+        return BARUtils.success_exit(final_json)
 
 
 @snps.route("/phenix/<fixed_pdb>/<moving_pdb>")
diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py
old mode 100644
new mode 100755
index d7b3176..ab7d15d
--- a/api/utils/refactored_docking_utils.py
+++ b/api/utils/refactored_docking_utils.py
@@ -8,7 +8,7 @@
 import json
 import datetime
 
-HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex'
+HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex'
 
 
 class Receptor(ABC):
@@ -520,10 +520,15 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         print("Starting the docking process at {}".format(ct))
         docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
         if docking is None:
-            results_path = docking_pdb_path + 'results/' + receptor + '_' + ligand + '/'
+            receptor = receptor.split('.')[0]
+            results_path = docking_pdb_path + receptor + '_' + ligand + '/'
             with open(results_path + "final.json") as json_file:
                 final_json = json.load(json_file)
             return final_json
+        elif docking == "Receptor file not found":
+            return "Receptor file not found"
+        elif docking == "Ligand file not found":
+            return "Ligand file not found"
 
         docking.hex_docking()
         if isinstance(docking, ComplexDocking):
@@ -572,43 +577,52 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
         specified by receptor_name and ligand_name, respectively.
         """
         # check that the docking combination has not been run before
-        results_path = docking_pdb_path + 'results/' + receptor_name + '_' + ligand_name + '/'
-        if os.path.exists(results_path):
-            print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name))
+        # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/'
+        if '.' in receptor_name:
+            receptor_name = receptor_name[:receptor_name.index('.')]
+        results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
+        print(results_path)
+        if os.path.exists(results_path): #or \
+            #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'):
+            print("The docking between {0} and {1} has already been done.".format(receptor_name, 
+                                                                                  ligand_name))
             return None
 
+
         os.makedirs(results_path)
 
         # find receptor file and create receptor object
-        receptor_folder = docking_pdb_path + 'results/receptor_to_dock'
+        receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/'
+        # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs'
         receptor_file_found = False
 
         for receptor_file in os.listdir(receptor_folder):
-            if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
-                receptor_file.split('.')[1] == 'pdb' and \
-                    receptor_file[:-4].lower() == receptor_name.lower():
+            # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
+            #     receptor_file[-4:] == 'pdb' and \
+            #         receptor_file[:-4].lower() == receptor_name.lower():
+            if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \
+                    (receptor_name in receptor_file):
                 receptor_file_found = True
-                receptor_file_path = receptor_folder + '/' + receptor_file
+                receptor_file_path = receptor_folder + receptor_file
                 receptor = Docker.create_receptor(receptor_name, receptor_file_path)
 
         # find ligand file and create ligand object
-        ligand_folder = docking_pdb_path + 'results/ligand_to_dock'
+        # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/'
+        ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/'
         ligand_file_found = False
 
         for ligand_file in os.listdir(ligand_folder):
             if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \
-                ligand_file.split('.')[1] == 'pdb' and \
+                ligand_file.split('.')[1] == 'sdf' and \
                     ligand_file[:-4].lower() == ligand_name.lower():
                 ligand_file_found = True
                 ligand_file_path = ligand_folder + '/' + ligand_file
                 ligand = Ligand(ligand_name, ligand_file_path)
 
         if not receptor_file_found:
-            print("Receptor file not found")
-            return
+            return "Receptor file not found"
         elif not ligand_file_found:
-            print("Ligand file not found")
-            return
+            return "Ligand file not found"
 
         # receptor and ligand objects are created and ready for docking
         if isinstance(receptor, MonomerReceptor):
@@ -619,4 +633,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
 
 
 if __name__ == "__main__":
-    print(Docker.start("8g2j", "UPG", "/home/diennguyen/BAR_API/docking_test_pdbs/"))
+    # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/"))
+    print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/"))
+    

From 6756ff473ba435877b32354641733d33f5d2b946 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Thu, 1 Feb 2024 16:05:42 -0500
Subject: [PATCH 08/35] add code for mapping sdf names to number

---
 api/utils/sdf_mapping.py | 49 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 api/utils/sdf_mapping.py

diff --git a/api/utils/sdf_mapping.py b/api/utils/sdf_mapping.py
new file mode 100644
index 0000000..9bf2033
--- /dev/null
+++ b/api/utils/sdf_mapping.py
@@ -0,0 +1,49 @@
+import os
+import re
+from typing import List
+
+def get_substance_name(filename: str, folder_path: str):
+    file = open(folder_path + filename, "r")
+    line = file.readline().strip()
+    if line == "":
+        return None
+    while line != "> <PUBCHEM_SUBSTANCE_SYNONYM>" and line != "$$$$":
+        line = file.readline().strip()
+    # right now, line == "> <PUBCHEM_SUBSTANCE_SYNONYM>" or line == "$$$$"
+    if line == "$$$$":
+        return None
+    line = file.readline().strip()
+    names = []
+    while line != "":
+        if len(line) > 0 and line[0] == ">":
+            break
+    # while line != "":
+        # check regex to see if it contains lowercase
+        # matched = re.search("[a-z]", line)
+        # if matched is not None:
+        #     return line
+        # else:
+        #     line = file.readline().strip()
+        names.append(line)
+        line = file.readline().strip()
+    return names
+
+def create_mapping(folder_path: str):
+    mapped_sdf = {}
+    sdf_files = os.listdir(folder_path)
+    for file in sdf_files:
+        if file[0] != "." and file[-4:] == ".sdf":
+            file_number = file[:file.index("_")]
+            name = file[file.index("_") + 1:-4]
+            # the commented out section is for sdfs that have not been filtered
+            # names = get_substance_name(file, folder_path)
+            # print(name)
+            # sdf_number = file.split(".")[0]
+            # mapped_sdf[sdf_number] = ",".join(names)
+            mapped_sdf[file_number] = name  # check if want to map file_number or file name
+    return mapped_sdf
+
+if __name__ == "__main__":
+    sdf_folder_paths = ['/home/diennguyen/BAR_API/HEX_API/HEX_SMALL_MOLECULES']
+    print(create_mapping(sdf_folder_paths[0]))
+

From 740cce9b59ba4c94fa399050cc3410c1dface6da Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Thu, 1 Feb 2024 22:49:38 -0500
Subject: [PATCH 09/35] Replace docking_utils.py and remove old version

---
 api/resources/snps.py      |    2 +-
 api/utils/docking_utils.py | 1175 +++++++++++++++++++-----------------
 2 files changed, 636 insertions(+), 541 deletions(-)

diff --git a/api/resources/snps.py b/api/resources/snps.py
index d721be0..aa5616e 100755
--- a/api/resources/snps.py
+++ b/api/resources/snps.py
@@ -26,7 +26,7 @@
 from api.utils.hotspot_utils import HotspotUtils
 import sys
 from api import db, cache, limiter
-from api.utils.refactored_docking_utils import Docker
+from api.utils.docking_utils import Docker
 
 
 snps = Namespace("SNPs", description="Information about SNPs", path="/snps")
diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index cd1d604..ab7d15d 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -1,543 +1,638 @@
-from flask import Flask, flash, request, redirect, url_for, send_from_directory
-from flask_restx import Api
-from flask.templating import render_template
-from werkzeug.utils import secure_filename
-import re
+from abc import ABC, abstractmethod
+from typing import List
 import os
-import math
-import shutil
-import json
+import re
 import subprocess
-import random
+import math
 import sys
-from datetime import date
-
-HEX_BIN_PATH = '/home/diennguyen/hex/bin/hex'
-
-def hex_docking(rec_lig,rec_lig2,receptor, ligand, docking_pdb_path):
-
-	hex_output = open(docking_pdb_path + "/results/" + rec_lig + 
-				   "/{}_hex_output.txt".format(rec_lig), "w")
-
-# Function to call Hex, including hard coded settings
-
-# max_docking_solutions set at 5 for testing
-	code = """ open_receptor  """ + docking_pdb_path + """/results/receptor_to_dock/""" + receptor + """.pdb
-open_ligand  """ + docking_pdb_path +"""/results/ligand_to_dock/""" + ligand + """.pdb
-docking_correlation 1
-docking_score_threshold 0
-max_docking_solutions 25
-docking_receptor_stepsize 5.50
-docking_ligand_stepsize 5.50
-docking_alpha_stepsize 2.80
-docking_main_scan 16
-receptor_origin C-825:VAL-O
-commit_edits
-activate_docking
-save_range 1 100 """ + docking_pdb_path + """/results/%s/%s/result %s pdb""" % (rec_lig, rec_lig2, rec_lig)
-	subprocess.Popen(HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=hex_output).communicate(bytes(code.encode('utf-8')))
-	hex_output.close()
-
-
-
-
-def best_result(file_name, monomer, rec_lig, receptor, ligand, docking_pdb_path):
-
-	# Function to generate the "best docking results", being the result with the best score and with the residue with the best contact frequency
-
-	file_name_dir = str(docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/result/') #directory for the docking results
-	file_name_path = str(file_name_dir + file_name[:-20] + '.pdb') #directory for the result, identifies as the best result
-	des1 = file_name_dir + 'best_docking_results_for_'+ file_name[:-24] + '.pdb' #destination directory for the best_docking_result file
-	shutil.copyfile(file_name_path,des1)
-
-	#Same thing done with the ligand file only
-	ori2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + file_name
-	des2 = docking_pdb_path + '/results/'+ receptor + '_' + ligand + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/best_docking_results.pdb'
-	shutil.copyfile(ori2,des2)
-
-
-	# This is to create a copy of that file with 'Z' as the name of the chain in the ligand,
-	# it is important for the 3dsjmol visualization
-
-	with open(str(file_name_dir + 'best_docking_results_for_' + file_name[:-24] + '.pdb'), 'r') as file: #to not modify the chain name for the protein chains
-		lines = file.readlines()
-		subpart1 = lines[:lines.index(
-			'REMARK    Docked ligand coordinates...\n')]  #subpart 1 is from start to 1st line in ligand coordinates
-		subpart2 = lines[lines.index(
-			'REMARK    Docked ligand coordinates...\n'):] #subpart 2 from 1st line in ligand coordinates to end of file
-	with open(str(file_name_dir + 'best_docking_results_for_' + receptor + '_' + monomer + '_' + ligand + '.pdb'), 'w') as file:
-		for l in subpart1:
-			file.write(l)
-		for line in subpart2:
-			if line[0:4] == 'ATOM' or line[:6] == 'HETATM' or line[:3] == 'TER':
-				newline = line[:21] + 'Z' + line[22:]
-				file.write(newline)
-			else:
-				file.write(line)
-	print('best docking result file is generated for ' + file_name[:-24])
-
-
-
-
-def separate_results(monomer, file_dir, first_file_name, dir_final, monomers_list):
-
-	# Function to separate the multimer file into its monomers for every result file created by hex
-
-	ends = [] #this list will be modified with the indices of every monomer's terminal line + the first coordinate's line index
-	# Open the .pdb file to separate
-	with open (file_dir + first_file_name, 'r+') as r:
-		lines = r.readlines()
-		for l in lines:
-			if l.startswith('ATOM      1  '):
-				ends.append(lines.index(l)) #and save the index of the first coordinate's line in the list ends
-
-		# Searches the .pdb files for the lines that indicate the end of a chain
-		for l in lines:
-			if l[0:3] == 'TER':
-				ends.append(lines.index(l)) #and add their indexes in the ends list
-
-		if os.path.isdir(dir_final) == False: #create folder to dump the new monomer file or files
-			os.makedirs(dir_final)
-
-		# LOGIC:The end of the previous chain is the start of the current one,
-		start_pos = ends[monomers_list.index(monomer)]
-		end_pos = ends[monomers_list.index(monomer)+1]
-
-	# It copies every line that is not referencing an atom coordinates
-	# or that it is in the range of the monomer we want to isolate
-	file_list = os.listdir(file_dir)
-	for r in file_list: #for every result file:
-		file_path = str(file_dir + '/' + r)
-		new_file_path = str(dir_final + r[:-4] + '_' + monomer + '.pdb') #create a new result file which will include only one protein chain, not all
-		with open(file_path, 'r') as file:
-			lines = [line for line in file.readlines()]
-			# Dump in the new file everything before the first coordinate line + between the lines that contain
-			# the monomer coordinates + after the last receptor's coordinates
-			lines = lines[:ends[1]] + lines[start_pos:end_pos] + lines[ends[-1]:]
-		with open(new_file_path, 'w') as file:
-			file.writelines(lines)
-
-
-
-
-def separate_monomers(monomer, file_dir, file_name, dir_final, monomers_list):
-
-	# Function to separate the original protein pdb file in its monomers
-
-	# Open the .pdb file to separate
-	with open (file_dir + '/' + file_name + '.pdb', 'r+') as r:
-		lines = r.readlines()
-		ends = [0] # ends contains all line numbers of "TER"
-
-		# Searches the .pdb files for the lines that indicate the end of a chain
-		for l in lines:
-			if l[0:3] == 'TER':
-				ends.append(lines.index(l))
-		if os.path.isdir(dir_final) == False:
-			os.makedirs(dir_final)
-		monomer_pdb = open(dir_final + '/' + file_name + '_' + monomer + '.pdb', 'a+')
-
-
-		# The end of the previous chain is the start of the current one,
-		# 0 was previously included in the list ends to be the start of the first chain
-		start_pos = ends[monomers_list.index(monomer)]
-		end_pos = ends[monomers_list.index(monomer)+1]
-
-		# It copies every line that is not referencing an atom coordinates
-		# or that it is in the range of the monomer we want to isolate
-		for l in lines:
-			if l[0:4] != 'ATOM' or lines.index(l) in range(start_pos, end_pos):
-				monomer_pdb.write(l)
-			# It needs to copy also the ligand data (if there is any) which is labeled with SDF
-			elif l[17:20] == 'SDF':
-				monomer_pdb.write(l)
-
-
-
-
-
-def ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path):
-
-	# Function to separate the ligand coordinates of every solution, it's useful to simply the calculation of the contact frequencies
-
-	dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/result') #results directory
-	print('Isolating ' + rec_lig + '_' + monomer)
-
-	os.makedirs(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb') #ligand_reserved directory
-	file_list = os.listdir(dir_path)
-	result_list = []
-
-	# Some operative system will create hidden files, the script consider .pdb files only
-	for i in file_list:
-		if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb':
-			result_list.append(i)
-	for r in result_list:
-		file_path = str(dir_path + '/' + r)
-		ligand_reserved_file_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/' + r[:-4] + '_ligand_reserved.pdb')
-		with open(file_path, 'r') as file:
-			lines = [line for line in file.readlines()]
-			# Everything below the line 'REMARK    Docked ligand coordinates...' is data of the ligand
-			lines = lines[lines.index('REMARK    Docked ligand coordinates...\n'):]
-		with open(ligand_reserved_file_path, 'w') as file:
-			file.writelines(lines)
-
-
-
-
-
-def result_dict_generator(threshold, monomer, rec_lig, receptor, ligand, docking_pdb_path):
-
-	# Function to calculate the contact frequencies of every amino acid
-
-	result_dir_path = str(docking_pdb_path + '/results/'+ rec_lig + '/' + receptor + '_' + monomer + '_' + ligand + '/ligand_reserved_pdb/') #directory for the results files, the ligand only ones we created with the ligand_reserved function!
-	receptor_file_path = str(docking_pdb_path + '/results/receptor_to_dock/monomers/'+ receptor + '_' + monomer + '.pdb') #directory for the receptor protein pdb file
-
-	# Store every receptor's atom coordinates information as a nested dictionary called 'reference'
-	with open(receptor_file_path, 'r') as file:
-		reference = {}
-		for line in file.readlines():
-			if line[0:4] == 'ATOM':
-				if int(line[22:27]) in reference:
-					reference[int(line[22:27])][int(line[6:11])] = tuple(map(float, filter(None, line[31:54].split(' '))))
-				else:
-					reference[int(line[22:27])] = {int(line[6:11]) : tuple(map(float, filter(None, line[31:54].split(' '))))}
-
-	#so the reference is {residue: {atom :(x, y, z)}}
-
-	# The energy for each reference element will be stored in dictionary 'ac'
-	ac = {}
-	file_list = os.listdir(result_dir_path)
-	result_list = []
-
-	# Generate the list for all .pdb names in the directory
-	for i in file_list:
-		if i[0] != '.' and len(i.split('.')) == 2 and i.split('.')[1] == 'pdb':
-			result_list.append(i)
-
-	en_list = [] #future list of energies
-	file_names = [] #future list of file names
-	resi_list = [] #future list of aa
-
-	#reading the first file and saving its lines will make things much quicker for the rest of them
-	first_file_path = str(result_dir_path + receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb')
-	z=open(first_file_path)
-	lines_first=z.readlines()
-	x=lines_first[2]
-	print (x)
-
-
-	# Store energy values for each ligand_reserved file
-	for r in result_list:
-		print('current file:' + r)
-		energy = ''
-		file_path = str(result_dir_path + r)
-
-		with open(file_path) as file:
-			lines = file.readlines()
-			for l in lines:
-				if 'REMARK' in l.split(' ') and 'Energy' in l.split(' '):
-					# The energy is divided by the number of results to
-					# later obtain an average energy when we will sum the
-					energy = (float(l.split(' ')[6][:-1]))/(len(result_list))
-					# Generate file and energy list by order
-					file_names.append(str(r))
-					en_list.append(energy)
-
-			# Go over every coordinate of atoms in the ligand_reserved file and store into coor
-			coor = [tuple(map(float, filter(None, line[31:54].split(' '))))
-					for line in lines if line[0:4] == 'ATOM']
-			lst = []
-
-			for res in reference.keys(): # for each amino acid in the receptor file:
-				distances = []
-
-				for atom in coor: # for each atom of the ligand
-
-					for aa in reference[res].keys(): # for each atom of that amino acid
-						# check if the distance between atoms of the ligands
-						# and of the amino acid are lower than chosen threshold (5)
-						distances.append(math.sqrt((reference[res][aa][0] - atom[0]) ** 2 + (reference[res][aa][1] - atom[1])** 2
-									 + (reference[res][aa][2] - atom[2]) ** 2))
-
-				if all(d >= threshold for d in distances): #if none of the distances is lower than the threshold, skip
-					continue
-
-				else: # if at least one distance is lower then add this aminoacid to the ac dict
-					if res in ac.keys():
-						ac[res] += energy	# adding energy (previosly divided by the number of results) more times if
-					else:				 	# found multiple times, that way you would have an average
-						ac[res] = energy
-
-					# Store the resi number into lst
-				if res not in lst:
-						lst.append(res)
-			# Store rei_num for one file into resi_list as a list
-			resi_list.append(lst)
-
-
-
-	best_result_name = ''
-	# Find the resi number with the lowest energy
-	red_resi = ''
-	for k, v in ac.items():
-		if v == min(ac.values()):
-			red_resi = k
-	print('best_residue: ' + str(red_resi))
-
-	# Find the file that both satisfies the lowest energy and containing the lowest energy resi
-	max_en = 0
-	for f in file_names:
-		if en_list[file_names.index(f)] <= max_en:
-			temp = resi_list[file_names.index(f)]
-			for i in temp:
-				if i == red_resi:
-					best_result_name = f
-
-
-	res_dict_path = result_dir_path + 'res_dict.json'
-
-	# Use the result file from /result/, change the name to best docking result, and convert it into chain Z
-	try:
-		best_result(best_result_name, monomer, rec_lig, receptor, ligand, docking_pdb_path)
-	# sometimes the simulations results are not good enough to satisfy both requirements,
-	# it's common especially when one monomer is never close to the ligand.
-	# Not including this line would stop an otherwise useful simulation
-	except FileNotFoundError:
-		f_file = receptor + '_' + ligand + '0001_' + monomer + '_ligand_reserved.pdb'
-		best_result(f_file, monomer, rec_lig, receptor, ligand, docking_pdb_path)
-
-	print(ac)
-
-	with open(res_dict_path, 'w') as file:
-		file.write(json.dumps(ac))
-	print('res_dict.json is generated')
-	return ac
-
-
-def parse_hex_output(rec_lig, docking_pdb_path):
-	hex_output = open(docking_pdb_path + "/results/" + rec_lig + 
-				   "/{}_hex_output.txt".format(rec_lig), "r")
-	lines = hex_output.readlines()
-	result_start = 0
-	result_end = 0
-	for i in range(len(lines)):
-		splitted_line = lines[i].split(" ")
-		if len(splitted_line) > 8 and splitted_line[0] == "Clst":
-			result_start = i + 2
-		if len(splitted_line) > 2 and splitted_line[1] == "save_range":
-			result_end = i - 2
-	clustering_lines = lines[result_start:result_end]
-	clusters = {}
-	for line in clustering_lines:
-		cleaned_line = line.strip().split(" ")
-		res = []
-		for ch in cleaned_line:
-			if ch != "":
-				res.append(ch)
-		clst = int(res[0])
-		sln = int(res[1])
-		if clst not in clusters:
-			clusters[clst] = [sln]
-		else:
-			clusters[clst].append(sln)
-	return(clusters)
-		
-
-def color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path):
-
-	# Function to create the nested dictionary with every monomer as key with value a dictionary with its amino acids as keys and contact frequencies as values
-
-	result_dict = {} #this will be the dictionary
-
-	folder_name = str(receptor + '_' + monomer + '_' + ligand)
-
-	if receptor + '_' + monomer not in result_dict.keys():
-		result_dict[receptor + '_' + monomer] = {}
-	if os.path.isfile(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json') == False:
-		result_dict[receptor+ '_' + monomer][ligand] = result_dict_generator(5, monomer, rec_lig, receptor, ligand, docking_pdb_path)
-	else:
-		result_dict[receptor+ '_' + monomer][ligand] = eval(
-			open(docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/ligand_reserved_pdb/res_dict.json', 'r').read())
-		print('res_dict.json previously exists and has read')
-
-	resultjson_path = docking_pdb_path + '/results/' + rec_lig + '/' + folder_name + '/results.json'
-
-	# Initialize results.json
-	ini = {}
-	with open(resultjson_path, 'w') as file:
-		file.write(json.dumps(ini))
-	results = {}
-	for r in result_dict: #result_dict is where we have our contact freuquencies
-		if r in results.keys():
-			for v in result_dict[r]:
-				results[r][v] == result_dict[r][v]
-		else:
-			results[r] = result_dict[r]
-	with open(resultjson_path, 'w') as file:
-		file.write(json.dumps(results))
-	print('result.json is finished')
-
-
-
-
-
-def pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list, docking_pdb_path):
-
-	print('Current pair:' + rec_lig)
-
-	today_dir = docking_pdb_path + '/results/' + rec_lig + '/'
-
-	results_dir = today_dir + rec_lig + '/result/'
-	os.makedirs(results_dir)
-
-	hex_docking(rec_lig, rec_lig, receptor, ligand,docking_pdb_path) # CALL HEX
-
-	results_list = os.listdir(results_dir)
-	first_file_name = str(receptor + '_' + ligand + '0001.pdb')
-
-
-	# Repeats the analysis for every monomer in the receptor file
-	for monomer in monomers_list:
-		dir_final = today_dir + receptor + '_' + monomer + '_' + ligand + '/result/'
-		print('plotting monomer: ' + monomer + ' with the ligand: ' + ligand)
-		separate_results(monomer, results_dir, first_file_name, dir_final, monomers_list)
-		ligand_reserved(monomer, rec_lig, receptor, ligand,docking_pdb_path)
-		print('Ligands are now reserved in docking results.')
-		color_surfaces(monomer, receptor, ligand, rec_lig, docking_pdb_path)
-		#plot_frequencies(monomer)
-
-
-
-class Protein_Docking:
-	@staticmethod
-	def start(receptor,ligand,docking_pdb_path):
-
-		# Check if the receptor is a monomer or a complex and save the receptor and ligand names as variables
-
-		receptor_folder =  docking_pdb_path + '/results/receptor_to_dock'
-		receptor_folder_list = os.listdir(receptor_folder)
-		ligand_folder = os.listdir(docking_pdb_path + '/results/ligand_to_dock')
-
-		receptor_file_found = False
-		for rec in receptor_folder_list:
-			# There could be hidden files in the receptor or ligand directory so only consider pdb files
-			if rec[0] != '.' and len(rec.split('.')) == 2 and rec.split('.')[1] == 'pdb'\
-				and rec[:-4].lower() == receptor.lower():
-				receptor_file_found = True
-				receptor = rec[:-4]
-				# To check if the receptor is a monomer or not, the script will search the .pdb file
-				# for the line that indicated the presence of multiple chains,
-				with open(receptor_folder + '/' + rec, 'r+') as f:
-					is_monomer = True
-					for x in f.readlines():
-						if re.match(r'COMPND   \d CHAIN: \w, \w*', x) != None:
-							is_monomer = False
-							#if the receptor would be a monomer the regex would be r'COMPND   \d CHAIN: \w;'
-
-							# To make a list of the monomers' labels
-							print(receptor + ' identified as a protein complex')
-							if x[11:16] == 'CHAIN':
-								monomers_list = x.split(': ')[-1].split(', ')
-								# The COMPND line ends with ';' therefore it needs to be removed from the last label
-								monomers_list[-1] = monomers_list[-1][0]
-				break
-
-		ligand_file_found = False
-		for lig in ligand_folder:
-			sys.stdout.write(lig)
-			if lig[0] != '.' and len(lig.split('.')) == 2 and lig.split('.')[1] == 'pdb'\
-				and lig[:4].lower() == ligand.lower():
-				ligand_file_found = True
-			#DO NOT USE PDB FOR LIGAND FILES, it is possible but it can lead to errors due to the missing hydrogens
-				ligand = lig[:-4]
-				break
-
-		
-		##TODO: Add block to raise error if receptor or ligand files are not found
-
-		rec_lig = receptor + '_' + ligand
-
-		#check if results folder already exists
-		results_path = docking_pdb_path + '/results/' + rec_lig
-		if not os.path.exists(results_path):
-		# To save the terminal output later (very important)
-			stdoutOrigin=sys.stdout
-			sys.stdout = open(docking_pdb_path + '/results/Terminal_recordings/' + rec_lig + '_' + str(date.today()) + '.txt' , "w")
-
-			# Call to the pipeline with different parameters whether the receptor is a monomer or a complex
-			if is_monomer == False:
-				dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
-				for monomer in monomers_list:
-					print('separating monomer: ' + monomer)
-					separate_monomers(monomer, receptor_folder, receptor, dir_final, monomers_list) # To separate the monomers in the multimer file
-
-				pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
-			else:
-				dir_final = docking_pdb_path + '/results/receptor_to_dock/monomers'
-				monomers_list = ['monomer']
-				separate_monomers('monomer', receptor_folder, receptor, dir_final, monomers_list) # To analyze the data from hex you still need to separate it.
-																								# It allows to use the same functions in both cases
-				pipeline(rec_lig, is_monomer, receptor, ligand, monomers_list,docking_pdb_path)
-
-			#To put together the json files with all the data from all monomers
-			new_json = docking_pdb_path + '/results/'+ rec_lig + '/' + '/final.json'
-			final_json = {}
-			min_values = []
-			max_values = []
-			abs_max = None
-			abs_min = None
-
-			for monomer in monomers_list:
-				monomer_json = docking_pdb_path + '/results/' + rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
-				with open(monomer_json, 'r') as file:
-					monomer_dict = json.load(file)
-
-					monomer_key = list(monomer_dict.keys())[0]
-					ligand_key = list(monomer_dict[monomer_key].keys())[0]
-
-					inside_dict = monomer_dict[monomer_key][ligand_key]
-
-				# To eliminate empty dictionaries that might cause division errors below  normalized_mon_dicitonary calculations
-					if  inside_dict == {}:
-						continue
-					else:
-						mini = min(inside_dict.values())
-						maxi = max(inside_dict.values())
-
-					min_values.append(mini)
-					max_values.append(maxi)
-
-					abs_max = max(max_values)
-					abs_min = min(min_values)
-
-					print("This is the maximum value: ",abs_max, file=sys.stderr)
-					print("This is the minimum value: ",abs_min, file=sys.stderr)
-
-			#Now looping through every monomer, and calculating every residue energy to be normalized by using absolute minimum and maximum.
-			for monomer in monomers_list:
-				monomer_json = docking_pdb_path + '/results/' +rec_lig + '/' + str(receptor + '_' + monomer + '_' + ligand) +'/results.json'
-				with open(monomer_json, 'r') as file:
-					monomer_dict = json.load(file)
-
-					monomer_key = list(monomer_dict.keys())[0]
-					ligand_key = list(monomer_dict[monomer_key].keys())[0]
-
-					inside_dict = monomer_dict[monomer_key][ligand_key]
-
-					# It is here to prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
-
-					if abs_min == abs_max :
-						normalized_mon_dict = {monomer_key:{ligand_key:{k:1 for k,v in inside_dict.items()}}}
-						final_json.update(normalized_mon_dict)
-					else:
-						normalized_mon_dict = {monomer_key:{ligand_key:{k:(v-abs_min)/(abs_max - abs_min) for k,v in inside_dict.items()}}}
-						final_json.update(normalized_mon_dict)
-			#Opening and writing new_json file that was directed to be final.json and was updated with normalization dictionary values
-
-			with open(new_json,'w') as file:
-				file.write(json.dumps(final_json))
-			print('Final json is finished')
-			print(new_json, file=sys.stderr)
-			sys.stdout.close()
-		else:
-			print("Docking has already been done on this protein-ligand.")
+import json
+import datetime
+
+HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex'
+
+
+class Receptor(ABC):
+    """An abstract class that represents a receptor
+
+    --- Attributes ---
+    name (str): the name of the receptor
+    file_path (str): the relative path to the receptors pdb file
+    """
+    @abstractmethod
+    def __init__(self, name: str, file_path: str):
+        self.name = name
+        self.file_path = file_path
+
+
+class MonomerReceptor(Receptor):
+    """ A class that represents a receptor that is a monomer, meaning it consists
+    of only one chain.
+
+    --- Attributes ---
+    name (str): the name of the receptor
+    file_path (str): the relative path to the receptors pdb file
+    """
+    name: str
+    file_path: str
+
+    def __init__(self, name, file_path):
+        super().__init__(name, file_path)
+
+
+class ComplexReceptor(Receptor):
+    """ A class that represents a receptor that is a complex, meaning it consists
+    of more than one chain.
+
+    --- Attributes ---
+    name (str): the name of the receptor
+    file_path (str): the relative path to the receptors pdb file
+    monomers_list (List[str]): the list of monomers that make up the complex
+    line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]]
+    """
+    def __init__(self, name: str, file_path: str, monomers_list: List[str]):
+        super().__init__(name, file_path)
+        self.monomers_list = monomers_list
+        self.line_numbers = self.separate_monomers()
+
+    def separate_monomers(self):
+        """Returns a list of lists, where each sublist contains the line
+        numbers of the start and end of a monomer.
+        For example, receptor X has 3 chains in this order: A, B, C.
+        The method will return [[1, 6], [7, 9], [10, 15]].
+        """
+        line_numbers = []
+        file = open(self.file_path, "r")
+        line = file.readline()
+        prev = None
+        curr_line = 0
+        while line != '':
+            # the first line of the first monomer
+            if line[:12] == "ATOM      1 ":
+                prev = curr_line - 1
+            # the last line of a monomer
+            elif line[:3] == 'TER':
+                # line_numbers.append(curr_line)
+                line_numbers.append([prev + 1, curr_line])
+                prev = curr_line
+            curr_line += 1
+            line = file.readline()
+
+        return line_numbers
+
+
+class Ligand:
+    """A class that represents a ligand.
+
+    --- Attributes ---
+    name (str): the name of the ligand
+    file_path (str): the relative path to the ligand's file
+    """
+    def __init__(self, name: str, file_path: str):
+        self.name = name
+        self.file_path = file_path
+
+
+class Docking(ABC):
+    """An abstract class that represents the docking between a receptor and a
+    ligand.
+
+    --- Attributes ---
+    receptor (Receptor): a Receptor object that represents a receptor
+    ligand (Ligand): a Ligand object that represents a ligand
+    results_path (str): the file path to where the results are stored
+    ligand_reserved_list (List[int]): a list of line numbers, one for each solution,
+    each indicating where the "Docked ligand" section begins
+    """
+
+    @abstractmethod
+    def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str):
+        self.receptor = receptor
+        self.ligand = ligand
+        self.results_path = results_path
+        self.ligand_reserved_list = []
+
+    def hex_docking(self):
+        """Run hex docking using the command line.
+        """
+        hex_output_file = open(self.results_path + 'hex_output.txt', "w")
+
+    # Function to call Hex, including hard coded settings
+
+    # max_docking_solutions is set to 25 below
+        hex_command = """ open_receptor  """ + self.receptor.file_path + """
+                open_ligand  """ + self.ligand.file_path + """
+                docking_correlation 1
+                docking_score_threshold 0
+                max_docking_solutions 25
+                docking_receptor_stepsize 5.50
+                docking_ligand_stepsize 5.50
+                docking_alpha_stepsize 2.80
+                docking_main_scan 16
+                receptor_origin C-825:VAL-O
+                commit_edits
+                activate_docking
+                save_range 1 100 """ \
+        + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name)
+        subprocess.Popen(HEX_BIN_PATH,
+                         stdin=subprocess.PIPE,
+                         stderr=subprocess.STDOUT,
+                         stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8')))
+        hex_output_file.close()
+        ct = datetime.datetime.now()
+        print("current time:-", ct)
+        print("Hex docking completed")
+
+    def crte_ligand_reserved_attr(self):
+        """This function populates the Docking instance's ligand_reserved_list attribute
+        with a list of line numbers. Each line number is where the Docked Ligand section
+        begins for each result.
+        For example, [1500, 1499, 1500] means that there are three solutions. In the first
+        solution, the "Docked Ligand" section begins at line 1500. In the second solution,
+        it begins at line 1499, and so on ...
+        """
+        line_numbers = []
+        for filename in os.listdir(self.results_path):
+            if filename[-3:] == 'pdb':
+                file = open(self.results_path + filename, "r")
+                lines = file.readlines()
+                for i in range(len(lines)):
+                    if "Docked ligand coordinates..." in lines[i]:
+                        line_numbers.append(i)
+                        break
+        self.ligand_reserved_list = line_numbers
+
+    def parse_hex_output(self):
+        """Returns a dictionary where the key is the cluster number and the
+        value is a list of solution numbers. One of the keys is "num_soln",
+        where its value is the total number of solutions.
+        For example: {"num_soln": 5, 1: [2, 4], 2: [1, 3, 5]}
+        """
+        hex_output = open(self.results_path + 'hex_output.txt', "r")
+        lines = hex_output.readlines()
+        # line number where the clustering starts and ends
+        result_start = 0
+        result_end = 0
+        for i in range(len(lines)):
+            splitted_line = lines[i].split(" ")
+            if len(splitted_line) > 8 and splitted_line[0] == "Clst":
+                result_start = i + 2
+            if len(splitted_line) > 2 and "save_range" in splitted_line:
+                result_end = i - 2
+        clustering_lines = lines[result_start:result_end]
+        clusters = {}
+        clusters["num_soln"] = len(clustering_lines)
+        for line in clustering_lines:
+            cleaned_line = line.strip().split(" ")
+            res = []
+            # only keep non-blank items in line
+            for ch in cleaned_line:
+                if ch != "":
+                    res.append(ch)
+            clst = int(res[0])
+            sln = int(res[1])
+            if clst not in clusters:
+                clusters[clst] = [sln]
+            else:
+                clusters[clst].append(sln)
+        return clusters
+
+    def result_dict_generator(self, monomer_number, threshold):
+        """Return a dictionary where each key is a residue and each value is
+        the energy. The distance between each residue in the monomer and each
+        atom in the ligand is calculated, and only residues with distances
+        below the threshold are included.
+        """
+        receptor_file = open(self.receptor.file_path, "r")
+
+        if monomer_number != -1:  # if -1, go to monomer logic
+            # get the start and end line numbers of the monomer in the receptor pdb
+            monomer_start = self.receptor.line_numbers[monomer_number][0]
+            monomer_end = self.receptor.line_numbers[monomer_number][1]
+
+            # get the lines for that receptor only
+            receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end]
+        else:  # Monomer logic
+            receptor_file_lines = receptor_file.readlines()
+
+        # Store every receptor's atom coordinates information as a nested
+        # dictionary called 'reference'
+        reference = {}
+        for line in receptor_file_lines:
+            splitted_line = line.split()
+            if line[0:4] == 'ATOM':
+                coord = map(float, filter(None, splitted_line[6:9]))
+                if int(splitted_line[5]) in reference:
+                    reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord)
+                else:
+                    reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)}
+
+        # here, the structure of the reference dict is {residue: {atom_num: (x, y, z)}}
+
+        # The energy for each reference element will be stored in dictionary 'ac'
+        ac = {}
+        result_list = []
+        for filename in os.listdir(self.results_path):
+            if filename[-3:] == 'pdb':
+                result_list.append(filename)
+
+        lowest_en = None  # to keep track of lowest energy
+        all_residue_list = []
+
+        cluster_dict = self.parse_hex_output()
+
+        for i in range(len(result_list)):
+            energy = ''
+
+            # get the ligand_reserved section of the result file
+            file = open(self.results_path + result_list[i], 'r')
+            ligand_reserved_start = self.ligand_reserved_list[i]
+            ligand_reserved_section = file.readlines()[ligand_reserved_start:]
+
+            # go through ligand reserved section to calculate energy
+            residue_set = set()
+            coor = []
+            for line in ligand_reserved_section:
+                if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '):
+                    cluster_size = len(cluster_dict[i + 1])
+                    total_solutions = cluster_dict['num_soln']
+
+                    # energy is weighed according to the number of solutions
+                    # in that cluster
+                    energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size
+
+                    # record values if lowest energy
+                    if lowest_en is None or energy < lowest_en:
+                        lowest_en = energy
+                elif line[:4] == 'ATOM':
+                    # coordinates of one atom
+                    coordinates = tuple(map(float, filter(None, line.split()[6:9])))
+                    coor.append(coordinates)
+            # each atom's coordinates is now stored in the list coordinates
+
+            residue_set = set()
+            for res in reference.keys():  # for each amino acid in the receptor file:
+                distances = []
+
+                for atom in coor:  # for each atom of the ligand
+                    for aa in reference[res].keys():  # for each atom of that amino acid
+                        # check if the distance between atoms of the ligands
+                        # and of the amino acid are lower than chosen threshold (5)
+                        distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2,
+                                                  (reference[res][aa][1] - atom[1]) ** 2,
+                                                  (reference[res][aa][2] - atom[2]) ** 2]))
+
+                        distances.append(distance)
+
+                # if at least one of the distances is lower than the threshold, otherwise skip
+                if all(d >= threshold for d in distances):
+                    continue
+                else:
+                    # adding energy (previosly divided by the number of results)
+                    # if found multiple times, we would get an average
+                    if res in ac.keys():
+                        ac[res] += energy
+                    else:
+                        ac[res] = energy
+
+                # Store the resi number into set
+                residue_set.add(res)
+
+            all_residue_list.append(residue_set)
+
+        return ac
+
+    @abstractmethod
+    def best_result(self):
+        pass
+
+    @abstractmethod
+    def crte_receptor_dict(self):
+        pass
+
+    @abstractmethod
+    def normalize_results(self, threshold):
+        pass
+
+
+class MonomerDocking(Docking):
+    """A class the represents a docking between a monomer receptor and a monomer.
+
+    --- Attributes ---
+    receptor (MonomerReceptor): a Receptor object that represents a monomer receptor
+    ligand (Ligand): a Ligand object that represents a ligand
+    results_path (str): the file path to where the results are stored
+    ligand_reserved_list (List[int]): a list of line numbers, one for each solution,
+        the indicates where the "Docked ligand" section begins
+    """
+
+    def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str):
+        super().__init__(receptor, ligand, results_path)
+
+    def best_result(self):
+        pass
+
+    def crte_receptor_dict(self, threshold):
+        """"Return a dictionary that contains the residue-energy
+        dictionary of the monomer. This is not necessary, but maintains
+        consistency between monomer and complex receptor dictionaries.
+        """
+        receptor_res = {}
+        res_dict = self.result_dict_generator(-1, threshold)
+        ligand_res = {}
+        ligand_res[self.ligand.name] = res_dict
+        receptor_res[self.receptor.name] = ligand_res
+        return receptor_res
+
+    def normalize_results(self, threshold):
+        """Return normalized residue-energy dictionaries for the
+        receptor.
+        """
+        results_dict = self.crte_receptor_dict(threshold)
+        receptor_key = list(results_dict.keys())[0]
+        ligand_key = list(results_dict[receptor_key].keys())[0]
+
+        inside_dict = results_dict[receptor_key][ligand_key]
+        abs_max = None
+        abs_min = None
+
+        # To eliminate empty dictionaries that might cause division errors below
+        # normalized_mon_dicitonary calculations
+        if inside_dict != {}:
+            abs_min = min(inside_dict.values())
+            abs_max = max(inside_dict.values())
+
+        all_normalized_results = {}
+
+        normalized_mon_dict = {}
+        normalized_mon_dict[receptor_key] = {}
+        normalized_mon_dict[receptor_key][ligand_key] = {}
+
+        # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+        if abs_min == abs_max:
+            for k, v in inside_dict.items():
+                normalized_mon_dict[receptor_key][ligand_key][k] = 1
+        else:
+            for k, v in inside_dict.items():
+                normalized_value = (v - abs_min) / (abs_max - abs_min)
+                normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value
+        all_normalized_results.update(normalized_mon_dict)
+        return all_normalized_results
+
+
+class ComplexDocking(Docking):
+    """A class that represents a docking between a complex receptor and a ligand.
+
+        --- Attributes ---
+    receptor (ComplexReceptor): a Receptor object that represents a monomer receptor
+    ligand (Ligand): a Ligand object that represents a ligand
+    results_path (str): the file path to where the results are stored
+    ligand_reserved (List[int]): a list of line numbers, one for each solution,
+        which indicates where the "Docked ligand" section begins
+    split_results (List[List[Tuple[int]]]): a list where each sublist is a chain,
+        which contains a list of tuples. Each tuple indicates the line numbers
+        of the start and end of that chain in a results file.
+    """
+
+    def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str):
+        super().__init__(receptor, ligand, results_path)
+        self.split_results = []
+
+    def separate_results(self):
+        """For each solution, record the start and end line number (0-based) of
+        each chain. Then, populate self.split_results with the final list.
+
+        Each sublist represents one solution file. Each tuple in the sublist
+        contains the start and end of one chain. The order of the tuples in
+        the sublist is the same as the order of the monomers in the receptor's
+        monomers_list.
+        """
+        results_files = os.listdir(self.results_path)
+
+        # for each solution
+        for file in results_files:
+            if file[-3:] != "pdb":
+                break
+            result_file = open(self.results_path + file)
+
+            # this list contains indices of the start and end of each chain
+            line_numbers = []
+            line = result_file.readline()
+            curr_line = 0
+            prev = None
+            while line != '':
+                # the start of the first chain
+                if line.split()[0] == "ATOM" and line.split()[1] == "1":
+                    # if line.startswith('ATOM      1  '):
+                    prev = curr_line - 1
+
+                # the end of a chain
+                elif line[0:3] == 'TER':
+                    line_numbers.append([prev + 1, curr_line])
+                    prev = curr_line
+
+                # read next line
+                line = result_file.readline()
+                curr_line += 1
+
+        # populate split_results attribute
+        self.split_results = line_numbers
+
+    def best_result(self):
+        pass
+
+    def crte_receptor_dict(self, threshold):
+        all_monomers = []
+        for i in range(len(self.receptor.monomers_list)):
+            ligand_res = {}
+            res_dict = self.result_dict_generator(i, threshold)
+            ligand_res[self.ligand.name] = res_dict
+            all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res})
+        return all_monomers
+
+    def normalize_results(self, threshold):
+        min_values = []
+        max_values = []
+        abs_max = None
+        abs_min = None
+        all_monomers_dict = self.crte_receptor_dict(threshold)
+        for i in range(len(all_monomers_dict)):
+            monomer_dict = all_monomers_dict[i]
+            monomer_key = list(monomer_dict.keys())[0]
+            ligand_key = list(monomer_dict[monomer_key].keys())[0]
+
+            inside_dict = monomer_dict[monomer_key][ligand_key]
+
+            # To eliminate empty dictionaries that might cause division errors below
+            # normalized_mon_dicitonary calculations
+            if inside_dict == {}:
+                continue
+            else:
+                mini = min(inside_dict.values())
+                maxi = max(inside_dict.values())
+
+                min_values.append(mini)
+                max_values.append(maxi)
+
+                abs_max = max(max_values)
+                abs_min = min(min_values)
+
+                print("This is the maximum value: ", abs_max, file=sys.stderr)
+                print("This is the minimum value: ", abs_min, file=sys.stderr)
+
+        # Now looping through every monomer, and calculating every residue energy to be
+        # normalized by using absolute minimum and maximum.
+        all_normalized_results = {}
+        for i in range(len(all_monomers_dict)):
+            monomer_dict = all_monomers_dict[i]
+            monomer_key = list(monomer_dict.keys())[0]
+            ligand_key = list(monomer_dict[monomer_key].keys())[0]
+
+            inside_dict = monomer_dict[monomer_key][ligand_key]
+
+            normalized_mon_dict = {}
+            normalized_mon_dict[monomer_key] = {}
+            normalized_mon_dict[monomer_key][ligand_key] = {}
+
+            # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+            if abs_min == abs_max:
+                for k, v in inside_dict.items():
+                    normalized_mon_dict[monomer_key][ligand_key][k] = 1
+            else:
+                for k, v in inside_dict.items():
+                    normalized_value = (v - abs_min) / (abs_max - abs_min)
+                    normalized_mon_dict[monomer_key][ligand_key][k] = normalized_value
+            all_normalized_results.update(normalized_mon_dict)
+        return all_normalized_results
+
+
+class Docker:
+    """A class that represents the controller to create docking pairs and carry
+    out the docking.
+    """
+
+    @staticmethod
+    def start(receptor: str, ligand: str, docking_pdb_path: str):
+        """Start the docking process and analyze results. Return the
+        normalized residue-energyy dictionary.
+        """
+        # create docking object
+        ct = datetime.datetime.now()
+        print("Starting the docking process at {}".format(ct))
+        docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
+        if docking is None:
+            receptor = receptor.split('.')[0]
+            results_path = docking_pdb_path + receptor + '_' + ligand + '/'
+            with open(results_path + "final.json") as json_file:
+                final_json = json.load(json_file)
+            return final_json
+        elif docking == "Receptor file not found":
+            return "Receptor file not found"
+        elif docking == "Ligand file not found":
+            return "Ligand file not found"
+
+        docking.hex_docking()
+        if isinstance(docking, ComplexDocking):
+            docking.separate_results()
+        docking.crte_ligand_reserved_attr()
+        normalized_results = docking.normalize_results(5)
+        new_json = docking.results_path + "final.json"
+        with open(new_json, 'w') as file:
+            file.write(json.dumps(normalized_results))
+        ct = datetime.datetime.now()
+        print("current time:-", ct)
+        return normalized_results
+
+    def create_receptor(receptor_name: str, receptor_file_path: str):
+        """Return a new receptor with the name receptor_name, by parsing
+        the file at recepter_file_path.
+        """
+        with open(receptor_file_path) as f:
+            is_monomer = True
+            for line in f.readlines():
+                if re.match(r'COMPND   \d CHAIN: \w, \w*', line) is not None:
+                    is_monomer = False
+                    # if the receptor would be a monomer the regex would be
+                    # r'COMPND   \d CHAIN: \w;'
+
+                    # To make a list of the monomers' labels
+                    print(receptor_name + ' identified as a protein complex')
+                    if line[11:16] == 'CHAIN':
+                        monomers_list = line.split(': ')[-1].split(', ')
+                        # The COMPND line ends with ';' therefore it needs to be
+                        # removed from the last label
+                        monomers_list[-1] = monomers_list[-1][0]
+                        new_receptor = ComplexReceptor(receptor_name,
+                                                       receptor_file_path,
+                                                       monomers_list)
+                        return new_receptor
+                    print("Unknown pdb structure, need further investigation")
+
+            if is_monomer:
+                new_receptor = MonomerReceptor(receptor_name,
+                                               receptor_file_path)
+                return new_receptor
+
+    def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
+        """Return a docking pair, which contains a Receptor and a Ligand, as
+        specified by receptor_name and ligand_name, respectively.
+        """
+        # check that the docking combination has not been run before
+        # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/'
+        if '.' in receptor_name:
+            receptor_name = receptor_name[:receptor_name.index('.')]
+        results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
+        print(results_path)
+        if os.path.exists(results_path): #or \
+            #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'):
+            print("The docking between {0} and {1} has already been done.".format(receptor_name, 
+                                                                                  ligand_name))
+            return None
+
+
+        os.makedirs(results_path)
+
+        # find receptor file and create receptor object
+        receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/'
+        # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs'
+        receptor_file_found = False
+
+        for receptor_file in os.listdir(receptor_folder):
+            # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
+            #     receptor_file[-4:] == 'pdb' and \
+            #         receptor_file[:-4].lower() == receptor_name.lower():
+            if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \
+                    (receptor_name in receptor_file):
+                receptor_file_found = True
+                receptor_file_path = receptor_folder + receptor_file
+                receptor = Docker.create_receptor(receptor_name, receptor_file_path)
+
+        # find ligand file and create ligand object
+        # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/'
+        ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/'
+        ligand_file_found = False
+
+        for ligand_file in os.listdir(ligand_folder):
+            if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \
+                ligand_file.split('.')[1] == 'sdf' and \
+                    ligand_file[:-4].lower() == ligand_name.lower():
+                ligand_file_found = True
+                ligand_file_path = ligand_folder + '/' + ligand_file
+                ligand = Ligand(ligand_name, ligand_file_path)
+
+        if not receptor_file_found:
+            return "Receptor file not found"
+        elif not ligand_file_found:
+            return "Ligand file not found"
+
+        # receptor and ligand objects are created and ready for docking
+        if isinstance(receptor, MonomerReceptor):
+            docking = MonomerDocking(receptor, ligand, results_path)
+        else:
+            docking = ComplexDocking(receptor, ligand, results_path)
+        return docking
+
+
+if __name__ == "__main__":
+    # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/"))
+    print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/"))
+    

From d9bf0547ec3fe04fa3d36f0a09c9385d38b43854 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Thu, 8 Feb 2024 16:37:24 -0500
Subject: [PATCH 10/35] convert sdf_mapping to OOP and move to docking_utils.py
 file

---
 api/utils/docking_utils.py            |  81 +++-
 api/utils/refactored_docking_utils.py | 638 --------------------------
 api/utils/sdf_mapping.py              |  49 --
 3 files changed, 73 insertions(+), 695 deletions(-)
 delete mode 100755 api/utils/refactored_docking_utils.py
 delete mode 100644 api/utils/sdf_mapping.py

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index ab7d15d..030a7ca 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -582,13 +582,11 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
             receptor_name = receptor_name[:receptor_name.index('.')]
         results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
         print(results_path)
-        if os.path.exists(results_path): #or \
-            #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'):
-            print("The docking between {0} and {1} has already been done.".format(receptor_name, 
+        if os.path.exists(results_path):
+            print("The docking between {0} and {1} has already been done.".format(receptor_name,
                                                                                   ligand_name))
             return None
 
-
         os.makedirs(results_path)
 
         # find receptor file and create receptor object
@@ -632,7 +630,74 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
         return docking
 
 
-if __name__ == "__main__":
-    # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/"))
-    print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/"))
-    
+class SDFMapping:
+    """
+    A class for mapping SDF names to their file names in the BAR.
+    """
+
+    def get_substance_name(self, filename: str, folder_path: str):
+        """Parse and return the names of a substance from a .sdf file. It
+        requires the line "> <PUBCHEM_SUBSTANCE_SYNONYM>" to be present
+        in the file.
+        """
+        file = open(folder_path + filename, "r")
+        line = file.readline().strip()
+        if line == "":
+            return None
+        while line != "> <PUBCHEM_SUBSTANCE_SYNONYM>" and line != "$$$$":
+            line = file.readline().strip()
+        # right now, line == "> <PUBCHEM_SUBSTANCE_SYNONYM>" or line is empty
+        if line == "$$$$":
+            return None
+        line = file.readline().strip()
+        names = []
+        while line != "":
+            if len(line) > 0 and line[0] == ">":
+                break
+            names.append(line)
+            line = file.readline().strip()
+        return names
+
+    @staticmethod
+    def create_mapping_filtered(folder_path: str, results_path: str):
+        """Create a json file that maps the name of the ligand to the
+        file name, for example: {"bld": "115196_bld.sdf"}.
+
+        It only works for sdf files that are formatted like the
+        example shown above.
+
+        folder_path: where the sdf files are stored
+        results_path: where the json file should be created
+        """
+        mapped_sdf = {}
+        sdf_files = os.listdir(folder_path)
+        for file in sdf_files:
+            if file[0] != "." and file[-4:] == ".sdf":
+                name = file[file.index("_") + 1:-4]
+                mapped_sdf[name] = file
+        json_file = results_path + "sdf_mapping_filtered.json"
+        with open(json_file, 'w') as file:
+            file.write(json.dumps(mapped_sdf))
+        return mapped_sdf
+
+    def create_mapping_unfiltered(self, folder_path: str, results_path: str):
+        """Create a json file that maps the names of the ligand to the
+        file name, for example: {"122234": "Corn sugar gum,Xanthan gum"}.
+
+        It only works for sdf files that contain this line:
+        "> <PUBCHEM_SUBSTANCE_SYNONYM>".
+
+        folder_path: where the sdf files are stored
+        results_path: where the json file should be created
+        """
+        mapped_sdf = {}
+        sdf_files = os.listdir(folder_path)
+        for file in sdf_files:
+            if file[0] != "." and file[-4:] == ".sdf":
+                names = self.get_substance_name(file, folder_path)
+                sdf_number = file.split(".")[0]
+                mapped_sdf[sdf_number] = ",".join(names)
+        json_file = results_path + "sdf_mapping_unfiltered.json"
+        with open(json_file, 'w') as file:
+            file.write(json.dumps(mapped_sdf))
+        return mapped_sdf
diff --git a/api/utils/refactored_docking_utils.py b/api/utils/refactored_docking_utils.py
deleted file mode 100755
index ab7d15d..0000000
--- a/api/utils/refactored_docking_utils.py
+++ /dev/null
@@ -1,638 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import List
-import os
-import re
-import subprocess
-import math
-import sys
-import json
-import datetime
-
-HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex'
-
-
-class Receptor(ABC):
-    """An abstract class that represents a receptor
-
-    --- Attributes ---
-    name (str): the name of the receptor
-    file_path (str): the relative path to the receptors pdb file
-    """
-    @abstractmethod
-    def __init__(self, name: str, file_path: str):
-        self.name = name
-        self.file_path = file_path
-
-
-class MonomerReceptor(Receptor):
-    """ A class that represents a receptor that is a monomer, meaning it consists
-    of only one chain.
-
-    --- Attributes ---
-    name (str): the name of the receptor
-    file_path (str): the relative path to the receptors pdb file
-    """
-    name: str
-    file_path: str
-
-    def __init__(self, name, file_path):
-        super().__init__(name, file_path)
-
-
-class ComplexReceptor(Receptor):
-    """ A class that represents a receptor that is a complex, meaning it consists
-    of more than one chain.
-
-    --- Attributes ---
-    name (str): the name of the receptor
-    file_path (str): the relative path to the receptors pdb file
-    monomer_list (List[str]): the list of monomers that make up the complex
-    line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]]
-    """
-    def __init__(self, name: str, file_path: str, monomers_list: List[str]):
-        super().__init__(name, file_path)
-        self.monomers_list = monomers_list
-        self.line_numbers = self.separate_monomers()
-
-    def separate_monomers(self):
-        """Returns a list of lists, where each sublist contains the line
-        numbers of the start and end of a monomer.
-        For example, receptor X has 3 chains in this order: A, B, C.
-        The method will return [[1, 6], [7, 9], [10, 15]].
-        """
-        line_numbers = []
-        file = open(self.file_path, "r")
-        line = file.readline()
-        prev = None
-        curr_line = 0
-        while line != '':
-            # the first line of the first monomer
-            if line[:12] == "ATOM      1 ":
-                prev = curr_line - 1
-            # the last line of a monomer
-            elif line[:3] == 'TER':
-                # line_numbers.append(curr_line)
-                line_numbers.append([prev + 1, curr_line])
-                prev = curr_line
-            curr_line += 1
-            line = file.readline()
-
-        return line_numbers
-
-
-class Ligand:
-    """A class that represents a ligand.
-
-    --- Attributes ---
-    name (str): the name of the receptor
-    file_path (str): the relative path to the receptors pdb file
-    """
-    def __init__(self, name: str, file_path: str):
-        self.name = name
-        self.file_path = file_path
-
-
-class Docking(ABC):
-    """An abstract class that represents the docking between a receptor and a
-    ligand.
-
-    --- Attributes ---
-    receptor (Receptor): a Receptor object that represents a receptor
-    ligand (Ligand): a Ligand object that represents a ligand
-    results_path (str): the file path to where the results are stored
-    ligand_reserved_list (List[int]): a list of line numbers, one for each solution,
-    the indicates where the "Docked ligand" section begins
-    """
-
-    @abstractmethod
-    def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str):
-        self.receptor = receptor
-        self.ligand = ligand
-        self.results_path = results_path
-        self.ligand_reserved_list = []
-
-    def hex_docking(self):
-        """Run hex docking using the command line.
-        """
-        hex_output_file = open(self.results_path + 'hex_output.txt', "w")
-
-    # Function to call Hex, including hard coded settings
-
-    # max_docking_solutions set at 5 for testing
-        hex_command = """ open_receptor  """ + self.receptor.file_path + """
-                open_ligand  """ + self.ligand.file_path + """
-                docking_correlation 1
-                docking_score_threshold 0
-                max_docking_solutions 25
-                docking_receptor_stepsize 5.50
-                docking_ligand_stepsize 5.50
-                docking_alpha_stepsize 2.80
-                docking_main_scan 16
-                receptor_origin C-825:VAL-O
-                commit_edits
-                activate_docking
-                save_range 1 100 """ \
-        + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name)
-        subprocess.Popen(HEX_BIN_PATH,
-                         stdin=subprocess.PIPE,
-                         stderr=subprocess.STDOUT,
-                         stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8')))
-        hex_output_file.close()
-        ct = datetime.datetime.now()
-        print("current time:-", ct)
-        print("Hex docking completed")
-
-    def crte_ligand_reserved_attr(self):
-        """This function populates the Docking instance's ligand_reserved_list attribute
-        with a list of line numbers. Each line number is where the Docked Ligand section
-        begins for each result.
-        For example, [1500, 1499, 1500] means that there are three solutions. In the first
-        solution, the "Docked Ligand" section begins at line 1500. In the second solution,
-        it begins at line 1499, and so on ...
-        """
-        line_numbers = []
-        for filename in os.listdir(self.results_path):
-            if filename[-3:] == 'pdb':
-                file = open(self.results_path + filename, "r")
-                lines = file.readlines()
-                for i in range(len(lines)):
-                    if "Docked ligand coordinates..." in lines[i]:
-                        line_numbers.append(i)
-                        break
-        self.ligand_reserved_list = line_numbers
-
-    def parse_hex_output(self):
-        """Returns a dictionary where the key is the cluster number and the
-        value is a list of solution numbers. One of the keys is "num_soln",
-        where its value is the total number of solutions.
-        For example: {num_soln : 5, 1 : [2, 4], 2 : [1, 3, 5]}
-        """
-        hex_output = open(self.results_path + 'hex_output.txt', "r")
-        lines = hex_output.readlines()
-        # line number where the clustering starts and ends
-        result_start = 0
-        result_end = 0
-        for i in range(len(lines)):
-            splitted_line = lines[i].split(" ")
-            if len(splitted_line) > 8 and splitted_line[0] == "Clst":
-                result_start = i + 2
-            if len(splitted_line) > 2 and "save_range" in splitted_line:
-                result_end = i - 2
-        clustering_lines = lines[result_start:result_end]
-        clusters = {}
-        clusters["num_soln"] = len(clustering_lines)
-        for line in clustering_lines:
-            cleaned_line = line.strip().split(" ")
-            res = []
-            # only keep non-blank items in line
-            for ch in cleaned_line:
-                if ch != "":
-                    res.append(ch)
-            clst = int(res[0])
-            sln = int(res[1])
-            if clst not in clusters:
-                clusters[clst] = [sln]
-            else:
-                clusters[clst].append(sln)
-        return clusters
-
-    def result_dict_generator(self, monomer_number, threshold):
-        """Return a dictionary where each key is a residue and each value is
-        the energy. The distance between each residue in the monomer and each
-        atom in the ligand is calculated, and only residues with distances
-        below the threshold are included.
-        """
-        receptor_file = open(self.receptor.file_path, "r")
-
-        if monomer_number != -1:  # if -1, go to monomer logic
-            # get the start and end line numbers of the monomer in the receptor pdb
-            monomer_start = self.receptor.line_numbers[monomer_number][0]
-            monomer_end = self.receptor.line_numbers[monomer_number][1]
-
-            # get the lines for that receptor only
-            receptor_file_lines = receptor_file.readlines()[monomer_start:monomer_end]
-        else:  # Monomer logic
-            receptor_file_lines = receptor_file.readlines()
-
-        # Store every receptor's atom coordinates information as a nested
-        # dictionary called 'reference'
-        reference = {}
-        for line in receptor_file_lines:
-            splitted_line = line.split()
-            if line[0:4] == 'ATOM':
-                coord = map(float, filter(None, splitted_line[6:9]))
-                if int(splitted_line[5]) in reference:
-                    reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord)
-                else:
-                    reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)}
-
-        # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}},
-
-        # The energy for each reference element will be stored in dictionary 'ac'
-        ac = {}
-        result_list = []
-        for filename in os.listdir(self.results_path):
-            if filename[-3:] == 'pdb':
-                result_list.append(filename)
-
-        lowest_en = None  # to keep track of lowest energy
-        all_residue_list = []
-
-        cluster_dict = self.parse_hex_output()
-
-        for i in range(len(result_list)):
-            energy = ''
-
-            # get the ligand_reserved section of the result file
-            file = open(self.results_path + result_list[i], 'r')
-            ligand_reserved_start = self.ligand_reserved_list[i]
-            ligand_reserved_section = file.readlines()[ligand_reserved_start:]
-
-            # go through ligand reserved section to calculate energy
-            residue_set = set()
-            coor = []
-            for line in ligand_reserved_section:
-                if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '):
-                    cluster_size = len(cluster_dict[i + 1])
-                    total_solutions = cluster_dict['num_soln']
-
-                    # energy is weighed according to the number of solutions
-                    # in that cluster
-                    energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size
-
-                    # record values if lowest energy
-                    if lowest_en is None or energy < lowest_en:
-                        lowest_en = energy
-                elif line[:4] == 'ATOM':
-                    # coordinates of one atom
-                    coordinates = tuple(map(float, filter(None, line.split()[6:9])))
-                    coor.append(coordinates)
-            # each atom's coordinates is now stored in the list coordinates
-
-            residue_set = set()
-            for res in reference.keys():  # for each amino acid in the receptor file:
-                distances = []
-
-                for atom in coor:  # for each atom of the ligand
-                    for aa in reference[res].keys():  # for each atom of that amino acid
-                        # check if the distance between atoms of the ligands
-                        # and of the amino acid are lower than chosen threshold (5)
-                        distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2,
-                                                  (reference[res][aa][1] - atom[1]) ** 2,
-                                                  (reference[res][aa][2] - atom[2]) ** 2]))
-
-                        distances.append(distance)
-
-                # if at least one of the distances is lower than the threshold, otherwise skip
-                if all(d >= threshold for d in distances):
-                    continue
-                else:
-                    # adding energy (previosly divided by the number of results)
-                    # if found multiple times, we would get an average
-                    if res in ac.keys():
-                        ac[res] += energy
-                    else:
-                        ac[res] = energy
-
-                # Store the resi number into set
-                residue_set.add(res)
-
-            all_residue_list.append(residue_set)
-
-        return ac
-
-    @abstractmethod
-    def best_result(self):
-        pass
-
-    @abstractmethod
-    def crte_receptor_dict(self):
-        pass
-
-    @abstractmethod
-    def normalize_results(self, threshold):
-        pass
-
-
-class MonomerDocking(Docking):
-    """A class the represents a docking between a monomer receptor and a monomer.
-
-    --- Attributes ---
-    receptor (MonomerReceptor): a Receptor object that represents a monomer receptor
-    ligand (Ligand): a Ligand object that represents a ligand
-    results_path (str): the file path to where the results are stored
-    ligand_reserved_list (List[int]): a list of line numbers, one for each solution,
-        the indicates where the "Docked ligand" section begins
-    """
-
-    def __init__(self, receptor: MonomerReceptor, ligand: Ligand, results_path: str):
-        super().__init__(receptor, ligand, results_path)
-
-    def best_result(self):
-        pass
-
-    def crte_receptor_dict(self, threshold):
-        """"Return a dictionary that contains the residue-energy
-        dictionary of the monomer. This is not necessary, but maintains
-        consistency between monomer and complex receptor dictionaries.
-        """
-        receptor_res = {}
-        res_dict = self.result_dict_generator(-1, threshold)
-        ligand_res = {}
-        ligand_res[self.ligand.name] = res_dict
-        receptor_res[self.receptor.name] = ligand_res
-        return receptor_res
-
-    def normalize_results(self, threshold):
-        """Return normalized residue-energy dictionaries for the
-        receptor.
-        """
-        results_dict = self.crte_receptor_dict(threshold)
-        receptor_key = list(results_dict.keys())[0]
-        ligand_key = list(results_dict[receptor_key].keys())[0]
-
-        inside_dict = results_dict[receptor_key][ligand_key]
-        abs_max = None
-        abs_min = None
-
-        # To eliminate empty dictionaries that might cause division errors below
-        # normalized_mon_dicitonary calculations
-        if inside_dict != {}:
-            abs_min = min(inside_dict.values())
-            abs_max = max(inside_dict.values())
-
-        all_normalized_results = {}
-
-        normalized_mon_dict = {}
-        normalized_mon_dict[receptor_key] = {}
-        normalized_mon_dict[receptor_key][ligand_key] = {}
-
-        # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
-        if abs_min == abs_max:
-            for k, v in inside_dict.items():
-                normalized_mon_dict[receptor_key][ligand_key][k] = 1
-        else:
-            for k, v in inside_dict.items():
-                normalized_value = (v - abs_min) / (abs_max - abs_min)
-                normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value
-        all_normalized_results.update(normalized_mon_dict)
-        return all_normalized_results
-
-
-class ComplexDocking(Docking):
-    """A class that represents a docking between a complex receptor and a ligand.
-
-        --- Attributes ---
-    receptor (ComplexReceptor): a Receptor object that represents a monomer receptor
-    ligand (Ligand): a Ligand object that represents a ligand
-    results_path (str): the file path to where the results are stored
-    ligand_reserved (List[int]): a list of line numbers, one for each solution,
-        which indicates where the "Docked ligand" section begins
-    split_results (List[List[Tuple[int]]]): a list where each sublist is a chain,
-        which contains a list of tuples. Each tuple indicates the line numbers
-        of the start and end of that chain in a results file.
-    """
-
-    def __init__(self, receptor: ComplexReceptor, ligand: Ligand, results_path: str):
-        super().__init__(receptor, ligand, results_path)
-        self.split_results = []
-
-    def separate_results(self):
-        """For each solution, record the start and end line number (0-based) of
-        each chain. Then, populate self.split_results with the final list.
-
-        Each sublist represents one solution file. Each tuple in the sublist
-        contains the start and end of one chain. The order of the tuples in
-        the sublist is the same as the order of the monomers in the receptor's
-        monomers_list.
-        """
-        results_files = os.listdir(self.results_path)
-
-        # for each solution
-        for file in results_files:
-            if file[-3:] != "pdb":
-                break
-            result_file = open(self.results_path + file)
-
-            # this list contains indices of the start and end of each chain
-            line_numbers = []
-            line = result_file.readline()
-            curr_line = 0
-            prev = None
-            while line != '':
-                # the start of the first chain
-                if line.split()[0] == "ATOM" and line.split()[1] == "1":
-                    # if line.startswith('ATOM      1  '):
-                    prev = curr_line - 1
-
-                # the end of a chain
-                elif line[0:3] == 'TER':
-                    line_numbers.append([prev + 1, curr_line])
-                    prev = curr_line
-
-                # read next line
-                line = result_file.readline()
-                curr_line += 1
-
-        # populate split_results attribute
-        self.split_results = line_numbers
-
-    def best_result(self):
-        pass
-
-    def crte_receptor_dict(self, threshold):
-        all_monomers = []
-        for i in range(len(self.receptor.monomers_list)):
-            ligand_res = {}
-            res_dict = self.result_dict_generator(i, threshold)
-            ligand_res[self.ligand.name] = res_dict
-            all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res})
-        return all_monomers
-
-    def normalize_results(self, threshold):
-        min_values = []
-        max_values = []
-        abs_max = None
-        abs_min = None
-        all_monomers_dict = self.crte_receptor_dict(threshold)
-        for i in range(len(all_monomers_dict)):
-            monomer_dict = all_monomers_dict[i]
-            monomer_key = list(monomer_dict.keys())[0]
-            ligand_key = list(monomer_dict[monomer_key].keys())[0]
-
-            inside_dict = monomer_dict[monomer_key][ligand_key]
-
-            # To eliminate empty dictionaries that might cause division errors below
-            # normalized_mon_dicitonary calculations
-            if inside_dict == {}:
-                continue
-            else:
-                mini = min(inside_dict.values())
-                maxi = max(inside_dict.values())
-
-                min_values.append(mini)
-                max_values.append(maxi)
-
-                abs_max = max(max_values)
-                abs_min = min(min_values)
-
-                print("This is the maximum value: ", abs_max, file=sys.stderr)
-                print("This is the minimum value: ", abs_min, file=sys.stderr)
-
-        # Now looping through every monomer, and calculating every residue energy to be
-        # normalized by using absolute minimum and maximum.
-        all_normalized_results = {}
-        for i in range(len(all_monomers_dict)):
-            monomer_dict = all_monomers_dict[i]
-            monomer_key = list(monomer_dict.keys())[0]
-            ligand_key = list(monomer_dict[monomer_key].keys())[0]
-
-            inside_dict = monomer_dict[monomer_key][ligand_key]
-
-            normalized_mon_dict = {}
-            normalized_mon_dict[monomer_key] = {}
-            normalized_mon_dict[monomer_key][ligand_key] = {}
-
-            # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
-            if abs_min == abs_max:
-                for k, v in inside_dict.items():
-                    normalized_mon_dict[monomer_key][ligand_key][k] = 1
-            else:
-                for k, v in inside_dict.items():
-                    normalized_value = (v - abs_min) / (abs_max - abs_min)
-                    normalized_mon_dict[monomer_key][ligand_key][k] = normalized_value
-            all_normalized_results.update(normalized_mon_dict)
-        return all_normalized_results
-
-
-class Docker:
-    """A class that represents the controller to create docking pairs and carry
-    out the docking.
-    """
-
-    @staticmethod
-    def start(receptor: str, ligand: str, docking_pdb_path: str):
-        """Start the docking process and analyze results. Return the
-        normalized residue-energyy dictionary.
-        """
-        # create docking object
-        ct = datetime.datetime.now()
-        print("Starting the docking process at {}".format(ct))
-        docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
-        if docking is None:
-            receptor = receptor.split('.')[0]
-            results_path = docking_pdb_path + receptor + '_' + ligand + '/'
-            with open(results_path + "final.json") as json_file:
-                final_json = json.load(json_file)
-            return final_json
-        elif docking == "Receptor file not found":
-            return "Receptor file not found"
-        elif docking == "Ligand file not found":
-            return "Ligand file not found"
-
-        docking.hex_docking()
-        if isinstance(docking, ComplexDocking):
-            docking.separate_results()
-        docking.crte_ligand_reserved_attr()
-        normalized_results = docking.normalize_results(5)
-        new_json = docking.results_path + "final.json"
-        with open(new_json, 'w') as file:
-            file.write(json.dumps(normalized_results))
-        ct = datetime.datetime.now()
-        print("current time:-", ct)
-        return normalized_results
-
-    def create_receptor(receptor_name: str, receptor_file_path: str):
-        """Return a new receptor with the name receptor_name, by parsing
-        the file at recepter_file_path.
-        """
-        with open(receptor_file_path) as f:
-            is_monomer = True
-            for line in f.readlines():
-                if re.match(r'COMPND   \d CHAIN: \w, \w*', line) is not None:
-                    is_monomer = False
-                    # if the receptor would be a monomer the regex would be
-                    # r'COMPND   \d CHAIN: \w;'
-
-                    # To make a list of the monomers' labels
-                    print(receptor_name + ' identified as a protein complex')
-                    if line[11:16] == 'CHAIN':
-                        monomers_list = line.split(': ')[-1].split(', ')
-                        # The COMPND line ends with ';' therefore it needs to be
-                        # removed from the last label
-                        monomers_list[-1] = monomers_list[-1][0]
-                        new_receptor = ComplexReceptor(receptor_name,
-                                                       receptor_file_path,
-                                                       monomers_list)
-                        return new_receptor
-                    print("Unknown pdb structure, need further investigation")
-
-            if is_monomer:
-                new_receptor = MonomerReceptor(receptor_name,
-                                               receptor_file_path)
-                return new_receptor
-
-    def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
-        """Return a docking pair, which contains a Receptor and a Ligand, as
-        specified by receptor_name and ligand_name, respectively.
-        """
-        # check that the docking combination has not been run before
-        # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/'
-        if '.' in receptor_name:
-            receptor_name = receptor_name[:receptor_name.index('.')]
-        results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
-        print(results_path)
-        if os.path.exists(results_path): #or \
-            #os.path.exists(docking_pdb_path + receptor_name + '.1_' + ligand_name + '/'):
-            print("The docking between {0} and {1} has already been done.".format(receptor_name, 
-                                                                                  ligand_name))
-            return None
-
-
-        os.makedirs(results_path)
-
-        # find receptor file and create receptor object
-        receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/'
-        # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs'
-        receptor_file_found = False
-
-        for receptor_file in os.listdir(receptor_folder):
-            # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
-            #     receptor_file[-4:] == 'pdb' and \
-            #         receptor_file[:-4].lower() == receptor_name.lower():
-            if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \
-                    (receptor_name in receptor_file):
-                receptor_file_found = True
-                receptor_file_path = receptor_folder + receptor_file
-                receptor = Docker.create_receptor(receptor_name, receptor_file_path)
-
-        # find ligand file and create ligand object
-        # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/'
-        ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/'
-        ligand_file_found = False
-
-        for ligand_file in os.listdir(ligand_folder):
-            if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \
-                ligand_file.split('.')[1] == 'sdf' and \
-                    ligand_file[:-4].lower() == ligand_name.lower():
-                ligand_file_found = True
-                ligand_file_path = ligand_folder + '/' + ligand_file
-                ligand = Ligand(ligand_name, ligand_file_path)
-
-        if not receptor_file_found:
-            return "Receptor file not found"
-        elif not ligand_file_found:
-            return "Ligand file not found"
-
-        # receptor and ligand objects are created and ready for docking
-        if isinstance(receptor, MonomerReceptor):
-            docking = MonomerDocking(receptor, ligand, results_path)
-        else:
-            docking = ComplexDocking(receptor, ligand, results_path)
-        return docking
-
-
-if __name__ == "__main__":
-    # print(Docker.start("8g2j", "UPG", "/DATA/HEX_API/"))
-    print(Docker.start("AT1G66340", "6325_Ethylene", "/DATA/HEX_API/RESULTS/"))
-    
diff --git a/api/utils/sdf_mapping.py b/api/utils/sdf_mapping.py
deleted file mode 100644
index 9bf2033..0000000
--- a/api/utils/sdf_mapping.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import os
-import re
-from typing import List
-
-def get_substance_name(filename: str, folder_path: str):
-    file = open(folder_path + filename, "r")
-    line = file.readline().strip()
-    if line == "":
-        return None
-    while line != "> <PUBCHEM_SUBSTANCE_SYNONYM>" and line != "$$$$":
-        line = file.readline().strip()
-    # right now, line == "> <PUBCHEM_SUBSTANCE_SYNONYM>" or line is empty
-    if line == "$$$$":
-        return None
-    line = file.readline().strip()
-    names = []
-    while line != "":
-        if len(line) > 0 and line[0] == ">":
-            break
-    # while line != "":
-        # check regex to see if it contains lowercase
-        # matched = re.search("[a-z]", line)
-        # if matched is not None:
-        #     return line
-        # else:
-        #     line = file.readline().strip()
-        names.append(line)
-        line = file.readline().strip()
-    return names
-
-def create_mapping(folder_path: str):
-    mapped_sdf = {}
-    sdf_files = os.listdir(folder_path)
-    for file in sdf_files:
-        if file[0] != "." and file[-4:] == ".sdf":
-            file_number = file[:file.index("_")]
-            name = file[file.index("_") + 1:-4]
-            # the commented out section is for sdfs that have not been filtered
-            # names = get_substance_name(file, folder_path)
-            # print(name)
-            # sdf_number = file.split(".")[0]
-            # mapped_sdf[sdf_number] = ",".join(names)
-            mapped_sdf[file_number] = name  # check if want to map file_number or file name
-    return mapped_sdf
-
-if __name__ == "__main__":
-    sdf_folder_paths = ['/home/diennguyen/BAR_API/HEX_API/HEX_SMALL_MOLECULES']
-    print(create_mapping(sdf_folder_paths[0]))
-

From 4c208ce575873ec1d8c3783d312afb71107f045e Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dien.nguyen@mail.utoronto.ca>
Date: Fri, 16 Feb 2024 13:39:39 -0500
Subject: [PATCH 11/35] Fix bug to prevent creating results folder for invalid
 protein or ligand

---
 api/utils/docking_utils.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 030a7ca..cba1fe2 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -367,7 +367,8 @@ def normalize_results(self, threshold):
         normalized_mon_dict[receptor_key] = {}
         normalized_mon_dict[receptor_key][ligand_key] = {}
 
-        # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+        # prevent substraction of equal values or values that doesn't make any sense
+        # in terms of accuracy
         if abs_min == abs_max:
             for k, v in inside_dict.items():
                 normalized_mon_dict[receptor_key][ligand_key][k] = 1
@@ -493,7 +494,8 @@ def normalize_results(self, threshold):
             normalized_mon_dict[monomer_key] = {}
             normalized_mon_dict[monomer_key][ligand_key] = {}
 
-            # prevent substraction of equal values or values that doesn't make any sense in terms of accuracy
+            # prevent substraction of equal values or values that doesn't make any sense
+            # in terms of accuracy
             if abs_min == abs_max:
                 for k, v in inside_dict.items():
                     normalized_mon_dict[monomer_key][ligand_key][k] = 1
@@ -530,6 +532,11 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         elif docking == "Ligand file not found":
             return "Ligand file not found"
 
+        results_path = docking_pdb_path + receptor + '_' + ligand + '/'
+
+        # create folder to store docking results
+        os.makedirs(results_path)
+
         docking.hex_docking()
         if isinstance(docking, ComplexDocking):
             docking.separate_results()
@@ -587,17 +594,11 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
                                                                                   ligand_name))
             return None
 
-        os.makedirs(results_path)
-
         # find receptor file and create receptor object
         receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/'
-        # receptor_folder = '/var/www/html/eplant/AF2_Ath_PDBs'
         receptor_file_found = False
 
         for receptor_file in os.listdir(receptor_folder):
-            # if receptor_file[0] != '.' and len(receptor_file.split('.')) == 2 and \
-            #     receptor_file[-4:] == 'pdb' and \
-            #         receptor_file[:-4].lower() == receptor_name.lower():
             if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \
                     (receptor_name in receptor_file):
                 receptor_file_found = True
@@ -605,7 +606,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
                 receptor = Docker.create_receptor(receptor_name, receptor_file_path)
 
         # find ligand file and create ligand object
-        # ligand_folder = docking_pdb_path + 'HEX_SELECTED_LIGANDS/'
         ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/'
         ligand_file_found = False
 

From 39b13eb86a66e00cf0b72227a9ceb20e4bc36065 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Tue, 20 Feb 2024 11:47:35 -0500
Subject: [PATCH 12/35] Add regex check to process bigger receptors. Receptors
 with more than 1000 residues will have pdb files where there is no space
 between columns 4 and 5. For example, it can look like this: A1000, whereas
 for earlier residues, it looks like this: A 345. A regex check is added to
 see which lines need to be further processed to separate and extract the
 residue number.

---
 api/utils/docking_utils.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index cba1fe2..b0724e5 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -221,10 +221,16 @@ def result_dict_generator(self, monomer_number, threshold):
             splitted_line = line.split()
             if line[0:4] == 'ATOM':
                 coord = map(float, filter(None, splitted_line[6:9]))
-                if int(splitted_line[5]) in reference:
-                    reference[int(splitted_line[5])][int(splitted_line[1])] = tuple(coord)
+
+                # check if chain name and residue are in the same column, e.g. A1000
+                if re.search(r'\d', splitted_line[4]) is None:
+                    residue = splitted_line[5]
+                else:
+                    residue = splitted_line[4][1:]
+                if int(residue) in reference:
+                    reference[int(residue)][int(splitted_line[1])] = tuple(coord)
                 else:
-                    reference[int(splitted_line[5])] = {int(splitted_line[1]) : tuple(coord)}
+                    reference[int(residue)] = {int(splitted_line[1]) : tuple(coord)}
 
         # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}},
 

From a30f11794258c6dcb94330fc22143e30b79f33d4 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Tue, 20 Feb 2024 11:54:53 -0500
Subject: [PATCH 13/35] Add unit tests and files for testing. Files for testing
 include 1 small monomer receptor pdb, 1 small complex receptor pdb, 1 ligand
 sdf file and the hex results and outputs for these dockings. The complex
 receptor file was altered to create a small complex receptor, so it does not
 actually exist. The results for the dockings are stored in folders just as
 they would be in the /DATA/HEX_API/RESULTS folder.

---
 tests/data/6325_Ethylene.sdf                  | 107 ++++++++
 tests/data/AF2_AT8G88888_complex.pdb          | 198 +++++++++++++++
 tests/data/AF2_AT9G99999_monomer.pdb          | 197 +++++++++++++++
 .../AT8G88888_complex_6325_Ethylene0001.pdb   | 205 +++++++++++++++
 .../hex_output.txt                            | 238 ++++++++++++++++++
 .../AT8G88888_complex_6325_Ethylene0001.pdb   | 205 +++++++++++++++
 .../AT9G99999_monomer_6325_Ethylene0001.pdb   | 205 +++++++++++++++
 .../hex_output.txt                            | 218 ++++++++++++++++
 tests/resources/test_docking_utils.py         | 131 ++++++++++
 9 files changed, 1704 insertions(+)
 create mode 100644 tests/data/6325_Ethylene.sdf
 create mode 100644 tests/data/AF2_AT8G88888_complex.pdb
 create mode 100644 tests/data/AF2_AT9G99999_monomer.pdb
 create mode 100644 tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb
 create mode 100644 tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt
 create mode 100644 tests/data/AT8G88888_complex_6325_Ethylene0001.pdb
 create mode 100644 tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb
 create mode 100644 tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt
 create mode 100644 tests/resources/test_docking_utils.py

diff --git a/tests/data/6325_Ethylene.sdf b/tests/data/6325_Ethylene.sdf
new file mode 100644
index 0000000..851e2d7
--- /dev/null
+++ b/tests/data/6325_Ethylene.sdf
@@ -0,0 +1,107 @@
+6325
+  -OEChem-03192020393D
+
+  6  5  0     0  0  0  0  0  0999 V2000
+   -0.6672    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.6672    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.2213   -0.9290    0.0708 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.2212    0.9290   -0.0708 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.2213    0.9290   -0.0708 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.2213   -0.9290    0.0708 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  2  2  0  0  0  0
+  1  3  1  0  0  0  0
+  1  4  1  0  0  0  0
+  2  5  1  0  0  0  0
+  2  6  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+6325
+
+> <PUBCHEM_CONFORMER_RMSD>
+0.4
+
+> <PUBCHEM_CONFORMER_DIVERSEORDER>
+1
+
+> <PUBCHEM_MMFF94_PARTIAL_CHARGES>
+6
+1 -0.3
+2 -0.3
+3 0.15
+4 0.15
+5 0.15
+6 0.15
+
+> <PUBCHEM_EFFECTIVE_ROTOR_COUNT>
+0
+
+> <PUBCHEM_PHARMACOPHORE_FEATURES>
+2
+1 1 hydrophobe
+1 2 hydrophobe
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+2
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+> <PUBCHEM_CONFORMER_ID>
+000018B500000001
+
+> <PUBCHEM_MMFF94_ENERGY>
+0.1306
+
+> <PUBCHEM_FEATURE_SELFOVERLAP>
+11.86
+
+> <PUBCHEM_SHAPE_FINGERPRINT>
+21015797 1 8574413327516572042
+260 1 8574713502780882945
+
+> <PUBCHEM_SHAPE_MULTIPOLES>
+41.16
+1.06
+0.62
+0.62
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+
+> <PUBCHEM_SHAPE_SELFOVERLAP>
+58.377
+
+> <PUBCHEM_SHAPE_VOLUME>
+31.7
+
+> <PUBCHEM_COORDINATE_TYPE>
+2
+5
+10
+
+$$$$
diff --git a/tests/data/AF2_AT8G88888_complex.pdb b/tests/data/AF2_AT8G88888_complex.pdb
new file mode 100644
index 0000000..357f6ba
--- /dev/null
+++ b/tests/data/AF2_AT8G88888_complex.pdb
@@ -0,0 +1,198 @@
+HEADER                                            01-JUN-22                     
+TITLE     ALPHAFOLD MONOMER V2.0 PREDICTION FOR UNCHARACTERIZED PROTEIN         
+TITLE    2 (A0A1I9LPI9)                                                         
+COMPND    MOL_ID: 1;                                                   
+COMPND   2 MOLECULE: UNCHARACTERIZED PROTEIN;                                   
+COMPND   3 CHAIN: A, B;                                                           
+SOURCE    MOL_ID: 1;                                                            
+SOURCE   2 ORGANISM_SCIENTIFIC: ARABIDOPSIS THALIANA;                           
+SOURCE   3 ORGANISM_TAXID: 3702                                                 
+REMARK   1                                                                      
+REMARK   1 REFERENCE 1                                                          
+REMARK   1  AUTH   JOHN JUMPER, RICHARD EVANS, ALEXANDER PRITZEL, TIM GREEN,    
+REMARK   1  AUTH 2 MICHAEL FIGURNOV, OLAF RONNEBERGER, KATHRYN TUNYASUVUNAKOOL, 
+REMARK   1  AUTH 3 RUSS BATES, AUGUSTIN ZIDEK, ANNA POTAPENKO, ALEX BRIDGLAND,  
+REMARK   1  AUTH 4 CLEMENS MEYER, SIMON A A KOHL, ANDREW J BALLARD,             
+REMARK   1  AUTH 5 ANDREW COWIE, BERNARDINO ROMERA-PAREDES, STANISLAV NIKOLOV,  
+REMARK   1  AUTH 6 RISHUB JAIN, JONAS ADLER, TREVOR BACK, STIG PETERSEN,        
+REMARK   1  AUTH 7 DAVID REIMAN, ELLEN CLANCY, MICHAL ZIELINSKI,                
+REMARK   1  AUTH 8 MARTIN STEINEGGER, MICHALINA PACHOLSKA, TAMAS BERGHAMMER,    
+REMARK   1  AUTH 9 DAVID SILVER, ORIOL VINYALS, ANDREW W SENIOR,                
+REMARK   1  AUTH10 KORAY KAVUKCUOGLU, PUSHMEET KOHLI, DEMIS HASSABIS            
+REMARK   1  TITL   HIGHLY ACCURATE PROTEIN STRUCTURE PREDICTION WITH ALPHAFOLD  
+REMARK   1  REF    NATURE                        V. 596   583 2021              
+REMARK   1  REFN                   ISSN 0028-0836                               
+REMARK   1  PMID   34265844                                                     
+REMARK   1  DOI    10.1038/s41586-021-03819-2                                   
+REMARK   1                                                                      
+REMARK   1 DISCLAIMERS                                                          
+REMARK   1 ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED. THE  
+REMARK   1 INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD
+REMARK   1 BE EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY 
+REMARK   1 OF ANY KIND, WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT 
+REMARK   1 USE OF THE INFORMATION SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD    
+REMARK   1 PARTY. THE INFORMATION IS NOT INTENDED TO BE A SUBSTITUTE FOR        
+REMARK   1 PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR TREATMENT, AND DOES NOT   
+REMARK   1 CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS AVAILABLE FOR 
+REMARK   1 ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE.           
+DBREF  XXXX A    1    17  UNP    A0A1I9LPI9 A0A1I9LPI9_ARATH     1     17       
+SEQRES   1 A   17  MET PHE ARG PHE LEU ASP TRP ILE PHE THR VAL ALA THR          
+SEQRES   2 A   17  THR SER LEU ASP                                              
+CRYST1    1.000    1.000    1.000  90.00  90.00  90.00 P 1           1          
+ORIGX1      1.000000  0.000000  0.000000        0.00000                         
+ORIGX2      0.000000  1.000000  0.000000        0.00000                         
+ORIGX3      0.000000  0.000000  1.000000        0.00000                         
+SCALE1      1.000000  0.000000  0.000000        0.00000                         
+SCALE2      0.000000  1.000000  0.000000        0.00000                         
+SCALE3      0.000000  0.000000  1.000000        0.00000                         
+MODEL        1                                                                  
+ATOM      1  N   MET A   1      -7.410  -1.552   9.979  1.00 71.91           N  
+ATOM      2  CA  MET A   1      -6.641  -0.817   8.946  1.00 71.91           C  
+ATOM      3  C   MET A   1      -5.303  -1.485   8.564  1.00 71.91           C  
+ATOM      4  CB  MET A   1      -6.474   0.646   9.390  1.00 71.91           C  
+ATOM      5  O   MET A   1      -4.645  -0.990   7.666  1.00 71.91           O  
+ATOM      6  CG  MET A   1      -6.092   1.613   8.265  1.00 71.91           C  
+ATOM      7  SD  MET A   1      -6.114   3.332   8.818  1.00 71.91           S  
+ATOM      8  CE  MET A   1      -5.405   4.141   7.358  1.00 71.91           C  
+ATOM      9  N   PHE A   2      -4.916  -2.639   9.137  1.00 82.96           N  
+ATOM     10  CA  PHE A   2      -3.656  -3.336   8.797  1.00 82.96           C  
+ATOM     11  C   PHE A   2      -3.544  -3.808   7.338  1.00 82.96           C  
+ATOM     12  CB  PHE A   2      -3.477  -4.529   9.748  1.00 82.96           C  
+ATOM     13  O   PHE A   2      -2.492  -3.654   6.732  1.00 82.96           O  
+ATOM     14  CG  PHE A   2      -2.837  -4.145  11.064  1.00 82.96           C  
+ATOM     15  CD1 PHE A   2      -1.434  -4.085  11.145  1.00 82.96           C  
+ATOM     16  CD2 PHE A   2      -3.618  -3.837  12.195  1.00 82.96           C  
+ATOM     17  CE1 PHE A   2      -0.812  -3.714  12.349  1.00 82.96           C  
+ATOM     18  CE2 PHE A   2      -2.994  -3.457  13.397  1.00 82.96           C  
+ATOM     19  CZ  PHE A   2      -1.591  -3.397  13.473  1.00 82.96           C  
+ATOM     20  N   ARG A   3      -4.644  -4.300   6.744  1.00 88.23           N  
+ATOM     21  CA  ARG A   3      -4.655  -4.785   5.350  1.00 88.23           C  
+ATOM     22  C   ARG A   3      -4.257  -3.729   4.317  1.00 88.23           C  
+ATOM     23  CB  ARG A   3      -6.038  -5.349   4.983  1.00 88.23           C  
+ATOM     24  O   ARG A   3      -3.766  -4.087   3.258  1.00 88.23           O  
+ATOM     25  CG  ARG A   3      -6.239  -6.783   5.494  1.00 88.23           C  
+ATOM     26  CD  ARG A   3      -7.610  -7.350   5.094  1.00 88.23           C  
+ATOM     27  NE  ARG A   3      -7.758  -7.481   3.629  1.00 88.23           N  
+ATOM     28  NH1 ARG A   3      -9.874  -8.376   3.605  1.00 88.23           N  
+ATOM     29  NH2 ARG A   3      -8.815  -7.999   1.679  1.00 88.23           N  
+ATOM     30  CZ  ARG A   3      -8.811  -7.948   2.980  1.00 88.23           C  
+ATOM     31  N   PHE A   4      -4.500  -2.449   4.602  1.00 95.39           N  
+ATOM     32  CA  PHE A   4      -4.141  -1.385   3.667  1.00 95.39           C  
+ATOM     33  C   PHE A   4      -2.630  -1.112   3.666  1.00 95.39           C  
+ATOM     34  CB  PHE A   4      -4.950  -0.122   3.981  1.00 95.39           C  
+ATOM     35  O   PHE A   4      -2.042  -0.961   2.601  1.00 95.39           O  
+ATOM     36  CG  PHE A   4      -4.637   1.004   3.018  1.00 95.39           C  
+ATOM     37  CD1 PHE A   4      -3.744   2.026   3.392  1.00 95.39           C  
+ATOM     38  CD2 PHE A   4      -5.173   0.983   1.716  1.00 95.39           C  
+ATOM     39  CE1 PHE A   4      -3.393   3.025   2.468  1.00 95.39           C  
+ATOM     40  CE2 PHE A   4      -4.820   1.983   0.794  1.00 95.39           C  
+ATOM     41  CZ  PHE A   4      -3.931   3.004   1.170  1.00 95.39           C  
+ATOM     42  N   LEU A   5      -1.999  -1.093   4.848  1.00 95.07           N  
+ATOM     43  CA  LEU A   5      -0.549  -0.898   4.972  1.00 95.07           C  
+ATOM     44  C   LEU A   5       0.230  -2.068   4.366  1.00 95.07           C  
+ATOM     45  CB  LEU A   5      -0.169  -0.721   6.454  1.00 95.07           C  
+ATOM     46  O   LEU A   5       1.200  -1.845   3.652  1.00 95.07           O  
+ATOM     47  CG  LEU A   5      -0.676   0.581   7.097  1.00 95.07           C  
+ATOM     48  CD1 LEU A   5      -0.344   0.575   8.590  1.00 95.07           C  
+ATOM     49  CD2 LEU A   5      -0.045   1.824   6.468  1.00 95.07           C  
+ATOM     50  N   ASP A   6      -0.243  -3.291   4.604  1.00 94.98           N  
+ATOM     51  CA  ASP A   6       0.310  -4.514   4.017  1.00 94.98           C  
+ATOM     52  C   ASP A   6       0.268  -4.491   2.476  1.00 94.98           C  
+ATOM     53  CB  ASP A   6      -0.502  -5.675   4.596  1.00 94.98           C  
+ATOM     54  O   ASP A   6       1.259  -4.793   1.812  1.00 94.98           O  
+ATOM     55  CG  ASP A   6       0.008  -7.032   4.128  1.00 94.98           C  
+ATOM     56  OD1 ASP A   6      -0.791  -7.719   3.454  1.00 94.98           O  
+ATOM     57  OD2 ASP A   6       1.140  -7.372   4.527  1.00 94.98           O  
+ATOM     58  N   TRP A   7      -0.843  -4.021   1.895  1.00 94.94           N  
+ATOM     59  CA  TRP A   7      -0.972  -3.855   0.445  1.00 94.94           C  
+ATOM     60  C   TRP A   7       0.004  -2.814  -0.129  1.00 94.94           C  
+ATOM     61  CB  TRP A   7      -2.424  -3.518   0.091  1.00 94.94           C  
+ATOM     62  O   TRP A   7       0.686  -3.117  -1.103  1.00 94.94           O  
+ATOM     63  CG  TRP A   7      -2.650  -3.240  -1.363  1.00 94.94           C  
+ATOM     64  CD1 TRP A   7      -2.691  -4.169  -2.344  1.00 94.94           C  
+ATOM     65  CD2 TRP A   7      -2.756  -1.947  -2.035  1.00 94.94           C  
+ATOM     66  CE2 TRP A   7      -2.861  -2.176  -3.439  1.00 94.94           C  
+ATOM     67  CE3 TRP A   7      -2.755  -0.604  -1.602  1.00 94.94           C  
+ATOM     68  NE1 TRP A   7      -2.829  -3.547  -3.569  1.00 94.94           N  
+ATOM     69  CH2 TRP A   7      -2.931   0.196  -3.906  1.00 94.94           C  
+ATOM     70  CZ2 TRP A   7      -2.944  -1.130  -4.369  1.00 94.94           C  
+ATOM     71  CZ3 TRP A   7      -2.849   0.455  -2.526  1.00 94.94           C  
+ATOM     72  N   ILE A   8       0.122  -1.619   0.473  1.00 95.58           N  
+ATOM     73  CA  ILE A   8       1.094  -0.599   0.022  1.00 95.58           C  
+ATOM     74  C   ILE A   8       2.526  -1.128   0.124  1.00 95.58           C  
+ATOM     75  CB  ILE A   8       0.955   0.719   0.824  1.00 95.58           C  
+ATOM     76  O   ILE A   8       3.316  -0.913  -0.792  1.00 95.58           O  
+ATOM     77  CG1 ILE A   8      -0.332   1.498   0.479  1.00 95.58           C  
+ATOM     78  CG2 ILE A   8       2.174   1.654   0.660  1.00 95.58           C  
+ATOM     79  CD1 ILE A   8      -0.352   2.171  -0.903  1.00 95.58           C  
+ATOM     80  N   PHE A   9       2.857  -1.824   1.215  1.00 95.75           N  
+ATOM     81  CA  PHE A   9       4.181  -2.409   1.400  1.00 95.75           C  
+ATOM     82  C   PHE A   9       4.484  -3.426   0.296  1.00 95.75           C  
+ATOM     83  CB  PHE A   9       4.269  -3.029   2.799  1.00 95.75           C  
+ATOM     84  O   PHE A   9       5.521  -3.325  -0.350  1.00 95.75           O  
+ATOM     85  CG  PHE A   9       5.685  -3.372   3.208  1.00 95.75           C  
+ATOM     86  CD1 PHE A   9       6.205  -4.658   2.976  1.00 95.75           C  
+ATOM     87  CD2 PHE A   9       6.494  -2.385   3.801  1.00 95.75           C  
+ATOM     88  CE1 PHE A   9       7.529  -4.956   3.343  1.00 95.75           C  
+ATOM     89  CE2 PHE A   9       7.817  -2.684   4.169  1.00 95.75           C  
+ATOM     90  CZ  PHE A   9       8.334  -3.970   3.939  1.00 95.75           C  
+ATOM     91  N   THR A  10       3.528  -4.316   0.010  1.00 95.85           N  
+ATOM     92  CA  THR A  10       3.622  -5.299  -1.079  1.00 95.85           C  
+ATOM     93  C   THR A  10       3.835  -4.624  -2.434  1.00 95.85           C  
+ATOM     94  CB  THR A  10       2.357  -6.170  -1.140  1.00 95.85           C  
+ATOM     95  O   THR A  10       4.733  -5.013  -3.175  1.00 95.85           O  
+ATOM     96  CG2 THR A  10       2.445  -7.255  -2.212  1.00 95.85           C  
+ATOM     97  OG1 THR A  10       2.146  -6.838   0.082  1.00 95.85           O  
+ATOM     98  N   VAL A  11       3.047  -3.592  -2.759  1.00 96.10           N  
+ATOM     99  CA  VAL A  11       3.177  -2.859  -4.029  1.00 96.10           C  
+ATOM    100  C   VAL A  11       4.548  -2.188  -4.140  1.00 96.10           C  
+ATOM    101  CB  VAL A  11       2.034  -1.841  -4.204  1.00 96.10           C  
+ATOM    102  O   VAL A  11       5.202  -2.317  -5.174  1.00 96.10           O  
+ATOM    103  CG1 VAL A  11       2.239  -0.928  -5.422  1.00 96.10           C  
+ATOM    104  CG2 VAL A  11       0.692  -2.558  -4.412  1.00 96.10           C  
+ATOM    105  N   ALA A  12       5.008  -1.520  -3.077  1.00 95.59           N  
+ATOM    106  CA  ALA A  12       6.301  -0.839  -3.056  1.00 95.59           C  
+ATOM    107  C   ALA A  12       7.470  -1.817  -3.255  1.00 95.59           C  
+ATOM    108  CB  ALA A  12       6.430  -0.061  -1.741  1.00 95.59           C  
+ATOM    109  O   ALA A  12       8.385  -1.514  -4.019  1.00 95.59           O  
+ATOM    110  N   THR A  13       7.419  -3.001  -2.635  1.00 94.11           N  
+ATOM    111  CA  THR A  13       8.445  -4.039  -2.818  1.00 94.11           C  
+ATOM    112  C   THR A  13       8.398  -4.653  -4.216  1.00 94.11           C  
+ATOM    113  CB  THR A  13       8.369  -5.131  -1.738  1.00 94.11           C  
+ATOM    114  O   THR A  13       9.435  -4.745  -4.854  1.00 94.11           O  
+ATOM    115  CG2 THR A  13       8.715  -4.573  -0.355  1.00 94.11           C  
+ATOM    116  OG1 THR A  13       7.079  -5.693  -1.633  1.00 94.11           O  
+ATOM    117  N   THR A  14       7.213  -4.968  -4.760  1.00 93.90           N  
+ATOM    118  CA  THR A  14       7.092  -5.543  -6.121  1.00 93.90           C  
+ATOM    119  C   THR A  14       7.535  -4.612  -7.245  1.00 93.90           C  
+ATOM    120  CB  THR A  14       5.651  -5.960  -6.455  1.00 93.90           C  
+ATOM    121  O   THR A  14       7.745  -5.072  -8.357  1.00 93.90           O  
+ATOM    122  CG2 THR A  14       5.207  -7.198  -5.681  1.00 93.90           C  
+ATOM    123  OG1 THR A  14       4.714  -4.932  -6.199  1.00 93.90           O  
+ATOM    124  N   SER A  15       7.585  -3.303  -6.989  1.00 91.22           N  
+ATOM    125  CA  SER A  15       8.091  -2.321  -7.953  1.00 91.22           C  
+ATOM    126  C   SER A  15       9.588  -2.045  -7.815  1.00 91.22           C  
+ATOM    127  CB  SER A  15       7.310  -1.014  -7.807  1.00 91.22           C  
+ATOM    128  O   SER A  15      10.166  -1.402  -8.689  1.00 91.22           O  
+ATOM    129  OG  SER A  15       7.463  -0.403  -6.533  1.00 91.22           O  
+ATOM    130  N   LEU A  16      10.169  -2.425  -6.673  1.00 85.38           N  
+ATOM    131  CA  LEU A  16      11.572  -2.198  -6.342  1.00 85.38           C  
+ATOM    132  C   LEU A  16      12.447  -3.386  -6.770  1.00 85.38           C
+TER     132      LEU A  16
+ATOM    133  CB  LEU B  16      11.669  -1.927  -4.830  1.00 85.38           C  
+ATOM    134  O   LEU B  16      13.583  -3.156  -7.181  1.00 85.38           O  
+ATOM    135  CG  LEU B  16      13.080  -1.545  -4.347  1.00 85.38           C  
+ATOM    136  CD1 LEU B  16      13.490  -0.148  -4.827  1.00 85.38           C  
+ATOM    137  CD2 LEU B  16      13.114  -1.545  -2.817  1.00 85.38           C  
+ATOM    138  N   ASP B  17      11.914  -4.607  -6.661  1.00 73.64           N  
+ATOM    139  CA  ASP B  17      12.422  -5.816  -7.331  1.00 73.64           C  
+ATOM    140  C   ASP B  17      12.082  -5.810  -8.833  1.00 73.64           C  
+ATOM    141  CB  ASP B  17      11.841  -7.073  -6.638  1.00 73.64           C  
+ATOM    142  O   ASP B  17      12.961  -6.201  -9.638  1.00 73.64           O  
+ATOM    143  CG  ASP B  17      12.463  -7.412  -5.269  1.00 73.64           C  
+ATOM    144  OD1 ASP B  17      13.655  -7.806  -5.237  1.00 73.64           O  
+ATOM    145  OD2 ASP B  17      11.729  -7.366  -4.249  1.00 73.64           O  
+ATOM    146  OXT ASP B  17      10.937  -5.420  -9.159  1.00 73.64           O  
+TER     147      ASP B  17                                                      
+ENDMDL                                                                          
+END                                                                             
\ No newline at end of file
diff --git a/tests/data/AF2_AT9G99999_monomer.pdb b/tests/data/AF2_AT9G99999_monomer.pdb
new file mode 100644
index 0000000..f64d2a7
--- /dev/null
+++ b/tests/data/AF2_AT9G99999_monomer.pdb
@@ -0,0 +1,197 @@
+HEADER                                            01-JUN-22                     
+TITLE     ALPHAFOLD MONOMER V2.0 PREDICTION FOR UNCHARACTERIZED PROTEIN         
+TITLE    2 (A0A1I9LPI9)                                                         
+COMPND    MOL_ID: 1;                                                            
+COMPND   2 MOLECULE: UNCHARACTERIZED PROTEIN;                                   
+COMPND   3 CHAIN: A                                                             
+SOURCE    MOL_ID: 1;                                                            
+SOURCE   2 ORGANISM_SCIENTIFIC: ARABIDOPSIS THALIANA;                           
+SOURCE   3 ORGANISM_TAXID: 3702                                                 
+REMARK   1                                                                      
+REMARK   1 REFERENCE 1                                                          
+REMARK   1  AUTH   JOHN JUMPER, RICHARD EVANS, ALEXANDER PRITZEL, TIM GREEN,    
+REMARK   1  AUTH 2 MICHAEL FIGURNOV, OLAF RONNEBERGER, KATHRYN TUNYASUVUNAKOOL, 
+REMARK   1  AUTH 3 RUSS BATES, AUGUSTIN ZIDEK, ANNA POTAPENKO, ALEX BRIDGLAND,  
+REMARK   1  AUTH 4 CLEMENS MEYER, SIMON A A KOHL, ANDREW J BALLARD,             
+REMARK   1  AUTH 5 ANDREW COWIE, BERNARDINO ROMERA-PAREDES, STANISLAV NIKOLOV,  
+REMARK   1  AUTH 6 RISHUB JAIN, JONAS ADLER, TREVOR BACK, STIG PETERSEN,        
+REMARK   1  AUTH 7 DAVID REIMAN, ELLEN CLANCY, MICHAL ZIELINSKI,                
+REMARK   1  AUTH 8 MARTIN STEINEGGER, MICHALINA PACHOLSKA, TAMAS BERGHAMMER,    
+REMARK   1  AUTH 9 DAVID SILVER, ORIOL VINYALS, ANDREW W SENIOR,                
+REMARK   1  AUTH10 KORAY KAVUKCUOGLU, PUSHMEET KOHLI, DEMIS HASSABIS            
+REMARK   1  TITL   HIGHLY ACCURATE PROTEIN STRUCTURE PREDICTION WITH ALPHAFOLD  
+REMARK   1  REF    NATURE                        V. 596   583 2021              
+REMARK   1  REFN                   ISSN 0028-0836                               
+REMARK   1  PMID   34265844                                                     
+REMARK   1  DOI    10.1038/s41586-021-03819-2                                   
+REMARK   1                                                                      
+REMARK   1 DISCLAIMERS                                                          
+REMARK   1 ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED. THE  
+REMARK   1 INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD
+REMARK   1 BE EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY 
+REMARK   1 OF ANY KIND, WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT 
+REMARK   1 USE OF THE INFORMATION SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD    
+REMARK   1 PARTY. THE INFORMATION IS NOT INTENDED TO BE A SUBSTITUTE FOR        
+REMARK   1 PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR TREATMENT, AND DOES NOT   
+REMARK   1 CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS AVAILABLE FOR 
+REMARK   1 ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE.           
+DBREF  XXXX A    1    17  UNP    A0A1I9LPI9 A0A1I9LPI9_ARATH     1     17       
+SEQRES   1 A   17  MET PHE ARG PHE LEU ASP TRP ILE PHE THR VAL ALA THR          
+SEQRES   2 A   17  THR SER LEU ASP                                              
+CRYST1    1.000    1.000    1.000  90.00  90.00  90.00 P 1           1          
+ORIGX1      1.000000  0.000000  0.000000        0.00000                         
+ORIGX2      0.000000  1.000000  0.000000        0.00000                         
+ORIGX3      0.000000  0.000000  1.000000        0.00000                         
+SCALE1      1.000000  0.000000  0.000000        0.00000                         
+SCALE2      0.000000  1.000000  0.000000        0.00000                         
+SCALE3      0.000000  0.000000  1.000000        0.00000                         
+MODEL        1                                                                  
+ATOM      1  N   MET A   1      -7.410  -1.552   9.979  1.00 71.91           N  
+ATOM      2  CA  MET A   1      -6.641  -0.817   8.946  1.00 71.91           C  
+ATOM      3  C   MET A   1      -5.303  -1.485   8.564  1.00 71.91           C  
+ATOM      4  CB  MET A   1      -6.474   0.646   9.390  1.00 71.91           C  
+ATOM      5  O   MET A   1      -4.645  -0.990   7.666  1.00 71.91           O  
+ATOM      6  CG  MET A   1      -6.092   1.613   8.265  1.00 71.91           C  
+ATOM      7  SD  MET A   1      -6.114   3.332   8.818  1.00 71.91           S  
+ATOM      8  CE  MET A   1      -5.405   4.141   7.358  1.00 71.91           C  
+ATOM      9  N   PHE A   2      -4.916  -2.639   9.137  1.00 82.96           N  
+ATOM     10  CA  PHE A   2      -3.656  -3.336   8.797  1.00 82.96           C  
+ATOM     11  C   PHE A   2      -3.544  -3.808   7.338  1.00 82.96           C  
+ATOM     12  CB  PHE A   2      -3.477  -4.529   9.748  1.00 82.96           C  
+ATOM     13  O   PHE A   2      -2.492  -3.654   6.732  1.00 82.96           O  
+ATOM     14  CG  PHE A   2      -2.837  -4.145  11.064  1.00 82.96           C  
+ATOM     15  CD1 PHE A   2      -1.434  -4.085  11.145  1.00 82.96           C  
+ATOM     16  CD2 PHE A   2      -3.618  -3.837  12.195  1.00 82.96           C  
+ATOM     17  CE1 PHE A   2      -0.812  -3.714  12.349  1.00 82.96           C  
+ATOM     18  CE2 PHE A   2      -2.994  -3.457  13.397  1.00 82.96           C  
+ATOM     19  CZ  PHE A   2      -1.591  -3.397  13.473  1.00 82.96           C  
+ATOM     20  N   ARG A   3      -4.644  -4.300   6.744  1.00 88.23           N  
+ATOM     21  CA  ARG A   3      -4.655  -4.785   5.350  1.00 88.23           C  
+ATOM     22  C   ARG A   3      -4.257  -3.729   4.317  1.00 88.23           C  
+ATOM     23  CB  ARG A   3      -6.038  -5.349   4.983  1.00 88.23           C  
+ATOM     24  O   ARG A   3      -3.766  -4.087   3.258  1.00 88.23           O  
+ATOM     25  CG  ARG A   3      -6.239  -6.783   5.494  1.00 88.23           C  
+ATOM     26  CD  ARG A   3      -7.610  -7.350   5.094  1.00 88.23           C  
+ATOM     27  NE  ARG A   3      -7.758  -7.481   3.629  1.00 88.23           N  
+ATOM     28  NH1 ARG A   3      -9.874  -8.376   3.605  1.00 88.23           N  
+ATOM     29  NH2 ARG A   3      -8.815  -7.999   1.679  1.00 88.23           N  
+ATOM     30  CZ  ARG A   3      -8.811  -7.948   2.980  1.00 88.23           C  
+ATOM     31  N   PHE A   4      -4.500  -2.449   4.602  1.00 95.39           N  
+ATOM     32  CA  PHE A   4      -4.141  -1.385   3.667  1.00 95.39           C  
+ATOM     33  C   PHE A   4      -2.630  -1.112   3.666  1.00 95.39           C  
+ATOM     34  CB  PHE A   4      -4.950  -0.122   3.981  1.00 95.39           C  
+ATOM     35  O   PHE A   4      -2.042  -0.961   2.601  1.00 95.39           O  
+ATOM     36  CG  PHE A   4      -4.637   1.004   3.018  1.00 95.39           C  
+ATOM     37  CD1 PHE A   4      -3.744   2.026   3.392  1.00 95.39           C  
+ATOM     38  CD2 PHE A   4      -5.173   0.983   1.716  1.00 95.39           C  
+ATOM     39  CE1 PHE A   4      -3.393   3.025   2.468  1.00 95.39           C  
+ATOM     40  CE2 PHE A   4      -4.820   1.983   0.794  1.00 95.39           C  
+ATOM     41  CZ  PHE A   4      -3.931   3.004   1.170  1.00 95.39           C  
+ATOM     42  N   LEU A   5      -1.999  -1.093   4.848  1.00 95.07           N  
+ATOM     43  CA  LEU A   5      -0.549  -0.898   4.972  1.00 95.07           C  
+ATOM     44  C   LEU A   5       0.230  -2.068   4.366  1.00 95.07           C  
+ATOM     45  CB  LEU A   5      -0.169  -0.721   6.454  1.00 95.07           C  
+ATOM     46  O   LEU A   5       1.200  -1.845   3.652  1.00 95.07           O  
+ATOM     47  CG  LEU A   5      -0.676   0.581   7.097  1.00 95.07           C  
+ATOM     48  CD1 LEU A   5      -0.344   0.575   8.590  1.00 95.07           C  
+ATOM     49  CD2 LEU A   5      -0.045   1.824   6.468  1.00 95.07           C  
+ATOM     50  N   ASP A   6      -0.243  -3.291   4.604  1.00 94.98           N  
+ATOM     51  CA  ASP A   6       0.310  -4.514   4.017  1.00 94.98           C  
+ATOM     52  C   ASP A   6       0.268  -4.491   2.476  1.00 94.98           C  
+ATOM     53  CB  ASP A   6      -0.502  -5.675   4.596  1.00 94.98           C  
+ATOM     54  O   ASP A   6       1.259  -4.793   1.812  1.00 94.98           O  
+ATOM     55  CG  ASP A   6       0.008  -7.032   4.128  1.00 94.98           C  
+ATOM     56  OD1 ASP A   6      -0.791  -7.719   3.454  1.00 94.98           O  
+ATOM     57  OD2 ASP A   6       1.140  -7.372   4.527  1.00 94.98           O  
+ATOM     58  N   TRP A   7      -0.843  -4.021   1.895  1.00 94.94           N  
+ATOM     59  CA  TRP A   7      -0.972  -3.855   0.445  1.00 94.94           C  
+ATOM     60  C   TRP A   7       0.004  -2.814  -0.129  1.00 94.94           C  
+ATOM     61  CB  TRP A   7      -2.424  -3.518   0.091  1.00 94.94           C  
+ATOM     62  O   TRP A   7       0.686  -3.117  -1.103  1.00 94.94           O  
+ATOM     63  CG  TRP A   7      -2.650  -3.240  -1.363  1.00 94.94           C  
+ATOM     64  CD1 TRP A   7      -2.691  -4.169  -2.344  1.00 94.94           C  
+ATOM     65  CD2 TRP A   7      -2.756  -1.947  -2.035  1.00 94.94           C  
+ATOM     66  CE2 TRP A   7      -2.861  -2.176  -3.439  1.00 94.94           C  
+ATOM     67  CE3 TRP A   7      -2.755  -0.604  -1.602  1.00 94.94           C  
+ATOM     68  NE1 TRP A   7      -2.829  -3.547  -3.569  1.00 94.94           N  
+ATOM     69  CH2 TRP A   7      -2.931   0.196  -3.906  1.00 94.94           C  
+ATOM     70  CZ2 TRP A   7      -2.944  -1.130  -4.369  1.00 94.94           C  
+ATOM     71  CZ3 TRP A   7      -2.849   0.455  -2.526  1.00 94.94           C  
+ATOM     72  N   ILE A   8       0.122  -1.619   0.473  1.00 95.58           N  
+ATOM     73  CA  ILE A   8       1.094  -0.599   0.022  1.00 95.58           C  
+ATOM     74  C   ILE A   8       2.526  -1.128   0.124  1.00 95.58           C  
+ATOM     75  CB  ILE A   8       0.955   0.719   0.824  1.00 95.58           C  
+ATOM     76  O   ILE A   8       3.316  -0.913  -0.792  1.00 95.58           O  
+ATOM     77  CG1 ILE A   8      -0.332   1.498   0.479  1.00 95.58           C  
+ATOM     78  CG2 ILE A   8       2.174   1.654   0.660  1.00 95.58           C  
+ATOM     79  CD1 ILE A   8      -0.352   2.171  -0.903  1.00 95.58           C  
+ATOM     80  N   PHE A   9       2.857  -1.824   1.215  1.00 95.75           N  
+ATOM     81  CA  PHE A   9       4.181  -2.409   1.400  1.00 95.75           C  
+ATOM     82  C   PHE A   9       4.484  -3.426   0.296  1.00 95.75           C  
+ATOM     83  CB  PHE A   9       4.269  -3.029   2.799  1.00 95.75           C  
+ATOM     84  O   PHE A   9       5.521  -3.325  -0.350  1.00 95.75           O  
+ATOM     85  CG  PHE A   9       5.685  -3.372   3.208  1.00 95.75           C  
+ATOM     86  CD1 PHE A   9       6.205  -4.658   2.976  1.00 95.75           C  
+ATOM     87  CD2 PHE A   9       6.494  -2.385   3.801  1.00 95.75           C  
+ATOM     88  CE1 PHE A   9       7.529  -4.956   3.343  1.00 95.75           C  
+ATOM     89  CE2 PHE A   9       7.817  -2.684   4.169  1.00 95.75           C  
+ATOM     90  CZ  PHE A   9       8.334  -3.970   3.939  1.00 95.75           C  
+ATOM     91  N   THR A  10       3.528  -4.316   0.010  1.00 95.85           N  
+ATOM     92  CA  THR A  10       3.622  -5.299  -1.079  1.00 95.85           C  
+ATOM     93  C   THR A  10       3.835  -4.624  -2.434  1.00 95.85           C  
+ATOM     94  CB  THR A  10       2.357  -6.170  -1.140  1.00 95.85           C  
+ATOM     95  O   THR A  10       4.733  -5.013  -3.175  1.00 95.85           O  
+ATOM     96  CG2 THR A  10       2.445  -7.255  -2.212  1.00 95.85           C  
+ATOM     97  OG1 THR A  10       2.146  -6.838   0.082  1.00 95.85           O  
+ATOM     98  N   VAL A  11       3.047  -3.592  -2.759  1.00 96.10           N  
+ATOM     99  CA  VAL A  11       3.177  -2.859  -4.029  1.00 96.10           C  
+ATOM    100  C   VAL A  11       4.548  -2.188  -4.140  1.00 96.10           C  
+ATOM    101  CB  VAL A  11       2.034  -1.841  -4.204  1.00 96.10           C  
+ATOM    102  O   VAL A  11       5.202  -2.317  -5.174  1.00 96.10           O  
+ATOM    103  CG1 VAL A  11       2.239  -0.928  -5.422  1.00 96.10           C  
+ATOM    104  CG2 VAL A  11       0.692  -2.558  -4.412  1.00 96.10           C  
+ATOM    105  N   ALA A  12       5.008  -1.520  -3.077  1.00 95.59           N  
+ATOM    106  CA  ALA A  12       6.301  -0.839  -3.056  1.00 95.59           C  
+ATOM    107  C   ALA A  12       7.470  -1.817  -3.255  1.00 95.59           C  
+ATOM    108  CB  ALA A  12       6.430  -0.061  -1.741  1.00 95.59           C  
+ATOM    109  O   ALA A  12       8.385  -1.514  -4.019  1.00 95.59           O  
+ATOM    110  N   THR A  13       7.419  -3.001  -2.635  1.00 94.11           N  
+ATOM    111  CA  THR A  13       8.445  -4.039  -2.818  1.00 94.11           C  
+ATOM    112  C   THR A  13       8.398  -4.653  -4.216  1.00 94.11           C  
+ATOM    113  CB  THR A  13       8.369  -5.131  -1.738  1.00 94.11           C  
+ATOM    114  O   THR A  13       9.435  -4.745  -4.854  1.00 94.11           O  
+ATOM    115  CG2 THR A  13       8.715  -4.573  -0.355  1.00 94.11           C  
+ATOM    116  OG1 THR A  13       7.079  -5.693  -1.633  1.00 94.11           O  
+ATOM    117  N   THR A  14       7.213  -4.968  -4.760  1.00 93.90           N  
+ATOM    118  CA  THR A  14       7.092  -5.543  -6.121  1.00 93.90           C  
+ATOM    119  C   THR A  14       7.535  -4.612  -7.245  1.00 93.90           C  
+ATOM    120  CB  THR A  14       5.651  -5.960  -6.455  1.00 93.90           C  
+ATOM    121  O   THR A  14       7.745  -5.072  -8.357  1.00 93.90           O  
+ATOM    122  CG2 THR A  14       5.207  -7.198  -5.681  1.00 93.90           C  
+ATOM    123  OG1 THR A  14       4.714  -4.932  -6.199  1.00 93.90           O  
+ATOM    124  N   SER A  15       7.585  -3.303  -6.989  1.00 91.22           N  
+ATOM    125  CA  SER A  15       8.091  -2.321  -7.953  1.00 91.22           C  
+ATOM    126  C   SER A  15       9.588  -2.045  -7.815  1.00 91.22           C  
+ATOM    127  CB  SER A  15       7.310  -1.014  -7.807  1.00 91.22           C  
+ATOM    128  O   SER A  15      10.166  -1.402  -8.689  1.00 91.22           O  
+ATOM    129  OG  SER A  15       7.463  -0.403  -6.533  1.00 91.22           O  
+ATOM    130  N   LEU A  16      10.169  -2.425  -6.673  1.00 85.38           N  
+ATOM    131  CA  LEU A  16      11.572  -2.198  -6.342  1.00 85.38           C  
+ATOM    132  C   LEU A  16      12.447  -3.386  -6.770  1.00 85.38           C  
+ATOM    133  CB  LEU A  16      11.669  -1.927  -4.830  1.00 85.38           C  
+ATOM    134  O   LEU A  16      13.583  -3.156  -7.181  1.00 85.38           O  
+ATOM    135  CG  LEU A  16      13.080  -1.545  -4.347  1.00 85.38           C  
+ATOM    136  CD1 LEU A  16      13.490  -0.148  -4.827  1.00 85.38           C  
+ATOM    137  CD2 LEU A  16      13.114  -1.545  -2.817  1.00 85.38           C  
+ATOM    138  N   ASP A  17      11.914  -4.607  -6.661  1.00 73.64           N  
+ATOM    139  CA  ASP A  17      12.422  -5.816  -7.331  1.00 73.64           C  
+ATOM    140  C   ASP A  17      12.082  -5.810  -8.833  1.00 73.64           C  
+ATOM    141  CB  ASP A  17      11.841  -7.073  -6.638  1.00 73.64           C  
+ATOM    142  O   ASP A  17      12.961  -6.201  -9.638  1.00 73.64           O  
+ATOM    143  CG  ASP A  17      12.463  -7.412  -5.269  1.00 73.64           C  
+ATOM    144  OD1 ASP A  17      13.655  -7.806  -5.237  1.00 73.64           O  
+ATOM    145  OD2 ASP A  17      11.729  -7.366  -4.249  1.00 73.64           O  
+ATOM    146  OXT ASP A  17      10.937  -5.420  -9.159  1.00 73.64           O  
+TER     147      ASP A  17                                                      
+ENDMDL                                                                          
+END                                                                             
\ No newline at end of file
diff --git a/tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb b/tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb
new file mode 100644
index 0000000..9b5cc72
--- /dev/null
+++ b/tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb
@@ -0,0 +1,205 @@
+REMARK    File generated by Hex 8.0.0 on Mon Feb 19 14:04:05 2024.
+REMARK    Source: tests/data/AF2_AT8G88888_complex.pdb
+REMARK    Docked receptor coordinates...
+REMARK    Solution 1, from model "AF2_AT8G88888_complex", ID: 0040000b03710053
+REMARK    Energy  -7.055752e+01, RMS  -1.00
+REMARK    Overlap Volume 0.0, Clash Volume 0.0
+REMARK    Box_min: -10.654 -8.723 -9.638
+REMARK    Box_max: 13.655 4.141 13.473
+REMARK    Cube_min: -10.654 -14.446 -10.237
+REMARK    Cube_max: 13.655 9.863 14.072
+REMARK    Symmetry Type: Default
+REMARK    Symmetry Matrix: 0
+ATOM      1  N   MET A   1      -7.410  -1.552   9.979  1.00 71.91
+ATOM      2  CA  MET A   1      -6.641  -0.817   8.946  1.00 71.91
+ATOM      3  C   MET A   1      -5.303  -1.485   8.564  1.00 71.91
+ATOM      4  CB  MET A   1      -6.474   0.646   9.390  1.00 71.91
+ATOM      5  O   MET A   1      -4.645  -0.990   7.666  1.00 71.91
+ATOM      6  CG  MET A   1      -6.092   1.613   8.265  1.00 71.91
+ATOM      7  SD  MET A   1      -6.114   3.332   8.818  1.00 71.91
+ATOM      8  CE  MET A   1      -5.405   4.141   7.358  1.00 71.91
+ATOM      9 1H   MET A   1      -8.294  -1.042  10.181  1.00 99.99
+ATOM     10 2H   MET A   1      -7.633  -2.507   9.631  1.00 99.99
+ATOM     11 3H   MET A   1      -6.843  -1.622  10.848  1.00 99.99
+ATOM     12  N   PHE A   2      -4.916  -2.639   9.137  1.00 82.96
+ATOM     13  CA  PHE A   2      -3.656  -3.336   8.797  1.00 82.96
+ATOM     14  C   PHE A   2      -3.544  -3.808   7.338  1.00 82.96
+ATOM     15  CB  PHE A   2      -3.477  -4.529   9.748  1.00 82.96
+ATOM     16  O   PHE A   2      -2.492  -3.654   6.732  1.00 82.96
+ATOM     17  CG  PHE A   2      -2.837  -4.145  11.064  1.00 82.96
+ATOM     18  CD1 PHE A   2      -1.434  -4.085  11.145  1.00 82.96
+ATOM     19  CD2 PHE A   2      -3.618  -3.837  12.195  1.00 82.96
+ATOM     20  CE1 PHE A   2      -0.812  -3.714  12.349  1.00 82.96
+ATOM     21  CE2 PHE A   2      -2.994  -3.457  13.397  1.00 82.96
+ATOM     22  CZ  PHE A   2      -1.591  -3.397  13.473  1.00 82.96
+ATOM     23  H   PHE A   2      -5.539  -3.014   9.824  1.00 99.99
+ATOM     24  N   ARG A   3      -4.644  -4.300   6.744  1.00 88.23
+ATOM     25  CA  ARG A   3      -4.655  -4.785   5.350  1.00 88.23
+ATOM     26  C   ARG A   3      -4.257  -3.729   4.317  1.00 88.23
+ATOM     27  CB  ARG A   3      -6.038  -5.349   4.983  1.00 88.23
+ATOM     28  O   ARG A   3      -3.766  -4.087   3.258  1.00 88.23
+ATOM     29  CG  ARG A   3      -6.239  -6.783   5.494  1.00 88.23
+ATOM     30  CD  ARG A   3      -7.610  -7.350   5.094  1.00 88.23
+ATOM     31  NE  ARG A   3      -7.758  -7.481   3.629  1.00 88.23
+ATOM     32  NH1 ARG A   3      -9.874  -8.376   3.605  1.00 88.23
+ATOM     33  NH2 ARG A   3      -8.815  -7.999   1.679  1.00 88.23
+ATOM     34  CZ  ARG A   3      -8.811  -7.948   2.980  1.00 88.23
+ATOM     35  H   ARG A   3      -5.475  -4.324   7.301  1.00 99.99
+ATOM     36  HE  ARG A   3      -6.985  -7.188   3.071  1.00 99.99
+ATOM     37 1HH1 ARG A   3     -10.654  -8.723   3.085  1.00 99.99
+ATOM     38 2HH1 ARG A   3      -9.908  -8.357   4.604  1.00 99.99
+ATOM     39 1HH2 ARG A   3      -9.615  -8.353   1.195  1.00 99.99
+ATOM     40 2HH2 ARG A   3      -8.018  -7.684   1.164  1.00 99.99
+ATOM     41  N   PHE A   4      -4.500  -2.449   4.602  1.00 95.39
+ATOM     42  CA  PHE A   4      -4.141  -1.385   3.667  1.00 95.39
+ATOM     43  C   PHE A   4      -2.630  -1.112   3.666  1.00 95.39
+ATOM     44  CB  PHE A   4      -4.950  -0.122   3.981  1.00 95.39
+ATOM     45  O   PHE A   4      -2.042  -0.961   2.601  1.00 95.39
+ATOM     46  CG  PHE A   4      -4.637   1.004   3.018  1.00 95.39
+ATOM     47  CD1 PHE A   4      -3.744   2.026   3.392  1.00 95.39
+ATOM     48  CD2 PHE A   4      -5.173   0.983   1.716  1.00 95.39
+ATOM     49  CE1 PHE A   4      -3.393   3.025   2.468  1.00 95.39
+ATOM     50  CE2 PHE A   4      -4.820   1.983   0.794  1.00 95.39
+ATOM     51  CZ  PHE A   4      -3.931   3.004   1.170  1.00 95.39
+ATOM     52  H   PHE A   4      -4.937  -2.236   5.477  1.00 99.99
+ATOM     53  N   LEU A   5      -1.999  -1.093   4.848  1.00 95.07
+ATOM     54  CA  LEU A   5      -0.549  -0.898   4.972  1.00 95.07
+ATOM     55  C   LEU A   5       0.230  -2.068   4.366  1.00 95.07
+ATOM     56  CB  LEU A   5      -0.169  -0.721   6.454  1.00 95.07
+ATOM     57  O   LEU A   5       1.200  -1.845   3.652  1.00 95.07
+ATOM     58  CG  LEU A   5      -0.676   0.581   7.097  1.00 95.07
+ATOM     59  CD1 LEU A   5      -0.344   0.575   8.590  1.00 95.07
+ATOM     60  CD2 LEU A   5      -0.045   1.824   6.468  1.00 95.07
+ATOM     61  H   LEU A   5      -2.570  -1.220   5.660  1.00 99.99
+ATOM     62  N   ASP A   6      -0.243  -3.291   4.604  1.00 94.98
+ATOM     63  CA  ASP A   6       0.310  -4.514   4.017  1.00 94.98
+ATOM     64  C   ASP A   6       0.268  -4.491   2.476  1.00 94.98
+ATOM     65  CB  ASP A   6      -0.502  -5.675   4.596  1.00 94.98
+ATOM     66  O   ASP A   6       1.259  -4.793   1.812  1.00 94.98
+ATOM     67  CG  ASP A   6       0.008  -7.032   4.128  1.00 94.98
+ATOM     68  OD1 ASP A   6      -0.791  -7.719   3.454  1.00 94.98
+ATOM     69  OD2 ASP A   6       1.140  -7.372   4.527  1.00 94.98
+ATOM     70  H   ASP A   6      -1.025  -3.331   5.228  1.00 99.99
+ATOM     71  N   TRP A   7      -0.843  -4.021   1.895  1.00 94.94
+ATOM     72  CA  TRP A   7      -0.972  -3.855   0.445  1.00 94.94
+ATOM     73  C   TRP A   7       0.004  -2.814  -0.129  1.00 94.94
+ATOM     74  CB  TRP A   7      -2.424  -3.518   0.091  1.00 94.94
+ATOM     75  O   TRP A   7       0.686  -3.117  -1.103  1.00 94.94
+ATOM     76  CG  TRP A   7      -2.650  -3.240  -1.363  1.00 94.94
+ATOM     77  CD1 TRP A   7      -2.691  -4.169  -2.344  1.00 94.94
+ATOM     78  CD2 TRP A   7      -2.756  -1.947  -2.035  1.00 94.94
+ATOM     79  CE2 TRP A   7      -2.861  -2.176  -3.439  1.00 94.94
+ATOM     80  CE3 TRP A   7      -2.755  -0.604  -1.602  1.00 94.94
+ATOM     81  NE1 TRP A   7      -2.829  -3.547  -3.569  1.00 94.94
+ATOM     82  CH2 TRP A   7      -2.931   0.196  -3.906  1.00 94.94
+ATOM     83  CZ2 TRP A   7      -2.944  -1.130  -4.369  1.00 94.94
+ATOM     84  CZ3 TRP A   7      -2.849   0.455  -2.526  1.00 94.94
+ATOM     85  H   TRP A   7      -1.598  -3.783   2.507  1.00 99.99
+ATOM     86  HE1 TRP A   7      -2.899  -3.980  -4.446  1.00 99.99
+ATOM     87  N   ILE A   8       0.122  -1.619   0.473  1.00 95.58
+ATOM     88  CA  ILE A   8       1.094  -0.599   0.022  1.00 95.58
+ATOM     89  C   ILE A   8       2.526  -1.128   0.124  1.00 95.58
+ATOM     90  CB  ILE A   8       0.955   0.719   0.824  1.00 95.58
+ATOM     91  O   ILE A   8       3.316  -0.913  -0.792  1.00 95.58
+ATOM     92  CG1 ILE A   8      -0.332   1.498   0.479  1.00 95.58
+ATOM     93  CG2 ILE A   8       2.174   1.654   0.660  1.00 95.58
+ATOM     94  CD1 ILE A   8      -0.352   2.171  -0.903  1.00 95.58
+ATOM     95  H   ILE A   8      -0.482  -1.442   1.251  1.00 99.99
+ATOM     96  N   PHE A   9       2.857  -1.824   1.215  1.00 95.75
+ATOM     97  CA  PHE A   9       4.181  -2.409   1.400  1.00 95.75
+ATOM     98  C   PHE A   9       4.484  -3.426   0.296  1.00 95.75
+ATOM     99  CB  PHE A   9       4.269  -3.029   2.799  1.00 95.75
+ATOM    100  O   PHE A   9       5.521  -3.325  -0.350  1.00 95.75
+ATOM    101  CG  PHE A   9       5.685  -3.372   3.208  1.00 95.75
+ATOM    102  CD1 PHE A   9       6.205  -4.658   2.976  1.00 95.75
+ATOM    103  CD2 PHE A   9       6.494  -2.385   3.801  1.00 95.75
+ATOM    104  CE1 PHE A   9       7.529  -4.956   3.343  1.00 95.75
+ATOM    105  CE2 PHE A   9       7.817  -2.684   4.169  1.00 95.75
+ATOM    106  CZ  PHE A   9       8.334  -3.970   3.939  1.00 95.75
+ATOM    107  H   PHE A   9       2.148  -1.930   1.913  1.00 99.99
+ATOM    108  N   THR A  10       3.528  -4.316   0.010  1.00 95.85
+ATOM    109  CA  THR A  10       3.622  -5.299  -1.079  1.00 95.85
+ATOM    110  C   THR A  10       3.835  -4.624  -2.434  1.00 95.85
+ATOM    111  CB  THR A  10       2.357  -6.170  -1.140  1.00 95.85
+ATOM    112  O   THR A  10       4.733  -5.013  -3.175  1.00 95.85
+ATOM    113  CG2 THR A  10       2.445  -7.255  -2.212  1.00 95.85
+ATOM    114  OG1 THR A  10       2.146  -6.838   0.082  1.00 95.85
+ATOM    115  H   THR A  10       2.717  -4.277   0.596  1.00 99.99
+ATOM    116  HG1 THR A  10       1.357  -7.353  -0.022  1.00 99.99
+ATOM    117  N   VAL A  11       3.047  -3.592  -2.759  1.00 96.10
+ATOM    118  CA  VAL A  11       3.177  -2.859  -4.029  1.00 96.10
+ATOM    119  C   VAL A  11       4.548  -2.188  -4.140  1.00 96.10
+ATOM    120  CB  VAL A  11       2.034  -1.841  -4.204  1.00 96.10
+ATOM    121  O   VAL A  11       5.202  -2.317  -5.174  1.00 96.10
+ATOM    122  CG1 VAL A  11       2.239  -0.928  -5.422  1.00 96.10
+ATOM    123  CG2 VAL A  11       0.692  -2.558  -4.412  1.00 96.10
+ATOM    124  H   VAL A  11       2.351  -3.334  -2.088  1.00 99.99
+ATOM    125  N   ALA A  12       5.008  -1.520  -3.077  1.00 95.59
+ATOM    126  CA  ALA A  12       6.301  -0.839  -3.056  1.00 95.59
+ATOM    127  C   ALA A  12       7.470  -1.817  -3.255  1.00 95.59
+ATOM    128  CB  ALA A  12       6.430  -0.061  -1.741  1.00 95.59
+ATOM    129  O   ALA A  12       8.385  -1.514  -4.019  1.00 95.59
+ATOM    130  H   ALA A  12       4.410  -1.508  -2.275  1.00 99.99
+ATOM    131  N   THR A  13       7.419  -3.001  -2.635  1.00 94.11
+ATOM    132  CA  THR A  13       8.445  -4.039  -2.818  1.00 94.11
+ATOM    133  C   THR A  13       8.398  -4.653  -4.216  1.00 94.11
+ATOM    134  CB  THR A  13       8.369  -5.131  -1.738  1.00 94.11
+ATOM    135  O   THR A  13       9.435  -4.745  -4.854  1.00 94.11
+ATOM    136  CG2 THR A  13       8.715  -4.573  -0.355  1.00 94.11
+ATOM    137  OG1 THR A  13       7.079  -5.693  -1.633  1.00 94.11
+ATOM    138  H   THR A  13       6.638  -3.155  -2.029  1.00 99.99
+ATOM    139  HG1 THR A  13       7.123  -6.350  -0.951  1.00 99.99
+ATOM    140  N   THR A  14       7.213  -4.968  -4.760  1.00 93.90
+ATOM    141  CA  THR A  14       7.092  -5.543  -6.121  1.00 93.90
+ATOM    142  C   THR A  14       7.535  -4.612  -7.245  1.00 93.90
+ATOM    143  CB  THR A  14       5.651  -5.960  -6.455  1.00 93.90
+ATOM    144  O   THR A  14       7.745  -5.072  -8.357  1.00 93.90
+ATOM    145  CG2 THR A  14       5.207  -7.198  -5.681  1.00 93.90
+ATOM    146  OG1 THR A  14       4.714  -4.932  -6.199  1.00 93.90
+ATOM    147  H   THR A  14       6.400  -4.797  -4.202  1.00 99.99
+ATOM    148  HG1 THR A  14       3.862  -5.276  -6.432  1.00 99.99
+ATOM    149  N   SER A  15       7.585  -3.303  -6.989  1.00 91.22
+ATOM    150  CA  SER A  15       8.091  -2.321  -7.953  1.00 91.22
+ATOM    151  C   SER A  15       9.588  -2.045  -7.815  1.00 91.22
+ATOM    152  CB  SER A  15       7.310  -1.014  -7.807  1.00 91.22
+ATOM    153  O   SER A  15      10.166  -1.402  -8.689  1.00 91.22
+ATOM    154  OG  SER A  15       7.463  -0.403  -6.533  1.00 91.22
+ATOM    155  H   SER A  15       7.254  -3.014  -6.090  1.00 99.99
+ATOM    156  HG  SER A  15       6.945   0.391  -6.547  1.00 99.99
+ATOM    157  N   LEU A  16      10.169  -2.425  -6.673  1.00 85.38
+ATOM    158  CA  LEU A  16      11.572  -2.198  -6.342  1.00 85.38
+ATOM    159  C   LEU A  16      12.447  -3.386  -6.770  1.00 85.38
+ATOM    160  H   LEU A  16       9.561  -2.894  -6.031  1.00 99.99
+TER     161      LEU A  16    
+ATOM    162  CB  LEU B  16      11.669  -1.927  -4.830  1.00 85.38
+ATOM    163  O   LEU B  16      13.583  -3.156  -7.181  1.00 85.38
+ATOM    164  CG  LEU B  16      13.080  -1.545  -4.347  1.00 85.38
+ATOM    165  CD1 LEU B  16      13.490  -0.148  -4.827  1.00 85.38
+ATOM    166  CD2 LEU B  16      13.114  -1.545  -2.817  1.00 85.38
+ATOM    167  N   ASP B  17      11.914  -4.607  -6.661  1.00 73.64
+ATOM    168  CA  ASP B  17      12.422  -5.816  -7.331  1.00 73.64
+ATOM    169  C   ASP B  17      12.082  -5.810  -8.833  1.00 73.64
+ATOM    170  CB  ASP B  17      11.841  -7.073  -6.638  1.00 73.64
+ATOM    171  O   ASP B  17      12.961  -6.201  -9.638  1.00 73.64
+ATOM    172  CG  ASP B  17      12.463  -7.412  -5.269  1.00 73.64
+ATOM    173  OD1 ASP B  17      13.655  -7.806  -5.237  1.00 73.64
+ATOM    174  OD2 ASP B  17      11.729  -7.366  -4.249  1.00 73.64
+ATOM    175  OXT ASP B  17      10.937  -5.420  -9.159  1.00 73.64
+TER     176      ASP B  17    
+REMARK    Source: tests/data/6325_Ethylene.sdf
+REMARK    Docked ligand coordinates...
+REMARK    Solution 1, from model "6325_Ethylene", ID: 0040000b03710053
+REMARK    Energy  -7.055752e+01, RMS  -1.00
+REMARK    Overlap Volume 0.0, Clash Volume 0.0
+REMARK    Box_min: -10.654 -8.723 -9.638
+REMARK    Box_max: 13.655 4.141 13.473
+REMARK    Cube_min: -10.654 -14.446 -10.237
+REMARK    Cube_max: 13.655 9.863 14.072
+REMARK    Symmetry Type: Default
+REMARK    Symmetry Matrix: 0
+ATOM    177 C    SDF A   1      -0.275  -0.845  -2.305  1.00 99.99
+ATOM    178 C    SDF A   1       0.981  -0.395  -2.301  1.00 99.99
+ATOM    179 H    SDF A   1      -1.106  -0.166  -2.458  1.00 99.99
+ATOM    180 H    SDF A   1      -0.487  -1.898  -2.156  1.00 99.99
+ATOM    181 H    SDF A   1       1.813  -1.073  -2.148  1.00 99.99
+ATOM    182 H    SDF A   1       1.193   0.658  -2.449  1.00 99.99
diff --git a/tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt b/tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt
new file mode 100644
index 0000000..f5d4f1e
--- /dev/null
+++ b/tests/data/AT8G88888_complex_6325_Ethylene/hex_output.txt
@@ -0,0 +1,238 @@
+Hex 8.0.0 starting at Mon Feb 19 14:03:40 2024 on host bar.
+
+Hostname:        bar
+Main memory:     128813 Mb
+CPUs online:     32
+GPUs online:     0
+Compute threads: 24
+
+Sizeof(short):   2
+Sizeof(int):     4
+Sizeof(long):    8
+Sizeof(long int):8
+Sizeof(float):   4
+Sizeof(double):  8
+Sizeof(octa):    8
+Sizeof(int *):   8
+Sizeof(long *):  8
+Sizeof(void *):  8
+
+Default path:    /home/dnguyen/BAR_API_forked/BAR_API
+PATH:            /home/dnguyen/hex/exe:/home/dnguyen/hex/bin:/home/dnguyen/BAR_API_forked/BAR_API/venv/bin:/home/dnguyen/.vscode-server/bin/8b3775030ed1a69b13e4f4c628c612102e30a681/bin/remote-cli:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/ho
+Hex program:     /home/dnguyen/hex/exe/hex8.0.0-nogui.x64
+HEX_ROOT:        /home/dnguyen/hex
+HEX_STRIDE:      stride.x64
+HEX_PDB:         NULL
+HEX_DATA:        NULL
+HEX_MACROS:      NULL
+HEX_COLOURS:     NULL
+HEX_STARTUP:     NULL
+HEX_FONTFILE:    NULL
+HEX_PIPE:        NULL
+HEX_CACHE:       /home/dnguyen/hex_cache
+HEX_LOG:         NULL
+HEX_CPUS:        NULL
+HEX_GPUS:        NULL
+HEX_FIRST_GPU:   NULL
+HEX_GTO_SCALE:   NULL
+HEX_ETO_SCALE:   NULL
+
+CUDA enabled:    No
+
+
+
+Running HEX_STARTUP file: /home/dnguyen/hex/data/startup_v5.mac
+Disc Cache enabled. Using directory: /home/dnguyen/hex_cache
+
+
+Reading commands from stdin ...
+>>  open_receptor  tests/data/AF2_AT8G88888_complex.pdb
+Assuming tests/data/AF2_AT8G88888_complex.pdb is a PDB file...
+
+Opened PDB file: tests/data/AF2_AT8G88888_complex.pdb, ID = AF2_AT8G88888_complex
+*Warning* Can't add all hydrogens to incomplete residue: B 16:LEU
+*Warning* Can't add all hydrogens to incomplete residue: B 17:ASP
+Loaded PDB file: tests/data/AF2_AT8G88888_complex.pdb, (20 residues, 176 atoms, 1 models)
+*Warning* Fractional charge (-0.46) for terminal residue: B 16:LEU
+   LEU:CB   Radius = 1.70, Charge = 0.02
+   LEU:O    Radius = 1.50, Charge = -0.50
+   LEU:CG   Radius = 1.50, Charge = 0.05
+   LEU:CD1  Radius = 1.90, Charge = -0.01
+   LEU:CD2  Radius = 1.90, Charge = -0.01
+*Warning* Fractional charge (-2.27) for terminal residue: B 17:ASP
+   ASP:N    Radius = 1.40, Charge = -0.52
+   ASP:CA   Radius = 1.50, Charge = 0.25
+   ASP:C    Radius = 1.40, Charge = 0.14
+   ASP:CB   Radius = 1.70, Charge = -0.21
+   ASP:O    Radius = 1.50, Charge = -0.57
+   ASP:CG   Radius = 1.40, Charge = 0.62
+   ASP:OD1  Radius = 1.50, Charge = -0.71
+   ASP:OD2  Radius = 1.50, Charge = -0.71
+   ASP:OXT  Radius = 1.50, Charge = -0.57
+Counted 1 +ve and 2 -ve formal charged residues: Net formal charge: -1
+>AF2_AT8G88888_complex A
+MFRFLDWIFTVATTSL
+>AF2_AT8G88888_complex B
+LD
+>>                 open_ligand  tests/data/6325_Ethylene.sdf
+Assuming tests/data/6325_Ethylene.sdf is an SDF file...
+
+Opened SDF file: tests/data/6325_Ethylene.sdf, ID = 6325_Ethylene
+>>                 docking_correlation 1
+Docking with shape+electrostatics.
+>>                 docking_score_threshold 0
+>>                 max_docking_solutions 25
+>>                 docking_receptor_stepsize 5.50
+Receptor step size: 5.50 deg. B=32, M=64, T=1692.
+>>                 docking_ligand_stepsize 5.50
+Ligand step size: 5.50 deg. B=32, M=64, T=1692.
+>>                 docking_alpha_stepsize 2.80
+Twist step size: 2.80 deg. B=64, M=128.
+>>                 docking_main_scan 16
+>>                 receptor_origin C-825:VAL-O
+*Warning* No Match: C-825:VAL-O -> C-825:VAL-O (Chain-ResidueID:ResidueName-AtomName)
+>>                 commit_edits
+>>                 activate_docking
+
+Contouring surface for molecule AF2_AT8G88888_complex.
+Polar probe = 1.40A, Apolar probe = 1.40A
+Gaussian sampling over 146 atoms done in 0.03 seconds.
+Contoured 37680 triangles (18842 vertices) in 0.03 seconds.
+Surface traversal done in 0.01 seconds - Found 1 surface segments.
+Primary surface:   Area = 1893.33, Volume = 5111.39.
+Culled 0 small segments in 0.01 seconds.
+Total contouring time: 0.05 seconds.
+
+
+Contouring surface for molecule 6325_Ethylene.
+Polar probe = 1.40A, Apolar probe = 1.40A
+Gaussian sampling over 2 atoms done in 0.00 seconds.
+Contoured 3240 triangles (1622 vertices) in 0.00 seconds.
+Surface traversal done in 0.00 seconds - Found 1 surface segments.
+Primary surface:   Area = 164.05, Volume = 195.88.
+Culled 0 small segments in 0.00 seconds.
+Total contouring time: 0.00 seconds.
+
+Sampling surface and interior volumes for molecule AF2_AT8G88888_complex.
+Generated 14716 exterior and 5201 interior skin grid cells.
+Exterior skin volume = 3178.66; interior skin volume = 1123.42.
+Volume sampling done in 0.04 seconds.
+Sampling surface and interior volumes for molecule 6325_Ethylene.
+Generated 966 exterior and 123 interior skin grid cells.
+Exterior skin volume = 208.66; interior skin volume = 26.57.
+Volume sampling done in 0.00 seconds.
+
+Calculating potential to N = 25 (5525 coefficients) using 24 Tasks ...
+Grid: 74x74x74 = 405224 cells (20323 non-zero) of 0.60 Angstroms.
+Done integration over 20323 cells in 0.06s (335995/s).
+
+Calculating electrostatics for molecule AF2_AT8G88888_complex.
+Charge density for molecule AF2_AT8G88888_complex to N = 25:  172 atoms done in 0.00 seconds.
+Potential for molecule AF2_AT8G88888_complex to N = 25 done in 0.01 seconds.
+Calculating electrostatics for molecule 6325_Ethylene.
+Charge density for molecule 6325_Ethylene to N = 25:  0 atoms done in 0.00 seconds.
+Potential for molecule 6325_Ethylene to N = 25 done in 0.01 seconds.
+
+------------------------------------------------------------------------------
+Docking will output a maximum of 25 solutions per pair...
+
+------------------------------------------------------------------------------
+Docking 1 pair of starting orientations...
+
+Docking receptor: AF2_AT8G88888_complex and ligand: 6325_Ethylene...
+
+Receptor AF2_AT8G88888_complex: Tag = AF2_AT8G88888_complex
+Ligand   6325_Ethylene: Tag = 6325_Ethylene
+
+Setting up shape + electrostatics correlation.
+
+Starting SPF search.
+Setting docking_score threshold = 0.0
+Setting 30 distance samples from 0.00 to 23.20, with steps of 0.80.
+
+
+Total 6D space: Iterate[30,1692,1] x FFT[128,32,64] = 13306429440.
+Initial rotational increments (N=16) Receptor: 1692 (39Mb), Ligand: 1692 (39Mb)
+Applying 1692+1692 coefficient rotations on 24 CPUs for N=16.
+Done 3384 rotations in a total of 0.07s (48465/s).
+
+Starting 3D FFT search using 24 CPUs and 0 GPUs with N=16, Nalpha=128/128.
+Estart = 68.70.
+Done 13306429440 orientations in 23.85s (558001702/s).
+Found 51655052/13306429440 within score threshold = 0.0 NOT including start guess.
+
+Time spent culling 42*1600000 solutions = 8.56s.
+Starting guess not found in top 1255052 solutions.
+Emin = -67.49, Emax = -0.00
+
+Re-sampling top 40000 orientations -> top 28050 retained.
+Surviving rotational steps (N=25) Receptor: 48 (9Mb), Ligand: 1441 (243Mb)
+Applying 48+1441 coefficient rotations on 24 CPUs for N=25.
+Done 1489 rotations in a total of 0.12s (12658/s).
+
+Starting 1D FFT refinement using 24 CPUs and 0 GPUs with N=25, Nalpha=128/128.
+Estart = 59.64.
+Done 3590400 orientations in 0.24s (14979911/s).
+Found 70715/3590400 within score threshold = 0.0 NOT including start guess.
+
+
+Solution buffer reached 70715/200000 = 35.4% occupancy with no culling.
+Starting guess not found in top 70715 solutions.
+Emin = -70.56, Emax = -7.51
+
+Docking correlation summary by RMS deviation and steric clashes
+-------------------------------------------------------------------------
+  Soln   Etotal    Eshape    Eforce    Eair              RMS        Bumps
+  ----  --------- --------- --------- ---------   ----------------  -----
+
+
+Docked structures AF2_AT8G88888_complex:6325_Ethylene in a total of 0 min, 25 sec.
+
+
+------------------------------------------------------------------------------
+Saving top 25 orientations.
+
+Docking done in a total of 0 min, 25 sec.
+
+
+------------------------------------------------------------------------------
+
+No AIRs enabled or defined. Skipping restraint checks.
+Clustering found 1 clusters from 25 docking solutions in 0.00 seconds.
+
+---- ---- ------- ------- ------- ------- ------- --- -----
+Clst Soln  Models  Etotal  Eshape  Eforce  Eair   Bmp  RMS
+---- ---- ------- ------- ------- ------- ------- --- -----
+   1    1 001:001   -70.6   -70.6     0.0     0.0  -1  -1.00   
+   1    2 001:001   -70.4   -70.4     0.0     0.0  -1  -1.00   
+   1    3 001:001   -70.4   -70.4     0.0     0.0  -1  -1.00   
+   1    4 001:001   -70.4   -70.4     0.0     0.0  -1  -1.00   
+   1    5 001:001   -70.4   -70.4     0.0     0.0  -1  -1.00   
+   1    6 001:001   -70.3   -70.3     0.0     0.0  -1  -1.00   
+   1    7 001:001   -70.2   -70.2     0.0     0.0  -1  -1.00   
+   1    8 001:001   -70.2   -70.2     0.0     0.0  -1  -1.00   
+   1    9 001:001   -70.1   -70.1     0.0     0.0  -1  -1.00   
+   1   10 001:001   -70.1   -70.1     0.0     0.0  -1  -1.00   
+   1   11 001:001   -70.1   -70.1     0.0     0.0  -1  -1.00   
+   1   12 001:001   -70.1   -70.1     0.0     0.0  -1  -1.00   
+   1   13 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   14 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   15 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   16 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   17 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   18 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   19 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   20 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1   21 001:001   -69.9   -69.9     0.0     0.0  -1  -1.00   
+   1   22 001:001   -69.9   -69.9     0.0     0.0  -1  -1.00   
+   1   23 001:001   -69.9   -69.9     0.0     0.0  -1  -1.00   
+   1   24 001:001   -69.9   -69.9     0.0     0.0  -1  -1.00   
+   1   25 001:001   -69.9   -69.9     0.0     0.0  -1  -1.00   
+------------------------------------------------------------
+   1    1 001:001   -70.6   -70.6     0.0     0.0  -1  -1.00   
+>>                 save_range 1 100 tests/data/AT8G88888_complex_6325_Ethylene/ AT8G88888_complex_6325_Ethylene pdb
+Saving orientation 1 (solution 1) to tests/data/AT8G88888_complex_6325_Ethylene/AT8G88888_complex_6325_Ethylene0001.pdb
+
+Max application memory used: 384.62 MB virtual + 0.00 KB shared.
+
+Hex stopping: Mon Feb 19 14:04:05 2024
diff --git a/tests/data/AT8G88888_complex_6325_Ethylene0001.pdb b/tests/data/AT8G88888_complex_6325_Ethylene0001.pdb
new file mode 100644
index 0000000..54bc1e8
--- /dev/null
+++ b/tests/data/AT8G88888_complex_6325_Ethylene0001.pdb
@@ -0,0 +1,205 @@
+REMARK    File generated by Hex 8.0.0 on Mon Feb 19 13:27:06 2024.
+REMARK    Source: tests/data/AF2_AT8G88888_complex.pdb
+REMARK    Docked receptor coordinates...
+REMARK    Solution 1, from model "AF2_AT8G88888_complex", ID: 0040000b03710053
+REMARK    Energy  -7.055624e+01, RMS  -1.00
+REMARK    Overlap Volume 0.0, Clash Volume 0.0
+REMARK    Box_min: -10.654 -8.723 -9.638
+REMARK    Box_max: 13.655 4.141 13.473
+REMARK    Cube_min: -10.654 -14.446 -10.237
+REMARK    Cube_max: 13.655 9.863 14.072
+REMARK    Symmetry Type: Default
+REMARK    Symmetry Matrix: 0
+ATOM      1  N   MET A   1      -7.410  -1.552   9.979  1.00 71.91
+ATOM      2  CA  MET A   1      -6.641  -0.817   8.946  1.00 71.91
+ATOM      3  C   MET A   1      -5.303  -1.485   8.564  1.00 71.91
+ATOM      4  CB  MET A   1      -6.474   0.646   9.390  1.00 71.91
+ATOM      5  O   MET A   1      -4.645  -0.990   7.666  1.00 71.91
+ATOM      6  CG  MET A   1      -6.092   1.613   8.265  1.00 71.91
+ATOM      7  SD  MET A   1      -6.114   3.332   8.818  1.00 71.91
+ATOM      8  CE  MET A   1      -5.405   4.141   7.358  1.00 71.91
+ATOM      9 1H   MET A   1      -8.294  -1.042  10.181  1.00 99.99
+ATOM     10 2H   MET A   1      -7.633  -2.507   9.631  1.00 99.99
+ATOM     11 3H   MET A   1      -6.843  -1.622  10.848  1.00 99.99
+ATOM     12  N   PHE A   2      -4.916  -2.639   9.137  1.00 82.96
+ATOM     13  CA  PHE A   2      -3.656  -3.336   8.797  1.00 82.96
+ATOM     14  C   PHE A   2      -3.544  -3.808   7.338  1.00 82.96
+ATOM     15  CB  PHE A   2      -3.477  -4.529   9.748  1.00 82.96
+ATOM     16  O   PHE A   2      -2.492  -3.654   6.732  1.00 82.96
+ATOM     17  CG  PHE A   2      -2.837  -4.145  11.064  1.00 82.96
+ATOM     18  CD1 PHE A   2      -1.434  -4.085  11.145  1.00 82.96
+ATOM     19  CD2 PHE A   2      -3.618  -3.837  12.195  1.00 82.96
+ATOM     20  CE1 PHE A   2      -0.812  -3.714  12.349  1.00 82.96
+ATOM     21  CE2 PHE A   2      -2.994  -3.457  13.397  1.00 82.96
+ATOM     22  CZ  PHE A   2      -1.591  -3.397  13.473  1.00 82.96
+ATOM     23  H   PHE A   2      -5.539  -3.014   9.824  1.00 99.99
+ATOM     24  N   ARG A   3      -4.644  -4.300   6.744  1.00 88.23
+ATOM     25  CA  ARG A   3      -4.655  -4.785   5.350  1.00 88.23
+ATOM     26  C   ARG A   3      -4.257  -3.729   4.317  1.00 88.23
+ATOM     27  CB  ARG A   3      -6.038  -5.349   4.983  1.00 88.23
+ATOM     28  O   ARG A   3      -3.766  -4.087   3.258  1.00 88.23
+ATOM     29  CG  ARG A   3      -6.239  -6.783   5.494  1.00 88.23
+ATOM     30  CD  ARG A   3      -7.610  -7.350   5.094  1.00 88.23
+ATOM     31  NE  ARG A   3      -7.758  -7.481   3.629  1.00 88.23
+ATOM     32  NH1 ARG A   3      -9.874  -8.376   3.605  1.00 88.23
+ATOM     33  NH2 ARG A   3      -8.815  -7.999   1.679  1.00 88.23
+ATOM     34  CZ  ARG A   3      -8.811  -7.948   2.980  1.00 88.23
+ATOM     35  H   ARG A   3      -5.475  -4.324   7.301  1.00 99.99
+ATOM     36  HE  ARG A   3      -6.985  -7.188   3.071  1.00 99.99
+ATOM     37 1HH1 ARG A   3     -10.654  -8.723   3.085  1.00 99.99
+ATOM     38 2HH1 ARG A   3      -9.908  -8.357   4.604  1.00 99.99
+ATOM     39 1HH2 ARG A   3      -9.615  -8.353   1.195  1.00 99.99
+ATOM     40 2HH2 ARG A   3      -8.018  -7.684   1.164  1.00 99.99
+ATOM     41  N   PHE A   4      -4.500  -2.449   4.602  1.00 95.39
+ATOM     42  CA  PHE A   4      -4.141  -1.385   3.667  1.00 95.39
+ATOM     43  C   PHE A   4      -2.630  -1.112   3.666  1.00 95.39
+ATOM     44  CB  PHE A   4      -4.950  -0.122   3.981  1.00 95.39
+ATOM     45  O   PHE A   4      -2.042  -0.961   2.601  1.00 95.39
+ATOM     46  CG  PHE A   4      -4.637   1.004   3.018  1.00 95.39
+ATOM     47  CD1 PHE A   4      -3.744   2.026   3.392  1.00 95.39
+ATOM     48  CD2 PHE A   4      -5.173   0.983   1.716  1.00 95.39
+ATOM     49  CE1 PHE A   4      -3.393   3.025   2.468  1.00 95.39
+ATOM     50  CE2 PHE A   4      -4.820   1.983   0.794  1.00 95.39
+ATOM     51  CZ  PHE A   4      -3.931   3.004   1.170  1.00 95.39
+ATOM     52  H   PHE A   4      -4.937  -2.236   5.477  1.00 99.99
+ATOM     53  N   LEU A   5      -1.999  -1.093   4.848  1.00 95.07
+ATOM     54  CA  LEU A   5      -0.549  -0.898   4.972  1.00 95.07
+ATOM     55  C   LEU A   5       0.230  -2.068   4.366  1.00 95.07
+ATOM     56  CB  LEU A   5      -0.169  -0.721   6.454  1.00 95.07
+ATOM     57  O   LEU A   5       1.200  -1.845   3.652  1.00 95.07
+ATOM     58  CG  LEU A   5      -0.676   0.581   7.097  1.00 95.07
+ATOM     59  CD1 LEU A   5      -0.344   0.575   8.590  1.00 95.07
+ATOM     60  CD2 LEU A   5      -0.045   1.824   6.468  1.00 95.07
+ATOM     61  H   LEU A   5      -2.570  -1.220   5.660  1.00 99.99
+ATOM     62  N   ASP A   6      -0.243  -3.291   4.604  1.00 94.98
+ATOM     63  CA  ASP A   6       0.310  -4.514   4.017  1.00 94.98
+ATOM     64  C   ASP A   6       0.268  -4.491   2.476  1.00 94.98
+ATOM     65  CB  ASP A   6      -0.502  -5.675   4.596  1.00 94.98
+ATOM     66  O   ASP A   6       1.259  -4.793   1.812  1.00 94.98
+ATOM     67  CG  ASP A   6       0.008  -7.032   4.128  1.00 94.98
+ATOM     68  OD1 ASP A   6      -0.791  -7.719   3.454  1.00 94.98
+ATOM     69  OD2 ASP A   6       1.140  -7.372   4.527  1.00 94.98
+ATOM     70  H   ASP A   6      -1.025  -3.331   5.228  1.00 99.99
+ATOM     71  N   TRP A   7      -0.843  -4.021   1.895  1.00 94.94
+ATOM     72  CA  TRP A   7      -0.972  -3.855   0.445  1.00 94.94
+ATOM     73  C   TRP A   7       0.004  -2.814  -0.129  1.00 94.94
+ATOM     74  CB  TRP A   7      -2.424  -3.518   0.091  1.00 94.94
+ATOM     75  O   TRP A   7       0.686  -3.117  -1.103  1.00 94.94
+ATOM     76  CG  TRP A   7      -2.650  -3.240  -1.363  1.00 94.94
+ATOM     77  CD1 TRP A   7      -2.691  -4.169  -2.344  1.00 94.94
+ATOM     78  CD2 TRP A   7      -2.756  -1.947  -2.035  1.00 94.94
+ATOM     79  CE2 TRP A   7      -2.861  -2.176  -3.439  1.00 94.94
+ATOM     80  CE3 TRP A   7      -2.755  -0.604  -1.602  1.00 94.94
+ATOM     81  NE1 TRP A   7      -2.829  -3.547  -3.569  1.00 94.94
+ATOM     82  CH2 TRP A   7      -2.931   0.196  -3.906  1.00 94.94
+ATOM     83  CZ2 TRP A   7      -2.944  -1.130  -4.369  1.00 94.94
+ATOM     84  CZ3 TRP A   7      -2.849   0.455  -2.526  1.00 94.94
+ATOM     85  H   TRP A   7      -1.598  -3.783   2.507  1.00 99.99
+ATOM     86  HE1 TRP A   7      -2.899  -3.980  -4.446  1.00 99.99
+ATOM     87  N   ILE A   8       0.122  -1.619   0.473  1.00 95.58
+ATOM     88  CA  ILE A   8       1.094  -0.599   0.022  1.00 95.58
+ATOM     89  C   ILE A   8       2.526  -1.128   0.124  1.00 95.58
+ATOM     90  CB  ILE A   8       0.955   0.719   0.824  1.00 95.58
+ATOM     91  O   ILE A   8       3.316  -0.913  -0.792  1.00 95.58
+ATOM     92  CG1 ILE A   8      -0.332   1.498   0.479  1.00 95.58
+ATOM     93  CG2 ILE A   8       2.174   1.654   0.660  1.00 95.58
+ATOM     94  CD1 ILE A   8      -0.352   2.171  -0.903  1.00 95.58
+ATOM     95  H   ILE A   8      -0.482  -1.442   1.251  1.00 99.99
+ATOM     96  N   PHE A   9       2.857  -1.824   1.215  1.00 95.75
+ATOM     97  CA  PHE A   9       4.181  -2.409   1.400  1.00 95.75
+ATOM     98  C   PHE A   9       4.484  -3.426   0.296  1.00 95.75
+ATOM     99  CB  PHE A   9       4.269  -3.029   2.799  1.00 95.75
+ATOM    100  O   PHE A   9       5.521  -3.325  -0.350  1.00 95.75
+ATOM    101  CG  PHE A   9       5.685  -3.372   3.208  1.00 95.75
+ATOM    102  CD1 PHE A   9       6.205  -4.658   2.976  1.00 95.75
+ATOM    103  CD2 PHE A   9       6.494  -2.385   3.801  1.00 95.75
+ATOM    104  CE1 PHE A   9       7.529  -4.956   3.343  1.00 95.75
+ATOM    105  CE2 PHE A   9       7.817  -2.684   4.169  1.00 95.75
+ATOM    106  CZ  PHE A   9       8.334  -3.970   3.939  1.00 95.75
+ATOM    107  H   PHE A   9       2.148  -1.930   1.913  1.00 99.99
+ATOM    108  N   THR A  10       3.528  -4.316   0.010  1.00 95.85
+ATOM    109  CA  THR A  10       3.622  -5.299  -1.079  1.00 95.85
+ATOM    110  C   THR A  10       3.835  -4.624  -2.434  1.00 95.85
+ATOM    111  CB  THR A  10       2.357  -6.170  -1.140  1.00 95.85
+ATOM    112  O   THR A  10       4.733  -5.013  -3.175  1.00 95.85
+ATOM    113  CG2 THR A  10       2.445  -7.255  -2.212  1.00 95.85
+ATOM    114  OG1 THR A  10       2.146  -6.838   0.082  1.00 95.85
+ATOM    115  H   THR A  10       2.717  -4.277   0.596  1.00 99.99
+ATOM    116  HG1 THR A  10       1.357  -7.353  -0.022  1.00 99.99
+ATOM    117  N   VAL A  11       3.047  -3.592  -2.759  1.00 96.10
+ATOM    118  CA  VAL A  11       3.177  -2.859  -4.029  1.00 96.10
+ATOM    119  C   VAL A  11       4.548  -2.188  -4.140  1.00 96.10
+ATOM    120  CB  VAL A  11       2.034  -1.841  -4.204  1.00 96.10
+ATOM    121  O   VAL A  11       5.202  -2.317  -5.174  1.00 96.10
+ATOM    122  CG1 VAL A  11       2.239  -0.928  -5.422  1.00 96.10
+ATOM    123  CG2 VAL A  11       0.692  -2.558  -4.412  1.00 96.10
+ATOM    124  H   VAL A  11       2.351  -3.334  -2.088  1.00 99.99
+ATOM    125  N   ALA A  12       5.008  -1.520  -3.077  1.00 95.59
+ATOM    126  CA  ALA A  12       6.301  -0.839  -3.056  1.00 95.59
+ATOM    127  C   ALA A  12       7.470  -1.817  -3.255  1.00 95.59
+ATOM    128  CB  ALA A  12       6.430  -0.061  -1.741  1.00 95.59
+ATOM    129  O   ALA A  12       8.385  -1.514  -4.019  1.00 95.59
+ATOM    130  H   ALA A  12       4.410  -1.508  -2.275  1.00 99.99
+ATOM    131  N   THR A  13       7.419  -3.001  -2.635  1.00 94.11
+ATOM    132  CA  THR A  13       8.445  -4.039  -2.818  1.00 94.11
+ATOM    133  C   THR A  13       8.398  -4.653  -4.216  1.00 94.11
+ATOM    134  CB  THR A  13       8.369  -5.131  -1.738  1.00 94.11
+ATOM    135  O   THR A  13       9.435  -4.745  -4.854  1.00 94.11
+ATOM    136  CG2 THR A  13       8.715  -4.573  -0.355  1.00 94.11
+ATOM    137  OG1 THR A  13       7.079  -5.693  -1.633  1.00 94.11
+ATOM    138  H   THR A  13       6.638  -3.155  -2.029  1.00 99.99
+ATOM    139  HG1 THR A  13       7.123  -6.350  -0.951  1.00 99.99
+ATOM    140  N   THR A  14       7.213  -4.968  -4.760  1.00 93.90
+ATOM    141  CA  THR A  14       7.092  -5.543  -6.121  1.00 93.90
+ATOM    142  C   THR A  14       7.535  -4.612  -7.245  1.00 93.90
+ATOM    143  CB  THR A  14       5.651  -5.960  -6.455  1.00 93.90
+ATOM    144  O   THR A  14       7.745  -5.072  -8.357  1.00 93.90
+ATOM    145  CG2 THR A  14       5.207  -7.198  -5.681  1.00 93.90
+ATOM    146  OG1 THR A  14       4.714  -4.932  -6.199  1.00 93.90
+ATOM    147  H   THR A  14       6.400  -4.797  -4.202  1.00 99.99
+ATOM    148  HG1 THR A  14       3.862  -5.276  -6.432  1.00 99.99
+ATOM    149  N   SER A  15       7.585  -3.303  -6.989  1.00 91.22
+ATOM    150  CA  SER A  15       8.091  -2.321  -7.953  1.00 91.22
+ATOM    151  C   SER A  15       9.588  -2.045  -7.815  1.00 91.22
+ATOM    152  CB  SER A  15       7.310  -1.014  -7.807  1.00 91.22
+ATOM    153  O   SER A  15      10.166  -1.402  -8.689  1.00 91.22
+ATOM    154  OG  SER A  15       7.463  -0.403  -6.533  1.00 91.22
+ATOM    155  H   SER A  15       7.254  -3.014  -6.090  1.00 99.99
+ATOM    156  HG  SER A  15       6.945   0.391  -6.547  1.00 99.99
+ATOM    157  N   LEU A  16      10.169  -2.425  -6.673  1.00 85.38
+ATOM    158  CA  LEU A  16      11.572  -2.198  -6.342  1.00 85.38
+ATOM    159  C   LEU A  16      12.447  -3.386  -6.770  1.00 85.38
+ATOM    160  H   LEU A  16       9.561  -2.894  -6.031  1.00 99.99
+TER     161      LEU A  16    
+ATOM    162  CB  LEU B  16      11.669  -1.927  -4.830  1.00 85.38
+ATOM    163  O   LEU B  16      13.583  -3.156  -7.181  1.00 85.38
+ATOM    164  CG  LEU B  16      13.080  -1.545  -4.347  1.00 85.38
+ATOM    165  CD1 LEU B  16      13.490  -0.148  -4.827  1.00 85.38
+ATOM    166  CD2 LEU B  16      13.114  -1.545  -2.817  1.00 85.38
+ATOM    167  N   ASP B  17      11.914  -4.607  -6.661  1.00 73.64
+ATOM    168  CA  ASP B  17      12.422  -5.816  -7.331  1.00 73.64
+ATOM    169  C   ASP B  17      12.082  -5.810  -8.833  1.00 73.64
+ATOM    170  CB  ASP B  17      11.841  -7.073  -6.638  1.00 73.64
+ATOM    171  O   ASP B  17      12.961  -6.201  -9.638  1.00 73.64
+ATOM    172  CG  ASP B  17      12.463  -7.412  -5.269  1.00 73.64
+ATOM    173  OD1 ASP B  17      13.655  -7.806  -5.237  1.00 73.64
+ATOM    174  OD2 ASP B  17      11.729  -7.366  -4.249  1.00 73.64
+ATOM    175  OXT ASP B  17      10.937  -5.420  -9.159  1.00 73.64
+TER     176      ASP B  17    
+REMARK    Source: tests/data/6325_Ethylene.sdf
+REMARK    Docked ligand coordinates...
+REMARK    Solution 1, from model "6325_Ethylene", ID: 0040000b03710053
+REMARK    Energy  -7.055624e+01, RMS  -1.00
+REMARK    Overlap Volume 0.0, Clash Volume 0.0
+REMARK    Box_min: -10.654 -8.723 -9.638
+REMARK    Box_max: 13.655 4.141 13.473
+REMARK    Cube_min: -10.654 -14.446 -10.237
+REMARK    Cube_max: 13.655 9.863 14.072
+REMARK    Symmetry Type: Default
+REMARK    Symmetry Matrix: 0
+ATOM    177 C    SDF A   1      -0.275  -0.845  -2.305  1.00 99.99
+ATOM    178 C    SDF A   1       0.981  -0.395  -2.301  1.00 99.99
+ATOM    179 H    SDF A   1      -1.106  -0.166  -2.458  1.00 99.99
+ATOM    180 H    SDF A   1      -0.487  -1.898  -2.156  1.00 99.99
+ATOM    181 H    SDF A   1       1.813  -1.073  -2.148  1.00 99.99
+ATOM    182 H    SDF A   1       1.193   0.658  -2.449  1.00 99.99
diff --git a/tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb b/tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb
new file mode 100644
index 0000000..b4b564a
--- /dev/null
+++ b/tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb
@@ -0,0 +1,205 @@
+REMARK    File generated by Hex 8.0.0 on Mon Feb 19 14:02:49 2024.
+REMARK    Source: tests/data/AF2_AT9G99999_monomer.pdb
+REMARK    Docked receptor coordinates...
+REMARK    Solution 1, from model "AF2_AT9G99999_monomer", ID: 004a000c059c0018
+REMARK    Energy  -7.034417e+01, RMS  -1.00
+REMARK    Overlap Volume 0.0, Clash Volume 0.0
+REMARK    Box_min: -10.654 -8.723 -9.638
+REMARK    Box_max: 13.655 4.141 13.473
+REMARK    Cube_min: -10.654 -14.446 -10.237
+REMARK    Cube_max: 13.655 9.863 14.072
+REMARK    Symmetry Type: Default
+REMARK    Symmetry Matrix: 0
+ATOM      1  N   MET A   1      -7.410  -1.552   9.979  1.00 71.91
+ATOM      2  CA  MET A   1      -6.641  -0.817   8.946  1.00 71.91
+ATOM      3  C   MET A   1      -5.303  -1.485   8.564  1.00 71.91
+ATOM      4  CB  MET A   1      -6.474   0.646   9.390  1.00 71.91
+ATOM      5  O   MET A   1      -4.645  -0.990   7.666  1.00 71.91
+ATOM      6  CG  MET A   1      -6.092   1.613   8.265  1.00 71.91
+ATOM      7  SD  MET A   1      -6.114   3.332   8.818  1.00 71.91
+ATOM      8  CE  MET A   1      -5.405   4.141   7.358  1.00 71.91
+ATOM      9 1H   MET A   1      -8.294  -1.042  10.181  1.00 99.99
+ATOM     10 2H   MET A   1      -7.633  -2.507   9.631  1.00 99.99
+ATOM     11 3H   MET A   1      -6.843  -1.622  10.848  1.00 99.99
+ATOM     12  N   PHE A   2      -4.916  -2.639   9.137  1.00 82.96
+ATOM     13  CA  PHE A   2      -3.656  -3.336   8.797  1.00 82.96
+ATOM     14  C   PHE A   2      -3.544  -3.808   7.338  1.00 82.96
+ATOM     15  CB  PHE A   2      -3.477  -4.529   9.748  1.00 82.96
+ATOM     16  O   PHE A   2      -2.492  -3.654   6.732  1.00 82.96
+ATOM     17  CG  PHE A   2      -2.837  -4.145  11.064  1.00 82.96
+ATOM     18  CD1 PHE A   2      -1.434  -4.085  11.145  1.00 82.96
+ATOM     19  CD2 PHE A   2      -3.618  -3.837  12.195  1.00 82.96
+ATOM     20  CE1 PHE A   2      -0.812  -3.714  12.349  1.00 82.96
+ATOM     21  CE2 PHE A   2      -2.994  -3.457  13.397  1.00 82.96
+ATOM     22  CZ  PHE A   2      -1.591  -3.397  13.473  1.00 82.96
+ATOM     23  H   PHE A   2      -5.539  -3.014   9.824  1.00 99.99
+ATOM     24  N   ARG A   3      -4.644  -4.300   6.744  1.00 88.23
+ATOM     25  CA  ARG A   3      -4.655  -4.785   5.350  1.00 88.23
+ATOM     26  C   ARG A   3      -4.257  -3.729   4.317  1.00 88.23
+ATOM     27  CB  ARG A   3      -6.038  -5.349   4.983  1.00 88.23
+ATOM     28  O   ARG A   3      -3.766  -4.087   3.258  1.00 88.23
+ATOM     29  CG  ARG A   3      -6.239  -6.783   5.494  1.00 88.23
+ATOM     30  CD  ARG A   3      -7.610  -7.350   5.094  1.00 88.23
+ATOM     31  NE  ARG A   3      -7.758  -7.481   3.629  1.00 88.23
+ATOM     32  NH1 ARG A   3      -9.874  -8.376   3.605  1.00 88.23
+ATOM     33  NH2 ARG A   3      -8.815  -7.999   1.679  1.00 88.23
+ATOM     34  CZ  ARG A   3      -8.811  -7.948   2.980  1.00 88.23
+ATOM     35  H   ARG A   3      -5.475  -4.324   7.301  1.00 99.99
+ATOM     36  HE  ARG A   3      -6.985  -7.188   3.071  1.00 99.99
+ATOM     37 1HH1 ARG A   3     -10.654  -8.723   3.085  1.00 99.99
+ATOM     38 2HH1 ARG A   3      -9.908  -8.357   4.604  1.00 99.99
+ATOM     39 1HH2 ARG A   3      -9.615  -8.353   1.195  1.00 99.99
+ATOM     40 2HH2 ARG A   3      -8.018  -7.684   1.164  1.00 99.99
+ATOM     41  N   PHE A   4      -4.500  -2.449   4.602  1.00 95.39
+ATOM     42  CA  PHE A   4      -4.141  -1.385   3.667  1.00 95.39
+ATOM     43  C   PHE A   4      -2.630  -1.112   3.666  1.00 95.39
+ATOM     44  CB  PHE A   4      -4.950  -0.122   3.981  1.00 95.39
+ATOM     45  O   PHE A   4      -2.042  -0.961   2.601  1.00 95.39
+ATOM     46  CG  PHE A   4      -4.637   1.004   3.018  1.00 95.39
+ATOM     47  CD1 PHE A   4      -3.744   2.026   3.392  1.00 95.39
+ATOM     48  CD2 PHE A   4      -5.173   0.983   1.716  1.00 95.39
+ATOM     49  CE1 PHE A   4      -3.393   3.025   2.468  1.00 95.39
+ATOM     50  CE2 PHE A   4      -4.820   1.983   0.794  1.00 95.39
+ATOM     51  CZ  PHE A   4      -3.931   3.004   1.170  1.00 95.39
+ATOM     52  H   PHE A   4      -4.937  -2.236   5.477  1.00 99.99
+ATOM     53  N   LEU A   5      -1.999  -1.093   4.848  1.00 95.07
+ATOM     54  CA  LEU A   5      -0.549  -0.898   4.972  1.00 95.07
+ATOM     55  C   LEU A   5       0.230  -2.068   4.366  1.00 95.07
+ATOM     56  CB  LEU A   5      -0.169  -0.721   6.454  1.00 95.07
+ATOM     57  O   LEU A   5       1.200  -1.845   3.652  1.00 95.07
+ATOM     58  CG  LEU A   5      -0.676   0.581   7.097  1.00 95.07
+ATOM     59  CD1 LEU A   5      -0.344   0.575   8.590  1.00 95.07
+ATOM     60  CD2 LEU A   5      -0.045   1.824   6.468  1.00 95.07
+ATOM     61  H   LEU A   5      -2.570  -1.220   5.660  1.00 99.99
+ATOM     62  N   ASP A   6      -0.243  -3.291   4.604  1.00 94.98
+ATOM     63  CA  ASP A   6       0.310  -4.514   4.017  1.00 94.98
+ATOM     64  C   ASP A   6       0.268  -4.491   2.476  1.00 94.98
+ATOM     65  CB  ASP A   6      -0.502  -5.675   4.596  1.00 94.98
+ATOM     66  O   ASP A   6       1.259  -4.793   1.812  1.00 94.98
+ATOM     67  CG  ASP A   6       0.008  -7.032   4.128  1.00 94.98
+ATOM     68  OD1 ASP A   6      -0.791  -7.719   3.454  1.00 94.98
+ATOM     69  OD2 ASP A   6       1.140  -7.372   4.527  1.00 94.98
+ATOM     70  H   ASP A   6      -1.025  -3.331   5.228  1.00 99.99
+ATOM     71  N   TRP A   7      -0.843  -4.021   1.895  1.00 94.94
+ATOM     72  CA  TRP A   7      -0.972  -3.855   0.445  1.00 94.94
+ATOM     73  C   TRP A   7       0.004  -2.814  -0.129  1.00 94.94
+ATOM     74  CB  TRP A   7      -2.424  -3.518   0.091  1.00 94.94
+ATOM     75  O   TRP A   7       0.686  -3.117  -1.103  1.00 94.94
+ATOM     76  CG  TRP A   7      -2.650  -3.240  -1.363  1.00 94.94
+ATOM     77  CD1 TRP A   7      -2.691  -4.169  -2.344  1.00 94.94
+ATOM     78  CD2 TRP A   7      -2.756  -1.947  -2.035  1.00 94.94
+ATOM     79  CE2 TRP A   7      -2.861  -2.176  -3.439  1.00 94.94
+ATOM     80  CE3 TRP A   7      -2.755  -0.604  -1.602  1.00 94.94
+ATOM     81  NE1 TRP A   7      -2.829  -3.547  -3.569  1.00 94.94
+ATOM     82  CH2 TRP A   7      -2.931   0.196  -3.906  1.00 94.94
+ATOM     83  CZ2 TRP A   7      -2.944  -1.130  -4.369  1.00 94.94
+ATOM     84  CZ3 TRP A   7      -2.849   0.455  -2.526  1.00 94.94
+ATOM     85  H   TRP A   7      -1.598  -3.783   2.507  1.00 99.99
+ATOM     86  HE1 TRP A   7      -2.899  -3.980  -4.446  1.00 99.99
+ATOM     87  N   ILE A   8       0.122  -1.619   0.473  1.00 95.58
+ATOM     88  CA  ILE A   8       1.094  -0.599   0.022  1.00 95.58
+ATOM     89  C   ILE A   8       2.526  -1.128   0.124  1.00 95.58
+ATOM     90  CB  ILE A   8       0.955   0.719   0.824  1.00 95.58
+ATOM     91  O   ILE A   8       3.316  -0.913  -0.792  1.00 95.58
+ATOM     92  CG1 ILE A   8      -0.332   1.498   0.479  1.00 95.58
+ATOM     93  CG2 ILE A   8       2.174   1.654   0.660  1.00 95.58
+ATOM     94  CD1 ILE A   8      -0.352   2.171  -0.903  1.00 95.58
+ATOM     95  H   ILE A   8      -0.482  -1.442   1.251  1.00 99.99
+ATOM     96  N   PHE A   9       2.857  -1.824   1.215  1.00 95.75
+ATOM     97  CA  PHE A   9       4.181  -2.409   1.400  1.00 95.75
+ATOM     98  C   PHE A   9       4.484  -3.426   0.296  1.00 95.75
+ATOM     99  CB  PHE A   9       4.269  -3.029   2.799  1.00 95.75
+ATOM    100  O   PHE A   9       5.521  -3.325  -0.350  1.00 95.75
+ATOM    101  CG  PHE A   9       5.685  -3.372   3.208  1.00 95.75
+ATOM    102  CD1 PHE A   9       6.205  -4.658   2.976  1.00 95.75
+ATOM    103  CD2 PHE A   9       6.494  -2.385   3.801  1.00 95.75
+ATOM    104  CE1 PHE A   9       7.529  -4.956   3.343  1.00 95.75
+ATOM    105  CE2 PHE A   9       7.817  -2.684   4.169  1.00 95.75
+ATOM    106  CZ  PHE A   9       8.334  -3.970   3.939  1.00 95.75
+ATOM    107  H   PHE A   9       2.148  -1.930   1.913  1.00 99.99
+ATOM    108  N   THR A  10       3.528  -4.316   0.010  1.00 95.85
+ATOM    109  CA  THR A  10       3.622  -5.299  -1.079  1.00 95.85
+ATOM    110  C   THR A  10       3.835  -4.624  -2.434  1.00 95.85
+ATOM    111  CB  THR A  10       2.357  -6.170  -1.140  1.00 95.85
+ATOM    112  O   THR A  10       4.733  -5.013  -3.175  1.00 95.85
+ATOM    113  CG2 THR A  10       2.445  -7.255  -2.212  1.00 95.85
+ATOM    114  OG1 THR A  10       2.146  -6.838   0.082  1.00 95.85
+ATOM    115  H   THR A  10       2.717  -4.277   0.596  1.00 99.99
+ATOM    116  HG1 THR A  10       1.357  -7.353  -0.022  1.00 99.99
+ATOM    117  N   VAL A  11       3.047  -3.592  -2.759  1.00 96.10
+ATOM    118  CA  VAL A  11       3.177  -2.859  -4.029  1.00 96.10
+ATOM    119  C   VAL A  11       4.548  -2.188  -4.140  1.00 96.10
+ATOM    120  CB  VAL A  11       2.034  -1.841  -4.204  1.00 96.10
+ATOM    121  O   VAL A  11       5.202  -2.317  -5.174  1.00 96.10
+ATOM    122  CG1 VAL A  11       2.239  -0.928  -5.422  1.00 96.10
+ATOM    123  CG2 VAL A  11       0.692  -2.558  -4.412  1.00 96.10
+ATOM    124  H   VAL A  11       2.351  -3.334  -2.088  1.00 99.99
+ATOM    125  N   ALA A  12       5.008  -1.520  -3.077  1.00 95.59
+ATOM    126  CA  ALA A  12       6.301  -0.839  -3.056  1.00 95.59
+ATOM    127  C   ALA A  12       7.470  -1.817  -3.255  1.00 95.59
+ATOM    128  CB  ALA A  12       6.430  -0.061  -1.741  1.00 95.59
+ATOM    129  O   ALA A  12       8.385  -1.514  -4.019  1.00 95.59
+ATOM    130  H   ALA A  12       4.410  -1.508  -2.275  1.00 99.99
+ATOM    131  N   THR A  13       7.419  -3.001  -2.635  1.00 94.11
+ATOM    132  CA  THR A  13       8.445  -4.039  -2.818  1.00 94.11
+ATOM    133  C   THR A  13       8.398  -4.653  -4.216  1.00 94.11
+ATOM    134  CB  THR A  13       8.369  -5.131  -1.738  1.00 94.11
+ATOM    135  O   THR A  13       9.435  -4.745  -4.854  1.00 94.11
+ATOM    136  CG2 THR A  13       8.715  -4.573  -0.355  1.00 94.11
+ATOM    137  OG1 THR A  13       7.079  -5.693  -1.633  1.00 94.11
+ATOM    138  H   THR A  13       6.638  -3.155  -2.029  1.00 99.99
+ATOM    139  HG1 THR A  13       7.123  -6.350  -0.951  1.00 99.99
+ATOM    140  N   THR A  14       7.213  -4.968  -4.760  1.00 93.90
+ATOM    141  CA  THR A  14       7.092  -5.543  -6.121  1.00 93.90
+ATOM    142  C   THR A  14       7.535  -4.612  -7.245  1.00 93.90
+ATOM    143  CB  THR A  14       5.651  -5.960  -6.455  1.00 93.90
+ATOM    144  O   THR A  14       7.745  -5.072  -8.357  1.00 93.90
+ATOM    145  CG2 THR A  14       5.207  -7.198  -5.681  1.00 93.90
+ATOM    146  OG1 THR A  14       4.714  -4.932  -6.199  1.00 93.90
+ATOM    147  H   THR A  14       6.400  -4.797  -4.202  1.00 99.99
+ATOM    148  HG1 THR A  14       3.862  -5.276  -6.432  1.00 99.99
+ATOM    149  N   SER A  15       7.585  -3.303  -6.989  1.00 91.22
+ATOM    150  CA  SER A  15       8.091  -2.321  -7.953  1.00 91.22
+ATOM    151  C   SER A  15       9.588  -2.045  -7.815  1.00 91.22
+ATOM    152  CB  SER A  15       7.310  -1.014  -7.807  1.00 91.22
+ATOM    153  O   SER A  15      10.166  -1.402  -8.689  1.00 91.22
+ATOM    154  OG  SER A  15       7.463  -0.403  -6.533  1.00 91.22
+ATOM    155  H   SER A  15       7.254  -3.014  -6.090  1.00 99.99
+ATOM    156  HG  SER A  15       6.945   0.391  -6.547  1.00 99.99
+ATOM    157  N   LEU A  16      10.169  -2.425  -6.673  1.00 85.38
+ATOM    158  CA  LEU A  16      11.572  -2.198  -6.342  1.00 85.38
+ATOM    159  C   LEU A  16      12.447  -3.386  -6.770  1.00 85.38
+ATOM    160  CB  LEU A  16      11.669  -1.927  -4.830  1.00 85.38
+ATOM    161  O   LEU A  16      13.583  -3.156  -7.181  1.00 85.38
+ATOM    162  CG  LEU A  16      13.080  -1.545  -4.347  1.00 85.38
+ATOM    163  CD1 LEU A  16      13.490  -0.148  -4.827  1.00 85.38
+ATOM    164  CD2 LEU A  16      13.114  -1.545  -2.817  1.00 85.38
+ATOM    165  H   LEU A  16       9.561  -2.894  -6.031  1.00 99.99
+ATOM    166  N   ASP A  17      11.914  -4.607  -6.661  1.00 73.64
+ATOM    167  CA  ASP A  17      12.422  -5.816  -7.331  1.00 73.64
+ATOM    168  C   ASP A  17      12.082  -5.810  -8.833  1.00 73.64
+ATOM    169  CB  ASP A  17      11.841  -7.073  -6.638  1.00 73.64
+ATOM    170  O   ASP A  17      12.961  -6.201  -9.638  1.00 73.64
+ATOM    171  CG  ASP A  17      12.463  -7.412  -5.269  1.00 73.64
+ATOM    172  OD1 ASP A  17      13.655  -7.806  -5.237  1.00 73.64
+ATOM    173  OD2 ASP A  17      11.729  -7.366  -4.249  1.00 73.64
+ATOM    174  OXT ASP A  17      10.937  -5.420  -9.159  1.00 73.64
+ATOM    175  H   ASP A  17      11.110  -4.650  -6.067  1.00 99.99
+TER     176      ASP A  17    
+REMARK    Source: tests/data/6325_Ethylene.sdf
+REMARK    Docked ligand coordinates...
+REMARK    Solution 1, from model "6325_Ethylene", ID: 004a000c059c0018
+REMARK    Energy  -7.034417e+01, RMS  -1.00
+REMARK    Overlap Volume 0.0, Clash Volume 0.0
+REMARK    Box_min: -10.654 -8.723 -9.638
+REMARK    Box_max: 13.655 4.141 13.473
+REMARK    Cube_min: -10.654 -14.446 -10.237
+REMARK    Cube_max: 13.655 9.863 14.072
+REMARK    Symmetry Type: Default
+REMARK    Symmetry Matrix: 0
+ATOM    177 C    SDF A   1       0.249   0.367  -3.298  1.00 99.99
+ATOM    178 C    SDF A   1      -0.044  -0.621  -2.450  1.00 99.99
+ATOM    179 H    SDF A   1      -0.193   0.398  -4.287  1.00 99.99
+ATOM    180 H    SDF A   1       0.935   1.156  -3.013  1.00 99.99
+ATOM    181 H    SDF A   1       0.399  -0.652  -1.461  1.00 99.99
+ATOM    182 H    SDF A   1      -0.730  -1.410  -2.735  1.00 99.99
diff --git a/tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt b/tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt
new file mode 100644
index 0000000..b48ca48
--- /dev/null
+++ b/tests/data/AT9G99999_monomer_6325_Ethylene/hex_output.txt
@@ -0,0 +1,218 @@
+Hex 8.0.0 starting at Mon Feb 19 14:02:25 2024 on host bar.
+
+Hostname:        bar
+Main memory:     128813 Mb
+CPUs online:     32
+GPUs online:     0
+Compute threads: 24
+
+Sizeof(short):   2
+Sizeof(int):     4
+Sizeof(long):    8
+Sizeof(long int):8
+Sizeof(float):   4
+Sizeof(double):  8
+Sizeof(octa):    8
+Sizeof(int *):   8
+Sizeof(long *):  8
+Sizeof(void *):  8
+
+Default path:    /home/dnguyen/BAR_API_forked/BAR_API
+PATH:            /home/dnguyen/hex/exe:/home/dnguyen/hex/bin:/home/dnguyen/BAR_API_forked/BAR_API/venv/bin:/home/dnguyen/.vscode-server/bin/8b3775030ed1a69b13e4f4c628c612102e30a681/bin/remote-cli:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/ho
+Hex program:     /home/dnguyen/hex/exe/hex8.0.0-nogui.x64
+HEX_ROOT:        /home/dnguyen/hex
+HEX_STRIDE:      stride.x64
+HEX_PDB:         NULL
+HEX_DATA:        NULL
+HEX_MACROS:      NULL
+HEX_COLOURS:     NULL
+HEX_STARTUP:     NULL
+HEX_FONTFILE:    NULL
+HEX_PIPE:        NULL
+HEX_CACHE:       /home/dnguyen/hex_cache
+HEX_LOG:         NULL
+HEX_CPUS:        NULL
+HEX_GPUS:        NULL
+HEX_FIRST_GPU:   NULL
+HEX_GTO_SCALE:   NULL
+HEX_ETO_SCALE:   NULL
+
+CUDA enabled:    No
+
+
+
+Running HEX_STARTUP file: /home/dnguyen/hex/data/startup_v5.mac
+Disc Cache enabled. Using directory: /home/dnguyen/hex_cache
+
+
+Reading commands from stdin ...
+>>  open_receptor  tests/data/AF2_AT9G99999_monomer.pdb
+Assuming tests/data/AF2_AT9G99999_monomer.pdb is a PDB file...
+
+Opened PDB file: tests/data/AF2_AT9G99999_monomer.pdb, ID = AF2_AT9G99999_monomer
+Loaded PDB file: tests/data/AF2_AT9G99999_monomer.pdb, (18 residues, 176 atoms, 1 models)
+Counted 1 +ve and 2 -ve formal charged residues: Net formal charge: -1
+>AF2_AT9G99999_monomer A
+MFRFLDWIFTVATTSLD
+>>                 open_ligand  tests/data/6325_Ethylene.sdf
+Assuming tests/data/6325_Ethylene.sdf is an SDF file...
+
+Opened SDF file: tests/data/6325_Ethylene.sdf, ID = 6325_Ethylene
+>>                 docking_correlation 1
+Docking with shape+electrostatics.
+>>                 docking_score_threshold 0
+>>                 max_docking_solutions 25
+>>                 docking_receptor_stepsize 5.50
+Receptor step size: 5.50 deg. B=32, M=64, T=1692.
+>>                 docking_ligand_stepsize 5.50
+Ligand step size: 5.50 deg. B=32, M=64, T=1692.
+>>                 docking_alpha_stepsize 2.80
+Twist step size: 2.80 deg. B=64, M=128.
+>>                 docking_main_scan 16
+>>                 receptor_origin C-825:VAL-O
+*Warning* No Match: C-825:VAL-O -> C-825:VAL-O (Chain-ResidueID:ResidueName-AtomName)
+>>                 commit_edits
+>>                 activate_docking
+
+Contouring surface for molecule AF2_AT9G99999_monomer.
+Polar probe = 1.40A, Apolar probe = 1.40A
+Gaussian sampling over 146 atoms done in 0.03 seconds.
+Contoured 37544 triangles (18774 vertices) in 0.03 seconds.
+Surface traversal done in 0.01 seconds - Found 1 surface segments.
+Primary surface:   Area = 1893.22, Volume = 5111.06.
+Culled 0 small segments in 0.01 seconds.
+Total contouring time: 0.05 seconds.
+
+
+Contouring surface for molecule 6325_Ethylene.
+Polar probe = 1.40A, Apolar probe = 1.40A
+Gaussian sampling over 2 atoms done in 0.00 seconds.
+Contoured 3244 triangles (1624 vertices) in 0.00 seconds.
+Surface traversal done in 0.00 seconds - Found 1 surface segments.
+Primary surface:   Area = 164.05, Volume = 195.88.
+Culled 0 small segments in 0.00 seconds.
+Total contouring time: 0.00 seconds.
+
+Sampling surface and interior volumes for molecule AF2_AT9G99999_monomer.
+Generated 14696 exterior and 5220 interior skin grid cells.
+Exterior skin volume = 3174.34; interior skin volume = 1127.52.
+Volume sampling done in 0.04 seconds.
+Sampling surface and interior volumes for molecule 6325_Ethylene.
+Generated 966 exterior and 123 interior skin grid cells.
+Exterior skin volume = 208.66; interior skin volume = 26.57.
+Volume sampling done in 0.00 seconds.
+
+Calculating potential to N = 25 (5525 coefficients) using 24 Tasks ...
+Grid: 74x74x74 = 405224 cells (20309 non-zero) of 0.60 Angstroms.
+Done integration over 20309 cells in 0.06s (321208/s).
+
+Calculating electrostatics for molecule AF2_AT9G99999_monomer.
+Charge density for molecule AF2_AT9G99999_monomer to N = 25:  173 atoms done in 0.00 seconds.
+Potential for molecule AF2_AT9G99999_monomer to N = 25 done in 0.02 seconds.
+Calculating electrostatics for molecule 6325_Ethylene.
+Charge density for molecule 6325_Ethylene to N = 25:  0 atoms done in 0.00 seconds.
+Potential for molecule 6325_Ethylene to N = 25 done in 0.01 seconds.
+
+------------------------------------------------------------------------------
+Docking will output a maximum of 25 solutions per pair...
+
+------------------------------------------------------------------------------
+Docking 1 pair of starting orientations...
+
+Docking receptor: AF2_AT9G99999_monomer and ligand: 6325_Ethylene...
+
+Receptor AF2_AT9G99999_monomer: Tag = AF2_AT9G99999_monomer
+Ligand   6325_Ethylene: Tag = 6325_Ethylene
+
+Setting up shape + electrostatics correlation.
+
+Starting SPF search.
+Setting docking_score threshold = 0.0
+Setting 30 distance samples from 0.00 to 23.20, with steps of 0.80.
+
+
+Total 6D space: Iterate[30,1692,1] x FFT[128,32,64] = 13306429440.
+Initial rotational increments (N=16) Receptor: 1692 (39Mb), Ligand: 1692 (39Mb)
+Applying 1692+1692 coefficient rotations on 24 CPUs for N=16.
+Done 3384 rotations in a total of 0.08s (44932/s).
+
+Starting 3D FFT search using 24 CPUs and 0 GPUs with N=16, Nalpha=128/128.
+Estart = 68.63.
+Done 13306429440 orientations in 23.31s (570939787/s).
+Found 51678702/13306429440 within score threshold = 0.0 NOT including start guess.
+
+Time spent culling 42*1600000 solutions = 7.90s.
+Starting guess not found in top 1278702 solutions.
+Emin = -68.28, Emax = -0.00
+
+Re-sampling top 40000 orientations -> top 28433 retained.
+Surviving rotational steps (N=25) Receptor: 48 (9Mb), Ligand: 1441 (243Mb)
+Applying 48+1441 coefficient rotations on 24 CPUs for N=25.
+Done 1489 rotations in a total of 0.11s (12949/s).
+
+Starting 1D FFT refinement using 24 CPUs and 0 GPUs with N=25, Nalpha=128/128.
+Estart = 59.61.
+Done 3639424 orientations in 0.24s (15429264/s).
+Found 70787/3639424 within score threshold = 0.0 NOT including start guess.
+
+
+Solution buffer reached 70787/200000 = 35.4% occupancy with no culling.
+Starting guess not found in top 70787 solutions.
+Emin = -70.34, Emax = -6.16
+
+Docking correlation summary by RMS deviation and steric clashes
+-------------------------------------------------------------------------
+  Soln   Etotal    Eshape    Eforce    Eair              RMS        Bumps
+  ----  --------- --------- --------- ---------   ----------------  -----
+
+
+Docked structures AF2_AT9G99999_monomer:6325_Ethylene in a total of 0 min, 24 sec.
+
+
+------------------------------------------------------------------------------
+Saving top 25 orientations.
+
+Docking done in a total of 0 min, 24 sec.
+
+
+------------------------------------------------------------------------------
+
+No AIRs enabled or defined. Skipping restraint checks.
+Clustering found 1 clusters from 25 docking solutions in 0.00 seconds.
+
+---- ---- ------- ------- ------- ------- ------- --- -----
+Clst Soln  Models  Etotal  Eshape  Eforce  Eair   Bmp  RMS
+---- ---- ------- ------- ------- ------- ------- --- -----
+   1    1 001:001   -70.3   -70.3     0.0     0.0  -1  -1.00   
+   1    2 001:001   -70.2   -70.2     0.0     0.0  -1  -1.00   
+   1    3 001:001   -70.2   -70.2     0.0     0.0  -1  -1.00   
+   1    4 001:001   -70.1   -70.1     0.0     0.0  -1  -1.00   
+   1    5 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1    6 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1    7 001:001   -70.0   -70.0     0.0     0.0  -1  -1.00   
+   1    8 001:001   -69.8   -69.8     0.0     0.0  -1  -1.00   
+   1    9 001:001   -69.8   -69.8     0.0     0.0  -1  -1.00   
+   1   10 001:001   -69.8   -69.8     0.0     0.0  -1  -1.00   
+   1   11 001:001   -69.8   -69.8     0.0     0.0  -1  -1.00   
+   1   12 001:001   -69.8   -69.8     0.0     0.0  -1  -1.00   
+   1   13 001:001   -69.7   -69.7     0.0     0.0  -1  -1.00   
+   1   14 001:001   -69.7   -69.7     0.0     0.0  -1  -1.00   
+   1   15 001:001   -69.6   -69.6     0.0     0.0  -1  -1.00   
+   1   16 001:001   -69.6   -69.6     0.0     0.0  -1  -1.00   
+   1   17 001:001   -69.6   -69.6     0.0     0.0  -1  -1.00   
+   1   18 001:001   -69.6   -69.6     0.0     0.0  -1  -1.00   
+   1   19 001:001   -69.5   -69.5     0.0     0.0  -1  -1.00   
+   1   20 001:001   -69.5   -69.5     0.0     0.0  -1  -1.00   
+   1   21 001:001   -69.5   -69.5     0.0     0.0  -1  -1.00   
+   1   22 001:001   -69.5   -69.5     0.0     0.0  -1  -1.00   
+   1   23 001:001   -69.5   -69.5     0.0     0.0  -1  -1.00   
+   1   24 001:001   -69.5   -69.5     0.0     0.0  -1  -1.00   
+   1   25 001:001   -69.5   -69.5     0.0     0.0  -1  -1.00   
+------------------------------------------------------------
+   1    1 001:001   -70.3   -70.3     0.0     0.0  -1  -1.00   
+>>                 save_range 1 100 tests/data/AT9G99999_monomer_6325_Ethylene/ AT9G99999_monomer_6325_Ethylene pdb
+Saving orientation 1 (solution 1) to tests/data/AT9G99999_monomer_6325_Ethylene/AT9G99999_monomer_6325_Ethylene0001.pdb
+
+Max application memory used: 384.17 MB virtual + 0.00 KB shared.
+
+Hex stopping: Mon Feb 19 14:02:49 2024
diff --git a/tests/resources/test_docking_utils.py b/tests/resources/test_docking_utils.py
new file mode 100644
index 0000000..a225f8f
--- /dev/null
+++ b/tests/resources/test_docking_utils.py
@@ -0,0 +1,131 @@
+import unittest
+from api.utils.docking_utils import Receptor, ComplexReceptor, MonomerReceptor
+from api.utils.docking_utils import Ligand
+from api.utils.docking_utils import Docker
+from api.utils.docking_utils import MonomerDocking, ComplexDocking
+
+
+class TestReceptorClasses(unittest.TestCase):
+
+    def test_monomer_receptor_init(self):
+        """Test that MonomerReceptor object is correctly instantiated."""
+
+        monomer_receptor = MonomerReceptor("AT9G99999", "/tests/data/AF2_AT9G99999_monomer.pdb")
+        self.assertEqual(monomer_receptor.name, "AT9G99999")
+        self.assertEqual(monomer_receptor.file_path, "/tests/data/AF2_AT9G99999_monomer.pdb")
+
+    def test_complex_receptor_init(self):
+        """Test that ComplexReceptor object is correctly instantiated. This
+        function also tests that monomers are separated correctly using
+        separate_monomers when the object is instantiated.
+        """
+
+        monomers_list = ["A", "B"]
+        complex_receptor = ComplexReceptor("test_complex_receptor",
+                                           "tests/data/AF2_AT8G88888_complex.pdb",
+                                           monomers_list)
+        self.assertEqual(complex_receptor.name, "test_complex_receptor")
+        self.assertEqual(complex_receptor.file_path, "tests/data/AF2_AT8G88888_complex.pdb")
+        self.assertEqual(complex_receptor.monomers_list, monomers_list)
+        self.assertEqual(len(complex_receptor.line_numbers), len(monomers_list))
+        self.assertEqual(complex_receptor.line_numbers, [[48, 180], [181, 195]])
+
+
+class TestLigandClass(unittest.TestCase):
+
+    def test_ligand_init(self):
+        """Test that Ligand object is correctly instantiated."""
+
+        ligand = Ligand("test_ligand", "tests/data/6325_Ethylene.sdf")
+        self.assertEqual(ligand.name, "test_ligand")
+        self.assertEqual(ligand.file_path, "tests/data/6325_Ethylene.sdf")
+
+
+class TestDockerClass(unittest.TestCase):
+
+    def test_create_monomer_receptor(self):
+        """Test that docker creates a MonomerReceptor object when given a
+        monomer pdb file."""
+
+        receptor_name = "AT9G99999_monomer"
+        receptor_path = "tests/data/AF2_AT9G99999_monomer.pdb"
+        receptor = Docker.create_receptor(receptor_name, receptor_path)
+        self.assertEqual(isinstance(receptor, MonomerReceptor), True)
+        self.assertEqual(receptor.name, receptor_name)
+        self.assertEqual(receptor.file_path, receptor_path)
+
+    def test_create_complex_receptor(self):
+        """Test that docker creates a correct ComplexReceptor object when
+        given a complex pdb file."""
+
+        receptor_name = "AT8G88888_complex"
+        receptor_path = "tests/data/AF2_AT8G88888_complex.pdb"
+        receptor = Docker.create_receptor(receptor_name, receptor_path)
+        self.assertEqual(isinstance(receptor, Receptor), True)
+        self.assertEqual(receptor.name, "AT8G88888_complex")
+        self.assertEqual(receptor.file_path, "tests/data/AF2_AT8G88888_complex.pdb")
+        self.assertEqual(receptor.monomers_list, ["A", "B"])
+        self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]])
+
+    def test_docking_exists(self):
+        """Test that Docker.create_docking returns None when the docking
+        already exists."""
+
+        receptor_name = "AT8G88888_complex"
+        ligand_name = "6325_Ethylene"
+        receptor_name_2 = "AT9G99999_monomer"
+        results_path = "tests/data/"
+        docking = Docker.create_docking(receptor_name, ligand_name, results_path)
+        docking2 = Docker.create_docking(receptor_name_2, ligand_name, results_path)
+        self.assertEqual(docking, None)
+        self.assertEqual(docking2, None)
+
+
+class TestDockingClass(unittest.TestCase):
+
+    def test_docking_complex_results(self):
+        """Test that correct dictionary is created in normalized_results for
+        complex docking."""
+
+        receptor_name = "AT8G88888_complex"
+        receptor_path = "tests/data/AF2_AT8G88888_complex.pdb"
+        ligand_name = "6325_Ethylene"
+        ligand_path = "tests/data/6325_Ethylene.sdf"
+        results_path = "tests/data/AT8G88888_complex_6325_Ethylene/"
+        receptor = Docker.create_receptor(receptor_name, receptor_path)
+        ligand = Ligand(ligand_name, ligand_path)
+        docking = ComplexDocking(receptor, ligand, results_path)
+        docking.separate_results()
+        docking.crte_ligand_reserved_attr()
+        normalized_results = docking.normalize_results(5)
+
+        self.assertIsInstance(normalized_results, dict)
+        self.assertIsNot(len(normalized_results), 0)
+        self.assertIn('AT8G88888_complex_A', normalized_results)
+        self.assertIn('AT8G88888_complex_B', normalized_results)
+        self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_A'])
+        self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_B'])
+
+    def test_docking_monomer_results(self):
+        """Test that correct dictionary is created in normalized_results for
+        monomer docking."""
+
+        receptor_name = "AT9G99999_monomer"
+        receptor_path = "tests/data/AF2_AT9G99999_monomer.pdb"
+        ligand_name = "6325_Ethylene"
+        ligand_path = "tests/data/6325_Ethylene.sdf"
+        results_path = "tests/data/AT9G99999_monomer_6325_Ethylene/"
+        receptor = Docker.create_receptor(receptor_name, receptor_path)
+        ligand = Ligand(ligand_name, ligand_path)
+        docking = MonomerDocking(receptor, ligand, results_path)
+        docking.crte_ligand_reserved_attr()
+        normalized_results = docking.normalize_results(5)
+
+        self.assertIsInstance(normalized_results, dict)
+        self.assertIsNot(len(normalized_results), 0)
+        self.assertIn('AT9G99999_monomer', normalized_results)
+        self.assertIn('6325_Ethylene', normalized_results['AT9G99999_monomer'])
+
+
+if __name__ == '__main__':
+    unittest.main()

From 1fdf2f6f37f7009de7fad5ff4181233ee5e68326 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Tue, 27 Feb 2024 16:03:49 -0500
Subject: [PATCH 14/35] Move test_docking_utils.py to tests/utils/ folder

---
 tests/{resources => utils}/test_docking_utils.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{resources => utils}/test_docking_utils.py (100%)

diff --git a/tests/resources/test_docking_utils.py b/tests/utils/test_docking_utils.py
similarity index 100%
rename from tests/resources/test_docking_utils.py
rename to tests/utils/test_docking_utils.py

From 4bec80afd8f839dfb4d2a913461bb08898c5c442 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Tue, 19 Mar 2024 14:19:07 -0400
Subject: [PATCH 15/35] Add regex matching to separate coordinates in docking
 results pdb

Reformat output json to include path to results file and the date the docking was performed
---
 api/utils/docking_utils.py | 95 +++++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 32 deletions(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index b0724e5..210e699 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -220,13 +220,22 @@ def result_dict_generator(self, monomer_number, threshold):
         for line in receptor_file_lines:
             splitted_line = line.split()
             if line[0:4] == 'ATOM':
-                coord = map(float, filter(None, splitted_line[6:9]))
 
                 # check if chain name and residue are in the same column, e.g. A1000
                 if re.search(r'\d', splitted_line[4]) is None:
                     residue = splitted_line[5]
                 else:
                     residue = splitted_line[4][1:]
+
+                # Get the coordinates by regex matching, since they are not
+                # always separated by a space
+                pattern = r"[-+]?\d+\.\d+"
+                stripped_coords = line[28:54].strip()
+                # Find all matches in the input string
+                matches = re.findall(pattern, stripped_coords)
+                # Convert the matches to floats
+                coord = [float(match) for match in matches]
+
                 if int(residue) in reference:
                     reference[int(residue)][int(splitted_line[1])] = tuple(coord)
                 else:
@@ -358,14 +367,14 @@ def normalize_results(self, threshold):
         ligand_key = list(results_dict[receptor_key].keys())[0]
 
         inside_dict = results_dict[receptor_key][ligand_key]
-        abs_max = None
-        abs_min = None
+        max_energy = None
+        min_energy = None
 
         # To eliminate empty dictionaries that might cause division errors below
         # normalized_mon_dicitonary calculations
         if inside_dict != {}:
-            abs_min = min(inside_dict.values())
-            abs_max = max(inside_dict.values())
+            min_energy = min(inside_dict.values())
+            max_energy = max(inside_dict.values())
 
         all_normalized_results = {}
 
@@ -375,12 +384,12 @@ def normalize_results(self, threshold):
 
         # prevent substraction of equal values or values that doesn't make any sense
         # in terms of accuracy
-        if abs_min == abs_max:
+        if min_energy == max_energy:
             for k, v in inside_dict.items():
                 normalized_mon_dict[receptor_key][ligand_key][k] = 1
         else:
             for k, v in inside_dict.items():
-                normalized_value = (v - abs_min) / (abs_max - abs_min)
+                normalized_value = (v - min_energy) / (max_energy - min_energy)
                 normalized_mon_dict[receptor_key][ligand_key][k] = normalized_value
         all_normalized_results.update(normalized_mon_dict)
         return all_normalized_results
@@ -527,9 +536,10 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         ct = datetime.datetime.now()
         print("Starting the docking process at {}".format(ct))
         docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
-        if docking is None:
-            receptor = receptor.split('.')[0]
-            results_path = docking_pdb_path + receptor + '_' + ligand + '/'
+        if isinstance(docking, list):
+            # receptor = receptor.split('.')[0]
+            # results_path = docking_pdb_path + receptor + '_' + ligand + '/'
+            results_path = docking[1]
             with open(results_path + "final.json") as json_file:
                 final_json = json.load(json_file)
             return final_json
@@ -538,7 +548,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         elif docking == "Ligand file not found":
             return "Ligand file not found"
 
-        results_path = docking_pdb_path + receptor + '_' + ligand + '/'
+        results_path = docking_pdb_path + docking.receptor.name + '_' + ligand + '/'
 
         # create folder to store docking results
         os.makedirs(results_path)
@@ -548,12 +558,16 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
             docking.separate_results()
         docking.crte_ligand_reserved_attr()
         normalized_results = docking.normalize_results(5)
+        final_json = {}
+        final_json["energies_json"] = normalized_results
+        final_json["path"] = '//bar.utoronto.ca/HEX_RESULTS/' + docking.receptor.name + '_' + ligand + '/'
+        final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + '_' + ligand + '0001.pdb'
+        final_json["date"] = datetime.datetime.now().date().strftime("%Y-%m-%d")
         new_json = docking.results_path + "final.json"
         with open(new_json, 'w') as file:
-            file.write(json.dumps(normalized_results))
-        ct = datetime.datetime.now()
-        print("current time:-", ct)
-        return normalized_results
+            file.write(json.dumps(final_json))
+        print("current time:-", datetime.datetime.now())
+        return final_json
 
     def create_receptor(receptor_name: str, receptor_file_path: str):
         """Return a new receptor with the name receptor_name, by parsing
@@ -589,27 +603,42 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
         """Return a docking pair, which contains a Receptor and a Ligand, as
         specified by receptor_name and ligand_name, respectively.
         """
+        # find receptor file and create receptor object
+        receptor_folder = "/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/"
+
         # check that the docking combination has not been run before
         # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/'
         if '.' in receptor_name:
             receptor_name = receptor_name[:receptor_name.index('.')]
+        command = ['ls ' + 'AF2_' + receptor_name + '*.pdb']
+        completed_process = subprocess.run(command,
+                                       shell = True, 
+                                       cwd = receptor_folder,
+                                       stdout = subprocess.PIPE, 
+                                       stderr = subprocess.PIPE, 
+                                       text = True)
+        if completed_process.returncode != 0:
+            print("Receptor file not found")
+            # return "Receptor file not found"
+        receptor_file = completed_process.stdout[:-1]
+        
+        receptor_file_path = receptor_folder + receptor_file
+        receptor_name = receptor_file[4:(receptor_file.index('.') + 2)]
+        
         results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
         print(results_path)
+
         if os.path.exists(results_path):
             print("The docking between {0} and {1} has already been done.".format(receptor_name,
                                                                                   ligand_name))
-            return None
-
-        # find receptor file and create receptor object
-        receptor_folder = '/DATA/AF2-pdbs/Arabidopsis/AF2_Ath_PDBs_FAs_renamed/'
-        receptor_file_found = False
+            return [None, results_path]
+        receptor = Docker.create_receptor(receptor_name, receptor_file_path)
 
-        for receptor_file in os.listdir(receptor_folder):
-            if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \
-                    (receptor_name in receptor_file):
-                receptor_file_found = True
-                receptor_file_path = receptor_folder + receptor_file
-                receptor = Docker.create_receptor(receptor_name, receptor_file_path)
+        # for receptor_file in os.listdir(receptor_folder):
+        #     if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \
+        #             (receptor_name in receptor_file):
+        #         receptor_file_path = receptor_folder + receptor_file
+        #         receptor = Docker.create_receptor(receptor_name, receptor_file_path)
 
         # find ligand file and create ligand object
         ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/'
@@ -622,10 +651,8 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
                 ligand_file_found = True
                 ligand_file_path = ligand_folder + '/' + ligand_file
                 ligand = Ligand(ligand_name, ligand_file_path)
-
-        if not receptor_file_found:
-            return "Receptor file not found"
-        elif not ligand_file_found:
+            
+        if not ligand_file_found:
             return "Ligand file not found"
 
         # receptor and ligand objects are created and ready for docking
@@ -675,12 +702,13 @@ def create_mapping_filtered(folder_path: str, results_path: str):
         folder_path: where the sdf files are stored
         results_path: where the json file should be created
         """
-        mapped_sdf = {}
+        mapped_sdf = []
         sdf_files = os.listdir(folder_path)
         for file in sdf_files:
             if file[0] != "." and file[-4:] == ".sdf":
                 name = file[file.index("_") + 1:-4]
-                mapped_sdf[name] = file
+                mapped_sdf.append({'value': file, 'text': name})
+                # mapped_sdf[name] = file
         json_file = results_path + "sdf_mapping_filtered.json"
         with open(json_file, 'w') as file:
             file.write(json.dumps(mapped_sdf))
@@ -707,3 +735,6 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str):
         with open(json_file, 'w') as file:
             file.write(json.dumps(mapped_sdf))
         return mapped_sdf
+
+# if __name__ == "__main__":
+#     Docker.start("AT3G22150", "801_Auxin", "/DATA/HEX_API/RESULTS/")
\ No newline at end of file

From a7ad94ef2817678ada80233ac0e9e52a09e9d320 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 21 Mar 2024 17:02:13 -0400
Subject: [PATCH 16/35] Add timestamp to final energies json

---
 api/utils/docking_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 210e699..112a9b5 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -534,6 +534,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         """
         # create docking object
         ct = datetime.datetime.now()
+        ct_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         print("Starting the docking process at {}".format(ct))
         docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
         if isinstance(docking, list):
@@ -562,7 +563,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         final_json["energies_json"] = normalized_results
         final_json["path"] = '//bar.utoronto.ca/HEX_RESULTS/' + docking.receptor.name + '_' + ligand + '/'
         final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + '_' + ligand + '0001.pdb'
-        final_json["date"] = datetime.datetime.now().date().strftime("%Y-%m-%d")
+        final_json["date"] = ct_string
         new_json = docking.results_path + "final.json"
         with open(new_json, 'w') as file:
             file.write(json.dumps(final_json))

From 35bb983bbcfcf74e3d14f3f40a60a5a034b687af Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 21 Mar 2024 17:11:45 -0400
Subject: [PATCH 17/35] Fix styling issues

---
 api/utils/docking_utils.py | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 112a9b5..15329f8 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -613,19 +613,19 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
             receptor_name = receptor_name[:receptor_name.index('.')]
         command = ['ls ' + 'AF2_' + receptor_name + '*.pdb']
         completed_process = subprocess.run(command,
-                                       shell = True, 
-                                       cwd = receptor_folder,
-                                       stdout = subprocess.PIPE, 
-                                       stderr = subprocess.PIPE, 
-                                       text = True)
+                                           shell=True,
+                                           cwd=receptor_folder,
+                                           stdout=subprocess.PIPE,
+                                           stderr=subprocess.PIPE,
+                                           text=True)
         if completed_process.returncode != 0:
             print("Receptor file not found")
             # return "Receptor file not found"
         receptor_file = completed_process.stdout[:-1]
-        
+
         receptor_file_path = receptor_folder + receptor_file
         receptor_name = receptor_file[4:(receptor_file.index('.') + 2)]
-        
+
         results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
         print(results_path)
 
@@ -635,12 +635,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
             return [None, results_path]
         receptor = Docker.create_receptor(receptor_name, receptor_file_path)
 
-        # for receptor_file in os.listdir(receptor_folder):
-        #     if receptor_file[0] != '.' and receptor_file[-4:] == '.pdb' and \
-        #             (receptor_name in receptor_file):
-        #         receptor_file_path = receptor_folder + receptor_file
-        #         receptor = Docker.create_receptor(receptor_name, receptor_file_path)
-
         # find ligand file and create ligand object
         ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/'
         ligand_file_found = False
@@ -652,7 +646,7 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
                 ligand_file_found = True
                 ligand_file_path = ligand_folder + '/' + ligand_file
                 ligand = Ligand(ligand_name, ligand_file_path)
-            
+
         if not ligand_file_found:
             return "Ligand file not found"
 
@@ -736,6 +730,3 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str):
         with open(json_file, 'w') as file:
             file.write(json.dumps(mapped_sdf))
         return mapped_sdf
-
-# if __name__ == "__main__":
-#     Docker.start("AT3G22150", "801_Auxin", "/DATA/HEX_API/RESULTS/")
\ No newline at end of file

From be5d74abd0b0ad1086b83c0b456d7ed7d5d20bd9 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Fri, 22 Mar 2024 14:18:17 -0400
Subject: [PATCH 18/35] Fix styling issues

---
 api/resources/snps.py | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/api/resources/snps.py b/api/resources/snps.py
index aa5616e..404b2a6 100755
--- a/api/resources/snps.py
+++ b/api/resources/snps.py
@@ -50,35 +50,23 @@
 
 @snps.route("/docking/<receptor>/<ligand>")
 class Docking(Resource):
-    decorators = [limiter.limit("2/minute")]    
+    decorators = [limiter.limit("2/minute")]
 
     @snps.param("receptor", _in="path", default="bri1")
     @snps.param("ligand", _in="path", default="brass")
     def get(self, receptor, ligand):
         receptor = escape(receptor)
         ligand = escape(ligand)
-
-        # TODO: Clean comments left by metyu before commit
-    
-        docking_pdb_link = "//bar.utoronto.ca/docking-pdbs/"
         docking_pdb_path = "/DATA/HEX_API/RESULTS/"
 
-        # TODO: Then add regex check to receptors/ligands (For Arabidopsis genes, simply reuse 
-        # is_arabidopsis_gene_valid; but you will need make regex check for your SDFs)
-        #Receptors can be adjusted please adjust the file format on the directories as well (sdf vs pdb)
-
         if not BARUtils.is_arabidopsis_gene_valid(receptor):
             return BARUtils.error_exit("Invalid arapbidopsis pdb gene id"), 400
-        
+
         matched = re.search("[a-z]", ligand)
         if matched is None:
             return BARUtils.error_exit("Invalid ligand name"), 400
 
-        docking_file_name = receptor.upper() + "-" + ligand.upper() + \
-                "-docking0001.pdb "
-        response = requests.get("https:" + docking_pdb_link + docking_file_name)
-
-        # Importing start function to initiate docking_utils  file
+        # start function to initiate docking_utils file
 
         final_json = Docker.start(receptor, ligand, docking_pdb_path)
         return BARUtils.success_exit(final_json)

From 203f5471f085b19f77842008eae1fb920c1d0b6f Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Fri, 22 Mar 2024 17:54:30 -0400
Subject: [PATCH 19/35] Fix test for testing docking that already exists

---
 tests/data/AT1G66340.1_6325_Ethylene/final.json | 1 +
 tests/utils/test_docking_utils.py               | 7 ++-----
 2 files changed, 3 insertions(+), 5 deletions(-)
 create mode 100644 tests/data/AT1G66340.1_6325_Ethylene/final.json

diff --git a/tests/data/AT1G66340.1_6325_Ethylene/final.json b/tests/data/AT1G66340.1_6325_Ethylene/final.json
new file mode 100644
index 0000000..e872899
--- /dev/null
+++ b/tests/data/AT1G66340.1_6325_Ethylene/final.json
@@ -0,0 +1 @@
+{"dummyjson": "true"}
\ No newline at end of file
diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index a225f8f..0a6ce2d 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -71,14 +71,11 @@ def test_docking_exists(self):
         """Test that Docker.create_docking returns None when the docking
         already exists."""
 
-        receptor_name = "AT8G88888_complex"
+        receptor_name = "AT1G66340"
         ligand_name = "6325_Ethylene"
-        receptor_name_2 = "AT9G99999_monomer"
         results_path = "tests/data/"
         docking = Docker.create_docking(receptor_name, ligand_name, results_path)
-        docking2 = Docker.create_docking(receptor_name_2, ligand_name, results_path)
-        self.assertEqual(docking, None)
-        self.assertEqual(docking2, None)
+        self.assertEqual(docking[0], None)
 
 
 class TestDockingClass(unittest.TestCase):

From 0e4f104adab2f342e6339347534ee286c6784daf Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 28 Mar 2024 13:41:25 -0400
Subject: [PATCH 20/35] Make changes to test file to skip in GitHub environment

Change json results when receptor or ligand not found
---
 api/resources/snps.py             | 7 ++++++-
 api/utils/docking_utils.py        | 4 ++--
 tests/utils/test_docking_utils.py | 2 ++
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/api/resources/snps.py b/api/resources/snps.py
index 404b2a6..112c11d 100755
--- a/api/resources/snps.py
+++ b/api/resources/snps.py
@@ -69,7 +69,12 @@ def get(self, receptor, ligand):
         # start function to initiate docking_utils file
 
         final_json = Docker.start(receptor, ligand, docking_pdb_path)
-        return BARUtils.success_exit(final_json)
+        if final_json == "Receptor file not found":
+            return BARUtils.error_exit("There are no data found for the given gene"), 400
+        elif final_json == "Ligand file not found":
+            return BARUtils.error_exit("There are no data found for the given ligand"), 400
+        else:
+            return BARUtils.success_exit(final_json)
 
 
 @snps.route("/phenix/<fixed_pdb>/<moving_pdb>")
diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 15329f8..3f64782 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -618,9 +618,9 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE,
                                            text=True)
+        print("return code" + str(completed_process.returncode))
         if completed_process.returncode != 0:
-            print("Receptor file not found")
-            # return "Receptor file not found"
+            return "Receptor file not found"
         receptor_file = completed_process.stdout[:-1]
 
         receptor_file_path = receptor_folder + receptor_file
diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 0a6ce2d..0bc10e0 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -1,4 +1,5 @@
 import unittest
+import pytest
 from api.utils.docking_utils import Receptor, ComplexReceptor, MonomerReceptor
 from api.utils.docking_utils import Ligand
 from api.utils.docking_utils import Docker
@@ -67,6 +68,7 @@ def test_create_complex_receptor(self):
         self.assertEqual(receptor.monomers_list, ["A", "B"])
         self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]])
 
+    @pytest.mark.integration
     def test_docking_exists(self):
         """Test that Docker.create_docking returns None when the docking
         already exists."""

From 88b92f0e94e101650c05ef25c4451826dd613793 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 28 Mar 2024 17:37:18 -0400
Subject: [PATCH 21/35] Add code to skip test in CI

---
 tests/utils/test_docking_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 0bc10e0..105bb68 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -4,7 +4,9 @@
 from api.utils.docking_utils import Ligand
 from api.utils.docking_utils import Docker
 from api.utils.docking_utils import MonomerDocking, ComplexDocking
+import os
 
+IN_CI = os.getenv("CI") == "true"
 
 class TestReceptorClasses(unittest.TestCase):
 
@@ -68,7 +70,7 @@ def test_create_complex_receptor(self):
         self.assertEqual(receptor.monomers_list, ["A", "B"])
         self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]])
 
-    @pytest.mark.integration
+    @pytest.mark.skipif(IN_CI, reason = "Doesn't work in Github CI")
     def test_docking_exists(self):
         """Test that Docker.create_docking returns None when the docking
         already exists."""

From 1a20055fc1a0e6fcce3035d63ba2f2341e67093b Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 28 Mar 2024 17:42:46 -0400
Subject: [PATCH 22/35] Fix code to skip test if not running on BAR

---
 tests/utils/test_docking_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 105bb68..9b97d89 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -6,7 +6,7 @@
 from api.utils.docking_utils import MonomerDocking, ComplexDocking
 import os
 
-IN_CI = os.getenv("CI") == "true"
+NOT_IN_BAR = not os.environ.get("BAR") == "true"
 
 class TestReceptorClasses(unittest.TestCase):
 
@@ -70,7 +70,7 @@ def test_create_complex_receptor(self):
         self.assertEqual(receptor.monomers_list, ["A", "B"])
         self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]])
 
-    @pytest.mark.skipif(IN_CI, reason = "Doesn't work in Github CI")
+    @pytest.mark.skipif(NOT_IN_BAR, reason = "Only works on BAR")
     def test_docking_exists(self):
         """Test that Docker.create_docking returns None when the docking
         already exists."""

From 6853a159c6b4a23a6368dba4146d4951baadd109 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 28 Mar 2024 18:00:07 -0400
Subject: [PATCH 23/35] Fix styling

---
 tests/utils/test_docking_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 9b97d89..610e457 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -8,6 +8,7 @@
 
 NOT_IN_BAR = not os.environ.get("BAR") == "true"
 
+
 class TestReceptorClasses(unittest.TestCase):
 
     def test_monomer_receptor_init(self):
@@ -70,7 +71,7 @@ def test_create_complex_receptor(self):
         self.assertEqual(receptor.monomers_list, ["A", "B"])
         self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]])
 
-    @pytest.mark.skipif(NOT_IN_BAR, reason = "Only works on BAR")
+    @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR")
     def test_docking_exists(self):
         """Test that Docker.create_docking returns None when the docking
         already exists."""

From dbd55fcf703a5fdafa83775651a4fdb83c11f089 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 4 Apr 2024 09:56:50 -0400
Subject: [PATCH 24/35] Add tests for SDFMapping

---
 api/utils/docking_utils.py                    |   7 +-
 .../filtered/443453_Gibberellin_A15.sdf       | 266 ++++++++++++++
 .../filtered/5984_D-(-)-Fructose.sdf          | 297 ++++++++++++++++
 .../filtered/73672_isoxaben.sdf               | 332 ++++++++++++++++++
 .../sample_ligands/filtered/801_Auxin.sdf     | 183 ++++++++++
 .../sample_ligands/unfiltered/103061392.sdf   |  36 ++
 .../sample_ligands/unfiltered/134970870.sdf   | 107 ++++++
 .../sample_ligands/unfiltered/135191341.sdf   | 105 ++++++
 .../sample_ligands/unfiltered/135355153.sdf   |  41 +++
 tests/utils/test_docking_utils.py             |  61 +++-
 10 files changed, 1429 insertions(+), 6 deletions(-)
 create mode 100644 tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf
 create mode 100644 tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf
 create mode 100644 tests/data/sample_ligands/filtered/73672_isoxaben.sdf
 create mode 100644 tests/data/sample_ligands/filtered/801_Auxin.sdf
 create mode 100644 tests/data/sample_ligands/unfiltered/103061392.sdf
 create mode 100644 tests/data/sample_ligands/unfiltered/134970870.sdf
 create mode 100644 tests/data/sample_ligands/unfiltered/135191341.sdf
 create mode 100644 tests/data/sample_ligands/unfiltered/135355153.sdf

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 3f64782..e2c6f4e 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -703,7 +703,6 @@ def create_mapping_filtered(folder_path: str, results_path: str):
             if file[0] != "." and file[-4:] == ".sdf":
                 name = file[file.index("_") + 1:-4]
                 mapped_sdf.append({'value': file, 'text': name})
-                # mapped_sdf[name] = file
         json_file = results_path + "sdf_mapping_filtered.json"
         with open(json_file, 'w') as file:
             file.write(json.dumps(mapped_sdf))
@@ -719,13 +718,13 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str):
         folder_path: where the sdf files are stored
         results_path: where the json file should be created
         """
-        mapped_sdf = {}
+        mapped_sdf = []
         sdf_files = os.listdir(folder_path)
         for file in sdf_files:
             if file[0] != "." and file[-4:] == ".sdf":
                 names = self.get_substance_name(file, folder_path)
-                sdf_number = file.split(".")[0]
-                mapped_sdf[sdf_number] = ",".join(names)
+                all_names = ",".join(names)
+                mapped_sdf.append({'value': file, 'text': all_names})
         json_file = results_path + "sdf_mapping_unfiltered.json"
         with open(json_file, 'w') as file:
             file.write(json.dumps(mapped_sdf))
diff --git a/tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf b/tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf
new file mode 100644
index 0000000..1cede6e
--- /dev/null
+++ b/tests/data/sample_ligands/filtered/443453_Gibberellin_A15.sdf
@@ -0,0 +1,266 @@
+443453
+  -OEChem-03192020593D
+
+ 53 56  0     1  0  0  0  0  0999 V2000
+    0.1236    3.0990    0.8730 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.3854   -3.4237    0.8105 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.1809   -2.9472   -1.3454 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.3584   -0.9329    1.9903 O   0  0  0  0  0  0  0  0  0  0  0  0
+    3.7768    0.7463    1.3864 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.3538   -0.4319    0.0963 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -1.0105    0.8566   -0.7318 C   0  0  2  0  0  0  0  0  0  0  0  0
+    0.4869    1.1322   -0.4269 C   0  0  1  0  0  0  0  0  0  0  0  0
+    1.0193   -0.3273   -0.5291 C   0  0  2  0  0  0  0  0  0  0  0  0
+    0.0074   -1.1685    0.2825 C   0  0  2  0  0  0  0  0  0  0  0  0
+   -2.0293   -0.1097    1.4402 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.5154   -1.1885   -0.5954 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.5430   -0.4670   -0.3113 C   0  0  2  0  0  0  0  0  0  0  0  0
+   -3.3341    0.5290    0.9785 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -2.0059    2.0256   -0.6338 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.1925    1.9736   -1.5082 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.0181    1.9563    0.5166 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.7200   -0.3852   -0.1626 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.6814    1.8077    0.9486 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.2622    0.4602   -1.3279 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.7218    1.8959   -1.3938 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.0462   -2.5866   -0.2001 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.0264   -1.9089   -0.5428 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.9767   -0.1289    1.0910 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.9386   -0.4761   -0.7046 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.0369    0.5646   -1.7951 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.8790   -0.6259   -1.5832 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.2566   -1.1727    1.3441 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.4833    0.5460    2.1147 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.2272   -1.0338    2.0020 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.6237   -2.2082   -0.2092 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.4083   -1.2280   -1.6838 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.0782    0.5310    1.7815 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.6097    1.9810   -1.5537 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.5388    3.0110   -0.7017 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.8704    3.0201   -1.5008 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.9153    1.5956   -2.5018 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.6222    2.5239    1.3681 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.9411    2.4664    0.2142 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.7267    1.9624    1.2042 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.2548    1.2701    1.7884 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.1601    0.0269   -2.3328 H   0  0  0  0  0  0  0  0  0  0  0  0
+    4.3408    0.4975   -1.1273 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.0569    2.4587   -0.5161 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.1681    2.4021   -2.2586 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.6031   -2.6245    0.1690 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.7809   -2.2519   -1.5539 H   0  0  0  0  0  0  0  0  0  0  0  0
+    4.1167   -1.9748   -0.4362 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -5.7590    0.1327   -0.3404 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -5.1367   -1.1604   -1.5223 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.7990    3.7041    0.5229 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.4280   -4.3576    0.5136 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.6260   -0.7265    2.9110 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1 19  1  0  0  0  0
+  1 51  1  0  0  0  0
+  2 22  1  0  0  0  0
+  2 52  1  0  0  0  0
+  3 22  2  0  0  0  0
+  4 24  1  0  0  0  0
+  4 53  1  0  0  0  0
+  5 24  2  0  0  0  0
+  6  7  1  0  0  0  0
+  6 10  1  0  0  0  0
+  6 11  1  0  0  0  0
+  6 12  1  0  0  0  0
+  7  8  1  0  0  0  0
+  7 15  1  0  0  0  0
+  7 26  1  0  0  0  0
+  8  9  1  0  0  0  0
+  8 16  1  0  0  0  0
+  8 19  1  0  0  0  0
+  9 10  1  0  0  0  0
+  9 13  1  0  0  0  0
+  9 27  1  0  0  0  0
+ 10 22  1  0  0  0  0
+ 10 28  1  0  0  0  0
+ 11 14  1  0  0  0  0
+ 11 29  1  0  0  0  0
+ 11 30  1  0  0  0  0
+ 12 18  1  0  0  0  0
+ 12 31  1  0  0  0  0
+ 12 32  1  0  0  0  0
+ 13 20  1  0  0  0  0
+ 13 23  1  0  0  0  0
+ 13 24  1  0  0  0  0
+ 14 17  1  0  0  0  0
+ 14 18  1  0  0  0  0
+ 14 33  1  0  0  0  0
+ 15 17  1  0  0  0  0
+ 15 34  1  0  0  0  0
+ 15 35  1  0  0  0  0
+ 16 21  1  0  0  0  0
+ 16 36  1  0  0  0  0
+ 16 37  1  0  0  0  0
+ 17 38  1  0  0  0  0
+ 17 39  1  0  0  0  0
+ 18 25  2  0  0  0  0
+ 19 40  1  0  0  0  0
+ 19 41  1  0  0  0  0
+ 20 21  1  0  0  0  0
+ 20 42  1  0  0  0  0
+ 20 43  1  0  0  0  0
+ 21 44  1  0  0  0  0
+ 21 45  1  0  0  0  0
+ 23 46  1  0  0  0  0
+ 23 47  1  0  0  0  0
+ 23 48  1  0  0  0  0
+ 25 49  1  0  0  0  0
+ 25 50  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+443453
+
+> <PUBCHEM_CONFORMER_RMSD>
+0.8
+
+> <PUBCHEM_CONFORMER_DIVERSEORDER>
+1
+
+> <PUBCHEM_MMFF94_PARTIAL_CHARGES>
+19
+1 -0.68
+10 0.06
+12 0.14
+13 0.06
+14 0.14
+18 -0.28
+19 0.28
+2 -0.65
+22 0.66
+24 0.66
+25 -0.3
+3 -0.57
+4 -0.65
+49 0.15
+5 -0.57
+50 0.15
+51 0.4
+52 0.5
+53 0.5
+
+> <PUBCHEM_EFFECTIVE_ROTOR_COUNT>
+4.8
+
+> <PUBCHEM_PHARMACOPHORE_FEATURES>
+11
+1 1 acceptor
+1 1 donor
+1 2 acceptor
+1 3 acceptor
+1 4 acceptor
+1 5 acceptor
+3 2 3 22 anion
+3 4 5 24 anion
+5 6 7 8 9 10 rings
+6 8 9 13 16 20 21 rings
+8 6 7 11 12 14 15 17 18 rings
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+25
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+7
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+> <PUBCHEM_CONFORMER_ID>
+0006C43D00000001
+
+> <PUBCHEM_MMFF94_ENERGY>
+109.8608
+
+> <PUBCHEM_FEATURE_SELFOVERLAP>
+56.05
+
+> <PUBCHEM_SHAPE_FINGERPRINT>
+10863032 1 17775002341051126289
+10967382 1 18337399321749125231
+11132069 177 18410571760632776393
+11578080 2 17701800121260520545
+12011746 2 18271238417445912848
+12553582 1 18340217378952578125
+12592029 89 18260545624169624827
+12633257 1 18125411302951212400
+13140716 1 18267019453643120282
+13172582 1 18408322181248669898
+13224815 77 18408325466861773613
+13538477 17 18042969963765403390
+13583140 156 16988268769054638501
+14178342 30 18194960747293312025
+14787075 74 17773863363032392001
+15309172 13 18335713749033407887
+16752209 62 18408610240252119924
+16945 1 17822014194095494282
+17349148 13 18408877426284028311
+17492 54 18261966265710939357
+1813 80 17895468108494128074
+18186145 218 17632583720907680458
+192875 21 18408877456327615965
+20028762 73 17915180341888784015
+20600515 1 18341608244214032320
+20691752 17 18272943729620618673
+20715895 44 17970892381477415541
+20739085 24 17971504886336515381
+20905425 154 17982735168883945983
+2334 1 18411142424152217716
+23419403 2 17684601663405341276
+23559900 14 18199188395620067732
+2748010 2 17253711577657852757
+3286 77 17560798775942189230
+34934 24 18042688493083725276
+352729 6 18265064698321524087
+394222 165 17896054272508719104
+474 4 18335149661193390307
+484985 159 14682784046149967332
+70251023 43 17909274585694312047
+90525 40 18335427850166549029
+
+> <PUBCHEM_SHAPE_MULTIPOLES>
+485.14
+6.36
+3.04
+1.47
+2.87
+0.9
+0.26
+-0.26
+-0.16
+-0.37
+-0.24
+-0.77
+-0.18
+-0.53
+
+> <PUBCHEM_SHAPE_SELFOVERLAP>
+1060.249
+
+> <PUBCHEM_SHAPE_VOLUME>
+262.2
+
+> <PUBCHEM_COORDINATE_TYPE>
+2
+5
+10
+
+$$$$
diff --git a/tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf b/tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf
new file mode 100644
index 0000000..46fa7c4
--- /dev/null
+++ b/tests/data/sample_ligands/filtered/5984_D-(-)-Fructose.sdf
@@ -0,0 +1,297 @@
+5984
+  -OEChem-03192014583D
+
+ 24 23  0     1  0  0  0  0  0999 V2000
+   -0.3508    0.2852    1.6168 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.1229   -0.0967   -1.5767 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.3999   -1.9832    0.0163 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.2737    0.3477    0.1329 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.7917   -1.0026    0.5184 O   0  0  0  0  0  0  0  0  0  0  0  0
+    3.8346    1.3519   -0.1667 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.4980    0.2331    0.2008 C   0  0  2  0  0  0  0  0  0  0  0  0
+   -1.9435   -0.1555   -0.1619 C   0  0  1  0  0  0  0  0  0  0  0  0
+    0.5984   -0.6447   -0.4216 C   0  0  1  0  0  0  0  0  0  0  0  0
+   -2.9638    0.7835    0.4823 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.0330   -0.2421   -0.0847 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.4951    1.1234   -0.5560 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.3271    1.2533   -0.1665 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.1596   -1.1825    0.1530 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.5128   -0.6492   -1.5135 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.8919    0.7765    1.5733 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.8443    1.8081    0.1148 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.7717    1.1026    1.9320 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.4381    1.1623   -1.6468 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.8696    1.9070   -0.1228 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.9237    0.8101   -1.8659 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.4087   -1.9817    0.9890 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.8996    0.9640    0.5499 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.8537    1.3937    0.8047 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  7  1  0  0  0  0
+  1 18  1  0  0  0  0
+  2  8  1  0  0  0  0
+  2 21  1  0  0  0  0
+  3  9  1  0  0  0  0
+  3 22  1  0  0  0  0
+  4 10  1  0  0  0  0
+  4 23  1  0  0  0  0
+  5 11  2  0  0  0  0
+  6 12  1  0  0  0  0
+  6 24  1  0  0  0  0
+  7  8  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 13  1  0  0  0  0
+  8 10  1  0  0  0  0
+  8 14  1  0  0  0  0
+  9 11  1  0  0  0  0
+  9 15  1  0  0  0  0
+ 10 16  1  0  0  0  0
+ 10 17  1  0  0  0  0
+ 11 12  1  0  0  0  0
+ 12 19  1  0  0  0  0
+ 12 20  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+5984
+
+> <PUBCHEM_CONFORMER_RMSD>
+0.6
+
+> <PUBCHEM_CONFORMER_DIVERSEORDER>
+1
+113
+63
+77
+90
+29
+4
+106
+81
+93
+21
+64
+52
+13
+83
+56
+43
+8
+42
+66
+103
+107
+5
+100
+94
+3
+50
+31
+73
+86
+47
+17
+23
+20
+91
+110
+10
+14
+22
+61
+104
+7
+97
+85
+105
+45
+44
+95
+6
+59
+69
+87
+70
+16
+74
+41
+78
+33
+99
+46
+112
+114
+49
+12
+25
+51
+101
+89
+109
+18
+35
+28
+96
+2
+62
+27
+57
+108
+65
+36
+39
+72
+9
+98
+79
+55
+84
+58
+30
+88
+24
+11
+102
+38
+32
+34
+26
+37
+71
+40
+19
+48
+82
+53
+80
+67
+60
+54
+15
+111
+68
+76
+92
+75
+
+> <PUBCHEM_MMFF94_PARTIAL_CHARGES>
+17
+1 -0.68
+10 0.28
+11 0.45
+12 0.34
+18 0.4
+2 -0.68
+21 0.4
+22 0.4
+23 0.4
+24 0.4
+3 -0.68
+4 -0.68
+5 -0.57
+6 -0.68
+7 0.28
+8 0.28
+9 0.34
+
+> <PUBCHEM_EFFECTIVE_ROTOR_COUNT>
+5
+
+> <PUBCHEM_PHARMACOPHORE_FEATURES>
+11
+1 1 acceptor
+1 1 donor
+1 2 acceptor
+1 2 donor
+1 3 acceptor
+1 3 donor
+1 4 acceptor
+1 4 donor
+1 5 acceptor
+1 6 acceptor
+1 6 donor
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+12
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+3
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+6
+
+> <PUBCHEM_CONFORMER_ID>
+0000176000000001
+
+> <PUBCHEM_MMFF94_ENERGY>
+14.8267
+
+> <PUBCHEM_FEATURE_SELFOVERLAP>
+55.858
+
+> <PUBCHEM_SHAPE_FINGERPRINT>
+10219947 1 18259706679105643821
+10857977 72 15647061456026330781
+12251169 10 14346077581156446260
+12932764 1 17385438795719614274
+14325111 11 18409451396669739967
+15310529 11 18341612560571437217
+15775835 57 18343023310977726820
+170605 34 18341619195779048951
+18186145 218 18411136926699554526
+20645464 45 16660360411259445224
+20645476 183 17703790348838210055
+20711985 344 13253979848264163674
+20871999 31 16271639109051498853
+21119208 17 17060347326849160756
+21293036 1 16917071070392623369
+21499 59 18410854390739347150
+228727 97 17489598835037837440
+23211744 41 17385720270780590995
+23402539 116 16515958268129347903
+23552423 10 17773044050770881839
+5084963 1 18059013874308216274
+528886 8 18411135861458162993
+57812782 119 16515402967153822125
+
+> <PUBCHEM_SHAPE_MULTIPOLES>
+211.74
+6.02
+1.28
+1.02
+1.45
+0.31
+0
+-2.26
+-0.19
+-0.91
+0
+0.36
+-0.04
+-0.43
+
+> <PUBCHEM_SHAPE_SELFOVERLAP>
+401.786
+
+> <PUBCHEM_SHAPE_VOLUME>
+129.9
+
+> <PUBCHEM_COORDINATE_TYPE>
+2
+5
+10
+
+$$$$
diff --git a/tests/data/sample_ligands/filtered/73672_isoxaben.sdf b/tests/data/sample_ligands/filtered/73672_isoxaben.sdf
new file mode 100644
index 0000000..0cbc2b4
--- /dev/null
+++ b/tests/data/sample_ligands/filtered/73672_isoxaben.sdf
@@ -0,0 +1,332 @@
+73672
+  -OEChem-01292022313D
+
+ 48 49  0     0  0  0  0  0  0999 V2000
+    1.5815   -0.0149   -1.9046 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.0163    0.0226    1.5313 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.8096   -2.4008    0.0430 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.7579    2.3983   -0.0157 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.9484   -0.0044   -1.6550 N   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.4548   -0.0353   -0.7579 N   0  0  0  0  0  0  0  0  0  0  0  0
+    4.3874   -0.0002    0.3050 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.2162   -1.2511   -0.1539 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.1981    1.2668   -0.1418 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.9913   -0.0073   -0.3541 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.3418   -0.0076    1.8550 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5852   -2.6088    0.1428 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.6001    2.6119    0.2617 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.8038   -0.0189    0.3139 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.9104   -0.0234   -0.7245 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.3267   -0.0121    0.3457 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.7765   -0.0013    0.0151 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.4417   -1.2039   -0.1184 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.4156    1.2117   -0.1483 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.8018   -1.1931   -0.4280 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -4.7756    1.2226   -0.4577 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -5.4687    0.0202   -0.5976 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.7911   -2.9509    1.3584 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.7280    2.9798    1.2859 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.3947   -1.1959   -1.2362 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.2082   -1.2246    0.3161 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.2145    1.2125    0.2704 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.3162    1.2638   -1.2335 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.7968    0.8551    2.2526 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.3508    0.0103    2.2822 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.8332   -0.8963    2.2434 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.2276   -3.4060   -0.2462 H   0  0  0  0  0  0  0  0  0  0  0  0
+    4.4723   -2.7775    1.2170 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.6058   -2.7158   -0.3320 H   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5901    2.7419    1.3473 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.2044    3.4239   -0.1564 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.5802    2.7318   -0.1145 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.5877   -0.0248    1.3710 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.8640   -0.0470   -1.6888 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -5.3482   -2.1257   -0.5393 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -5.3019    2.1637   -0.5921 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -6.5275    0.0286   -0.8389 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.1678   -3.8491    1.3445 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.8037   -3.2337    1.6632 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.3693   -2.2449    2.0797 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.0576    3.8432    1.2592 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.7287    3.3237    1.5662 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.3565    2.2725    2.0328 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  5  1  0  0  0  0
+  1 15  1  0  0  0  0
+  2 16  2  0  0  0  0
+  3 18  1  0  0  0  0
+  3 23  1  0  0  0  0
+  4 19  1  0  0  0  0
+  4 24  1  0  0  0  0
+  5 10  2  0  0  0  0
+  6 15  1  0  0  0  0
+  6 16  1  0  0  0  0
+  6 39  1  0  0  0  0
+  7  8  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 10  1  0  0  0  0
+  7 11  1  0  0  0  0
+  8 12  1  0  0  0  0
+  8 25  1  0  0  0  0
+  8 26  1  0  0  0  0
+  9 13  1  0  0  0  0
+  9 27  1  0  0  0  0
+  9 28  1  0  0  0  0
+ 10 14  1  0  0  0  0
+ 11 29  1  0  0  0  0
+ 11 30  1  0  0  0  0
+ 11 31  1  0  0  0  0
+ 12 32  1  0  0  0  0
+ 12 33  1  0  0  0  0
+ 12 34  1  0  0  0  0
+ 13 35  1  0  0  0  0
+ 13 36  1  0  0  0  0
+ 13 37  1  0  0  0  0
+ 14 15  2  0  0  0  0
+ 14 38  1  0  0  0  0
+ 16 17  1  0  0  0  0
+ 17 18  2  0  0  0  0
+ 17 19  1  0  0  0  0
+ 18 20  1  0  0  0  0
+ 19 21  2  0  0  0  0
+ 20 22  2  0  0  0  0
+ 20 40  1  0  0  0  0
+ 21 22  1  0  0  0  0
+ 21 41  1  0  0  0  0
+ 22 42  1  0  0  0  0
+ 23 43  1  0  0  0  0
+ 23 44  1  0  0  0  0
+ 23 45  1  0  0  0  0
+ 24 46  1  0  0  0  0
+ 24 47  1  0  0  0  0
+ 24 48  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+73672
+
+> <PUBCHEM_CONFORMER_RMSD>
+0.8
+
+> <PUBCHEM_CONFORMER_DIVERSEORDER>
+1
+14
+60
+56
+31
+22
+58
+64
+63
+7
+13
+42
+27
+9
+57
+29
+12
+51
+61
+66
+33
+59
+39
+23
+25
+36
+32
+62
+28
+10
+18
+41
+65
+26
+24
+15
+35
+11
+5
+8
+45
+30
+47
+44
+52
+21
+4
+16
+20
+3
+46
+48
+54
+2
+55
+49
+6
+50
+19
+38
+53
+68
+17
+37
+43
+34
+70
+67
+69
+40
+
+> <PUBCHEM_MMFF94_PARTIAL_CHARGES>
+24
+1 -0.02
+10 0.11
+14 -0.15
+15 0.2
+16 0.54
+17 0.09
+18 0.08
+19 0.08
+2 -0.57
+20 -0.15
+21 -0.15
+22 -0.15
+23 0.28
+24 0.28
+3 -0.36
+38 0.15
+39 0.37
+4 -0.36
+40 0.15
+41 0.15
+42 0.15
+5 -0.41
+6 -0.49
+7 0.18
+
+> <PUBCHEM_EFFECTIVE_ROTOR_COUNT>
+8
+
+> <PUBCHEM_PHARMACOPHORE_FEATURES>
+10
+1 11 hydrophobe
+1 12 hydrophobe
+1 13 hydrophobe
+1 2 acceptor
+1 3 acceptor
+1 4 acceptor
+1 5 acceptor
+1 6 donor
+5 1 5 10 14 15 rings
+6 17 18 19 20 21 22 rings
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+24
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+4
+
+> <PUBCHEM_CONFORMER_ID>
+00011FC800000001
+
+> <PUBCHEM_MMFF94_ENERGY>
+85.2359
+
+> <PUBCHEM_FEATURE_SELFOVERLAP>
+50.748
+
+> <PUBCHEM_SHAPE_FINGERPRINT>
+10366900 7 17846498153403680307
+10595046 47 18412261757249425332
+10670039 82 16845310345148862312
+11405975 8 18339928125900624090
+12107183 9 17762899866583206346
+12166972 35 18114184133342584557
+12236239 1 17748820817921540483
+12596602 18 18113613525498890928
+12670546 56 18260544498576716388
+13167823 11 18410852140451225614
+13224815 77 15913334567591032886
+13533116 47 18409164390134199090
+13583140 156 18131063866327987482
+13911987 19 16988564653377796772
+14251764 38 18272935994669744648
+14341114 176 18410295826116619172
+15788980 27 18187368718197926686
+15961568 22 18338800125861386260
+17349148 13 17603588512614199393
+17844677 252 18341619209117683300
+1813 80 16588026775584437974
+19489759 90 18341610386996167121
+19958102 18 18113889434530828159
+20511986 3 17749933557500052928
+20645477 70 16773525360836679502
+21033648 29 17131255892496812096
+21065198 57 18411138022222234766
+21859007 373 17387112196978238957
+23402539 116 18411975850346435557
+23557571 272 18202289086671170445
+23559900 14 18410292523613517392
+23569943 247 17097762466360446034
+2838139 119 16371273443410758141
+300161 21 18114456842448656190
+3004659 81 18334293141904462754
+34797466 226 18059021690990667228
+351380 180 18413385432136517725
+3633792 109 18115293476867354335
+4073 2 18114185276426065634
+4214541 1 18410855421536876668
+5104073 3 18409732863341281898
+5283173 99 18271242720887508337
+67856867 119 17970350527596444980
+90127 26 18130798858260937044
+9971528 1 17749109980937210316
+9981440 41 18411704245215730995
+
+> <PUBCHEM_SHAPE_MULTIPOLES>
+460.47
+13.64
+2.76
+1.29
+4.71
+0.05
+0.14
+-0.01
+-0.2
+-0.79
+1.02
+0.5
+-0.02
+0.13
+
+> <PUBCHEM_SHAPE_SELFOVERLAP>
+963.108
+
+> <PUBCHEM_SHAPE_VOLUME>
+263
+
+> <PUBCHEM_COORDINATE_TYPE>
+2
+5
+10
+
+$$$$
diff --git a/tests/data/sample_ligands/filtered/801_Auxin.sdf b/tests/data/sample_ligands/filtered/801_Auxin.sdf
new file mode 100644
index 0000000..ca48c82
--- /dev/null
+++ b/tests/data/sample_ligands/filtered/801_Auxin.sdf
@@ -0,0 +1,183 @@
+801
+  -OEChem-03192020413D
+
+ 21 22  0     0  0  0  0  0  0999 V2000
+   -3.1373   -0.4826   -0.9682 O   0  5  0  0  0  0  0  0  0  0  0  0
+   -3.0086    1.7026   -0.3061 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.8176   -2.0611   -0.1166 N   0  0  0  0  0  0  0  0  0  0  0  0
+    0.5445    0.1149    0.2923 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.6808   -0.5631    0.5447 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.4680   -0.8477   -0.1212 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.4813   -1.8993    0.2843 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.9454    0.0466    1.0039 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.9580    1.4594    0.3762 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.7895   -0.5319   -0.4562 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.2769    1.7901    0.0441 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.1768    0.8081   -0.3654 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.7779    0.4639   -0.2120 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.1452   -2.7503    0.3478 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.5298   -0.6664    1.5980 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.7520    0.9102    1.6506 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.2390   -2.9434   -0.3723 H   0  0  0  0  0  0  0  0  0  0  0  0
+    0.2693    2.2375    0.6926 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.4908   -1.2956   -0.7751 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.6013    2.8254    0.1067 H   0  0  0  0  0  0  0  0  0  0  0  0
+    4.1964    1.0872   -0.6186 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1 13  1  0  0  0  0
+  2 13  2  0  0  0  0
+  3  6  1  0  0  0  0
+  3  7  1  0  0  0  0
+  3 17  1  0  0  0  0
+  4  5  1  0  0  0  0
+  4  6  1  0  0  0  0
+  4  9  2  0  0  0  0
+  5  7  2  0  0  0  0
+  5  8  1  0  0  0  0
+  6 10  2  0  0  0  0
+  7 14  1  0  0  0  0
+  8 13  1  0  0  0  0
+  8 15  1  0  0  0  0
+  8 16  1  0  0  0  0
+  9 11  1  0  0  0  0
+  9 18  1  0  0  0  0
+ 10 12  1  0  0  0  0
+ 10 19  1  0  0  0  0
+ 11 12  2  0  0  0  0
+ 11 20  1  0  0  0  0
+ 12 21  1  0  0  0  0
+M  CHG  1   1  -1
+M  END
+> <PUBCHEM_COMPOUND_CID>
+801
+
+> <PUBCHEM_CONFORMER_RMSD>
+0.6
+
+> <PUBCHEM_CONFORMER_DIVERSEORDER>
+1
+2
+
+> <PUBCHEM_MMFF94_PARTIAL_CHARGES>
+18
+1 -0.9
+10 -0.15
+11 -0.15
+12 -0.15
+13 0.91
+14 0.15
+17 0.27
+18 0.15
+19 0.15
+2 -0.9
+20 0.15
+21 0.15
+3 0.03
+5 -0.18
+6 -0.15
+7 -0.3
+8 0.07
+9 -0.15
+
+> <PUBCHEM_EFFECTIVE_ROTOR_COUNT>
+2
+
+> <PUBCHEM_PHARMACOPHORE_FEATURES>
+7
+1 1 acceptor
+1 2 acceptor
+1 3 cation
+1 3 donor
+3 1 2 13 anion
+5 3 4 5 6 7 rings
+6 4 6 9 10 11 12 rings
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+13
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+> <PUBCHEM_CONFORMER_ID>
+0000032100000001
+
+> <PUBCHEM_MMFF94_ENERGY>
+19.5653
+
+> <PUBCHEM_FEATURE_SELFOVERLAP>
+35.666
+
+> <PUBCHEM_SHAPE_FINGERPRINT>
+1 1 18410013217136608694
+10608611 8 18337393858566906048
+11206711 2 17981333596590782830
+11769659 78 12535357753079887780
+124424 183 17967811643942126866
+12654215 9 18263925449534713868
+13380535 76 18339640148232832674
+14325111 11 18410013255712095577
+14911166 2 18340498802568737270
+15279308 100 18336836372128195060
+15775835 57 18413673500199140813
+16945 1 18340218422576708290
+17844478 74 18043270096074572139
+18186145 218 18272662237532389144
+20653085 51 18410860962287236201
+21028194 46 18335426806473554737
+21524375 3 18334289894196932995
+21947302 44 18334855004513486555
+23402655 69 18342171146833090685
+23493267 7 17603883199332416488
+23559900 14 18199773461140915892
+238 59 15804079125244740965
+25 1 18336551503874047873
+2748010 2 18125173864078903938
+528886 8 18411982477037724968
+63268167 104 18342743974667681472
+81228 2 17547010092349083291
+
+> <PUBCHEM_SHAPE_MULTIPOLES>
+250.81
+4.9
+1.95
+0.82
+1.27
+0.2
+0.03
+-1.84
+-1.19
+-0.26
+0
+0.32
+0
+0.21
+
+> <PUBCHEM_SHAPE_SELFOVERLAP>
+548.117
+
+> <PUBCHEM_SHAPE_VOLUME>
+137.2
+
+> <PUBCHEM_COORDINATE_TYPE>
+2
+5
+10
+
+$$$$
diff --git a/tests/data/sample_ligands/unfiltered/103061392.sdf b/tests/data/sample_ligands/unfiltered/103061392.sdf
new file mode 100644
index 0000000..6be5427
--- /dev/null
+++ b/tests/data/sample_ligands/unfiltered/103061392.sdf
@@ -0,0 +1,36 @@
+103061392
+  -OEChem-02242004582D
+
+  0  0  0     0  0  0  0  0  0999 V2000
+M  END
+> <PUBCHEM_SUBSTANCE_ID>
+103061392
+
+> <PUBCHEM_SUBSTANCE_VERSION>
+1
+
+> <PUBCHEM_EXT_DATASOURCE_NAME>
+KEGG
+
+> <PUBCHEM_EXT_DATASOURCE_REGID>
+C18210
+
+> <PUBCHEM_SUBSTANCE_COMMENT>
+polypeptide placental hormone
+
+> <PUBCHEM_SUBSTANCE_SYNONYM>
+C18210
+Chorionic somatomammotropin hormone
+PL
+Placental lactogen
+
+> <PUBCHEM_XREF_EXT_ID>
+C18210
+
+> <PUBCHEM_EXT_DATASOURCE_URL>
+http://www.genome.jp/kegg/
+
+> <PUBCHEM_EXT_SUBSTANCE_URL>
+http://www.genome.jp/dbget-bin/www_bget?cpd+C18210
+
+$$$$
diff --git a/tests/data/sample_ligands/unfiltered/134970870.sdf b/tests/data/sample_ligands/unfiltered/134970870.sdf
new file mode 100644
index 0000000..5d961b2
--- /dev/null
+++ b/tests/data/sample_ligands/unfiltered/134970870.sdf
@@ -0,0 +1,107 @@
+134970870
+  -OEChem-02242004282D
+
+ 12 11  0     1  0  0  0  0  0999 V2000
+    0.2500   -1.3250    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.4625    0.7375    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.9625    0.7375    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.1750   -1.3250    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.3875   -0.0875    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.6000   -0.5000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.2500   -0.5000    0.0000 C   0  0  3  0  0  0  0  0  0  0  0  0
+   -0.4625   -0.0875    0.0000 C   0  0  3  0  0  0  0  0  0  0  0  0
+    0.9625   -0.0875    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.1750   -0.5000    0.0000 C   0  0  3  0  0  0  0  0  0  0  0  0
+    1.6750   -0.5000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -1.8875   -0.0875    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+  7  1  1  1  0  0  0
+  8  2  1  1  0  0  0
+  3  9  2  0  0  0  0
+ 10  4  1  6  0  0  0
+  5 11  1  0  0  0  0
+  6 12  1  0  0  0  0
+  7  8  1  0  0  0  0
+  7  9  1  0  0  0  0
+  8 10  1  0  0  0  0
+  9 11  1  0  0  0  0
+ 10 12  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_ID_TYPE>
+0
+
+> <PUBCHEM_TOTAL_CHARGE>
+0
+
+> <PUBCHEM_SUBSTANCE_ID>
+134970870
+
+> <PUBCHEM_SUBSTANCE_VERSION>
+1
+
+> <PUBCHEM_EXT_DATASOURCE_NAME>
+ChemIDplus
+
+> <PUBCHEM_EXT_DATASOURCE_REGID>
+0000057487
+
+> <PUBCHEM_SUBSTANCE_COMMENT>
+Sweetening Agents
+
+> <PUBCHEM_SUBSTANCE_SYNONYM>
+10597-68-9
+149014-33-5
+196419-06-4
+3812-57-5
+57-48-7
+69-67-0
+AI3-23514
+Advantose FS 95
+CCRIS 3335
+D-(-)-Fructose
+D-(-)-Levulose
+D-Fructose
+EINECS 200-333-3
+Fructose
+Fructose solution
+Fructose, D-
+Fructose, pure
+Fruit sugar
+Furucton
+Hi-Fructo 970
+Krystar 300
+Levulose
+Nevulose
+Sugar, fruit
+UNII-6YSS42VSEV
+arabino-Hexulose
+
+> <PUBCHEM_GENERIC_REGISTRY_NAME>
+10597-68-9
+149014-33-5
+196419-06-4
+3812-57-5
+57-48-7
+69-67-0
+
+> <PUBCHEM_XREF_EXT_ID>
+0000057487
+
+> <PUBCHEM_EXT_DATASOURCE_URL>
+http://chem.sis.nlm.nih.gov/chemidplus/
+
+> <PUBCHEM_EXT_SUBSTANCE_URL>
+http://chem.sis.nlm.nih.gov/chemidplus/direct.jsp?result=advanced&regno=0000057487
+
+> <PUBCHEM_CID_ASSOCIATIONS>
+5984  1
+
+> <PUBCHEM_COORDINATE_TYPE>
+1
+3
+
+> <PUBCHEM_BONDANNOTATIONS>
+7  1  5
+8  2  5
+10  4  6
+
+$$$$
diff --git a/tests/data/sample_ligands/unfiltered/135191341.sdf b/tests/data/sample_ligands/unfiltered/135191341.sdf
new file mode 100644
index 0000000..cf0ee8d
--- /dev/null
+++ b/tests/data/sample_ligands/unfiltered/135191341.sdf
@@ -0,0 +1,105 @@
+135191341
+  -OEChem-02242004592D
+
+ 23 23  0     1  0  0  0  0  0999 V2000
+    6.7508    3.8281    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.4207    2.5781    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    9.6951   -0.6918    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    3.5032    4.4531    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    1.3382   -1.7969    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    5.6683    1.9531    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+    7.9640    0.7100    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+    3.5032    0.7031    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+    3.5032   -1.7969    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+    7.8334    1.9531    0.0000 C   0  0  3  0  0  0  0  0  0  0  0  0
+    6.7508    2.5781    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5858    2.5781    0.0000 C   0  0  3  0  0  0  0  0  0  0  0  0
+    3.5032    1.9531    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    9.1867    0.4501    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    8.9753    2.4615    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.4207    0.0781    0.0000 C   0  0  3  0  0  0  0  0  0  0  0  0
+    9.8117    1.5326    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5858    3.8281    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.4207   -1.1719    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    1.3382    0.7031    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.2556    0.0781    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.2556   -1.1719    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.8269    0.7031    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+  1 11  2  0  0  0  0
+  2 13  2  0  0  0  0
+  3 14  2  0  0  0  0
+  4 18  1  0  0  0  0
+  5 19  2  0  0  0  0
+  6 11  1  0  0  0  0
+ 12  6  1  6  0  0  0
+  7 10  1  0  0  0  0
+  7 14  1  0  0  0  0
+  8 13  1  0  0  0  0
+ 16  8  1  6  0  0  0
+  9 19  1  0  0  0  0
+ 10 11  1  6  0  0  0
+ 10 15  1  0  0  0  0
+ 12 13  1  0  0  0  0
+ 12 18  1  0  0  0  0
+ 14 17  1  0  0  0  0
+ 15 17  1  0  0  0  0
+ 16 19  1  0  0  0  0
+ 16 20  1  0  0  0  0
+ 20 21  1  0  0  0  0
+ 21 22  1  0  0  0  0
+ 21 23  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_ID_TYPE>
+0
+
+> <PUBCHEM_TOTAL_CHARGE>
+0
+
+> <PUBCHEM_SUBSTANCE_ID>
+135191341
+
+> <PUBCHEM_SUBSTANCE_VERSION>
+1
+
+> <PUBCHEM_EXT_DATASOURCE_NAME>
+ChemIDplus
+
+> <PUBCHEM_EXT_DATASOURCE_REGID>
+0073684807
+
+> <PUBCHEM_SUBSTANCE_SYNONYM>
+73684-80-7
+L-Leucinamide, 5-oxo-L-prolyl-L-seryl-
+Pyr-ser-leu-NH2
+Pyro-gln-ser-leu-amide
+Pyroglutamine-serine-leucinamide
+Pyroglutaminyl-seryl-leucinamide
+Pyroglutamylserylleucinamide
+Thyrotropin releasing hormone-AN
+Trh-AN
+
+> <PUBCHEM_GENERIC_REGISTRY_NAME>
+73684-80-7
+
+> <PUBCHEM_XREF_EXT_ID>
+0073684807
+
+> <PUBCHEM_EXT_DATASOURCE_URL>
+http://chem.sis.nlm.nih.gov/chemidplus/
+
+> <PUBCHEM_EXT_SUBSTANCE_URL>
+http://chem.sis.nlm.nih.gov/chemidplus/direct.jsp?result=advanced&regno=0073684807
+
+> <PUBCHEM_CID_ASSOCIATIONS>
+173203  1
+
+> <PUBCHEM_COORDINATE_TYPE>
+1
+3
+
+> <PUBCHEM_BONDANNOTATIONS>
+10  11  6
+12  6  6
+16  8  6
+
+$$$$
diff --git a/tests/data/sample_ligands/unfiltered/135355153.sdf b/tests/data/sample_ligands/unfiltered/135355153.sdf
new file mode 100644
index 0000000..83712c0
--- /dev/null
+++ b/tests/data/sample_ligands/unfiltered/135355153.sdf
@@ -0,0 +1,41 @@
+135355153
+  -OEChem-02242004292D
+
+  0  0  0     0  0  0  0  0  0999 V2000
+M  END
+> <PUBCHEM_COMPOUND_ID_TYPE>
+0
+
+> <PUBCHEM_SUBS_AUTO_STRUCTURE>
+Deposited Substance is allowed to be autogenerated
+
+> <PUBCHEM_SUBSTANCE_ID>
+135355153
+
+> <PUBCHEM_SUBSTANCE_VERSION>
+1
+
+> <PUBCHEM_EXT_DATASOURCE_NAME>
+ChemIDplus
+
+> <PUBCHEM_EXT_DATASOURCE_REGID>
+LK41100000
+
+> <PUBCHEM_SUBSTANCE_COMMENT>
+Natural Product
+
+> <PUBCHEM_SUBSTANCE_SYNONYM>
+F II (sugar fraction)
+LK41100000
+NIOSH/LK4110000
+
+> <PUBCHEM_XREF_EXT_ID>
+LK41100000
+
+> <PUBCHEM_EXT_DATASOURCE_URL>
+http://chem.sis.nlm.nih.gov/chemidplus/
+
+> <PUBCHEM_EXT_SUBSTANCE_URL>
+http://chem.sis.nlm.nih.gov/chemidplus/direct.jsp?result=advanced&regno=LK41100000
+
+$$$$
diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 610e457..4e16e9e 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -3,10 +3,15 @@
 from api.utils.docking_utils import Receptor, ComplexReceptor, MonomerReceptor
 from api.utils.docking_utils import Ligand
 from api.utils.docking_utils import Docker
-from api.utils.docking_utils import MonomerDocking, ComplexDocking
+from api.utils.docking_utils import Docking, MonomerDocking, ComplexDocking
+from api.utils.docking_utils import SDFMapping
 import os
 
-NOT_IN_BAR = not os.environ.get("BAR") == "true"
+
+if os.environ.get("BAR") == "None":
+    NOT_IN_BAR = True
+else:
+    NOT_IN_BAR = False
 
 
 class TestReceptorClasses(unittest.TestCase):
@@ -71,6 +76,35 @@ def test_create_complex_receptor(self):
         self.assertEqual(receptor.monomers_list, ["A", "B"])
         self.assertEqual(receptor.line_numbers, [[48, 180], [181, 195]])
 
+    @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR")
+    def test_create_valid_docking(self):
+        """Test that the Docking instance is correct."""
+
+        receptor = "AT4G36360"
+        ligand = "443454_Gibberellin_A24"
+        docking = Docker.create_docking(receptor, ligand, "tests/data/")
+
+        self.assertIsInstance(docking, Docking)
+
+    @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR")
+    def test_create_docking_invalid_receptor(self):
+        """Test that invalid receptor returns an error message."""
+
+        receptor = "AT9G99999"
+        ligand = "443454_Gibberellin_A24"
+        docking = Docker.create_docking(receptor, ligand, "tests/data/")
+
+        self.assertEqual(docking, "Receptor file not found")
+
+    @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR")
+    def test_create_docking_invalid_ligand(self):
+        """Test that invalid ligand returns an error message"""
+        receptor = "AT4G36360"
+        ligand = "ABCD"
+        docking = Docker.create_docking(receptor, ligand, "tests/data/")
+
+        self.assertEqual(docking, "Ligand file not found")
+
     @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR")
     def test_docking_exists(self):
         """Test that Docker.create_docking returns None when the docking
@@ -129,5 +163,28 @@ def test_docking_monomer_results(self):
         self.assertIn('6325_Ethylene', normalized_results['AT9G99999_monomer'])
 
 
+class TestSDFMappingClass(unittest.TestCase):
+
+    def test_create_mapping_filtered(self):
+        """Test that the correct mapping is returned"""
+
+        mapping_results = SDFMapping.create_mapping_filtered("tests/data/sample_ligands/filtered/", "tests/data/")
+        correct_mapping = [{"value": "443453_Gibberellin_A15.sdf", "text": "Gibberellin_A15"}, {"value": "5984_D-(-)-Fructose.sdf", "text": "D-(-)-Fructose"}, {"value": "801_Auxin.sdf", "text": "Auxin"}, {"value": "73672_isoxaben.sdf", "text": "isoxaben"}]
+        self.assertEqual(mapping_results, correct_mapping)
+        self.assertTrue(os.path.exists("tests/data/sdf_mapping_filtered.json"))
+        if os.path.exists("tests/data/sdf_mapping_filtered.json"):
+            os.remove("tests/data/sdf_mapping_filtered.json")
+
+    def test_create_mapping_unfiltered(self):
+        """Test that the correct mapping is returned"""
+        mapping = SDFMapping()
+        mapping_results = mapping.create_mapping_unfiltered("tests/data/sample_ligands/unfiltered/", "tests/data/")
+        correct_mapping = [{"value": "135355153.sdf", "text": "F II (sugar fraction),LK41100000,NIOSH/LK4110000"}, {"value": "134970870.sdf", "text": "10597-68-9,149014-33-5,196419-06-4,3812-57-5,57-48-7,69-67-0,AI3-23514,Advantose FS 95,CCRIS 3335,D-(-)-Fructose,D-(-)-Levulose,D-Fructose,EINECS 200-333-3,Fructose,Fructose solution,Fructose, D-,Fructose, pure,Fruit sugar,Furucton,Hi-Fructo 970,Krystar 300,Levulose,Nevulose,Sugar, fruit,UNII-6YSS42VSEV,arabino-Hexulose"}, {"value": "103061392.sdf", "text": "C18210,Chorionic somatomammotropin hormone,PL,Placental lactogen"}, {"value": "135191341.sdf", "text": "73684-80-7,L-Leucinamide, 5-oxo-L-prolyl-L-seryl-,Pyr-ser-leu-NH2,Pyro-gln-ser-leu-amide,Pyroglutamine-serine-leucinamide,Pyroglutaminyl-seryl-leucinamide,Pyroglutamylserylleucinamide,Thyrotropin releasing hormone-AN,Trh-AN"}]
+        self.assertEqual(mapping_results, correct_mapping)
+        self.assertTrue(os.path.exists("tests/data/sdf_mapping_unfiltered.json"))
+        if os.path.exists("tests/data/sdf_mapping_unfiltered.json"):
+            os.remove("tests/data/sdf_mapping_unfiltered.json")
+
+
 if __name__ == '__main__':
     unittest.main()

From d7483d3b9a5e3e55fcaa8bdfd43c6c76d10f7f5c Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 4 Apr 2024 13:48:18 -0400
Subject: [PATCH 25/35] Fix tests to skip when not running on the BAR

---
 tests/utils/test_docking_utils.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 4e16e9e..983f32f 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -8,10 +8,7 @@
 import os
 
 
-if os.environ.get("BAR") == "None":
-    NOT_IN_BAR = True
-else:
-    NOT_IN_BAR = False
+NOT_IN_BAR = not os.environ.get("BAR") == "true"
 
 
 class TestReceptorClasses(unittest.TestCase):

From bfea1ac8f639b231d446c898befea07ff0cf5c77 Mon Sep 17 00:00:00 2001
From: Dien Nguyen <dnguyen@bar.utoronto.ca>
Date: Thu, 4 Apr 2024 18:04:28 -0400
Subject: [PATCH 26/35] Add skip tags for tests and remove print statements

---
 api/utils/docking_utils.py        | 4 ----
 tests/utils/test_docking_utils.py | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index e2c6f4e..c128aed 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -533,9 +533,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         normalized residue-energyy dictionary.
         """
         # create docking object
-        ct = datetime.datetime.now()
         ct_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        print("Starting the docking process at {}".format(ct))
         docking = Docker.create_docking(receptor, ligand, docking_pdb_path)
         if isinstance(docking, list):
             # receptor = receptor.split('.')[0]
@@ -618,7 +616,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE,
                                            text=True)
-        print("return code" + str(completed_process.returncode))
         if completed_process.returncode != 0:
             return "Receptor file not found"
         receptor_file = completed_process.stdout[:-1]
@@ -627,7 +624,6 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
         receptor_name = receptor_file[4:(receptor_file.index('.') + 2)]
 
         results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
-        print(results_path)
 
         if os.path.exists(results_path):
             print("The docking between {0} and {1} has already been done.".format(receptor_name,
diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 983f32f..d5e0d6c 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -162,6 +162,7 @@ def test_docking_monomer_results(self):
 
 class TestSDFMappingClass(unittest.TestCase):
 
+    @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR")
     def test_create_mapping_filtered(self):
         """Test that the correct mapping is returned"""
 
@@ -172,6 +173,7 @@ def test_create_mapping_filtered(self):
         if os.path.exists("tests/data/sdf_mapping_filtered.json"):
             os.remove("tests/data/sdf_mapping_filtered.json")
 
+    @pytest.mark.skipif(NOT_IN_BAR, reason="Only works on BAR")
     def test_create_mapping_unfiltered(self):
         """Test that the correct mapping is returned"""
         mapping = SDFMapping()

From c2ec1cd9ec6a19a13af6cfc01d9bf222d9c244f5 Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 19:54:52 -0400
Subject: [PATCH 27/35] Linting...

---
 api/resources/fastpheno.py        |   1 +
 api/resources/sequence.py         |   1 +
 api/utils/docking_utils.py        | 161 ++++++++++++++++--------------
 tests/utils/test_docking_utils.py |  40 +++++---
 4 files changed, 116 insertions(+), 87 deletions(-)

diff --git a/api/resources/fastpheno.py b/api/resources/fastpheno.py
index 81701f5..7a9b892 100644
--- a/api/resources/fastpheno.py
+++ b/api/resources/fastpheno.py
@@ -3,6 +3,7 @@
 Author: Vince L
 Fastpheno endpoint for retrieving tree data
 """
+
 from flask_restx import Namespace, Resource
 from api import db
 from api.models.fastpheno import Sites, Trees, Band, Height
diff --git a/api/resources/sequence.py b/api/resources/sequence.py
index 8496669..9b802f5 100644
--- a/api/resources/sequence.py
+++ b/api/resources/sequence.py
@@ -4,6 +4,7 @@
 Sequence endpoint that returns the amino acid sequence of a given protein, with additional options
 for predicted sequences (Phyre2) that we host
 """
+
 from flask_restx import Namespace, Resource
 from api.utils.bar_utils import BARUtils
 from markupsafe import escape
diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index c128aed..f8cc951 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -8,7 +8,7 @@
 import json
 import datetime
 
-HEX_BIN_PATH = '/usr/local/bin/hex/bin/hex'
+HEX_BIN_PATH = "/usr/local/bin/hex/bin/hex"
 
 
 class Receptor(ABC):
@@ -18,6 +18,7 @@ class Receptor(ABC):
     name (str): the name of the receptor
     file_path (str): the relative path to the receptors pdb file
     """
+
     @abstractmethod
     def __init__(self, name: str, file_path: str):
         self.name = name
@@ -25,13 +26,14 @@ def __init__(self, name: str, file_path: str):
 
 
 class MonomerReceptor(Receptor):
-    """ A class that represents a receptor that is a monomer, meaning it consists
+    """A class that represents a receptor that is a monomer, meaning it consists
     of only one chain.
 
     --- Attributes ---
     name (str): the name of the receptor
     file_path (str): the relative path to the receptors pdb file
     """
+
     name: str
     file_path: str
 
@@ -40,7 +42,7 @@ def __init__(self, name, file_path):
 
 
 class ComplexReceptor(Receptor):
-    """ A class that represents a receptor that is a complex, meaning it consists
+    """A class that represents a receptor that is a complex, meaning it consists
     of more than one chain.
 
     --- Attributes ---
@@ -49,6 +51,7 @@ class ComplexReceptor(Receptor):
     monomer_list (List[str]): the list of monomers that make up the complex
     line_numbers (List[List[int]]): the list of line numbers that separate the monomers, e.g. [[100,200],[300,500]]
     """
+
     def __init__(self, name: str, file_path: str, monomers_list: List[str]):
         super().__init__(name, file_path)
         self.monomers_list = monomers_list
@@ -65,12 +68,12 @@ def separate_monomers(self):
         line = file.readline()
         prev = None
         curr_line = 0
-        while line != '':
+        while line != "":
             # the first line of the first monomer
             if line[:12] == "ATOM      1 ":
                 prev = curr_line - 1
             # the last line of a monomer
-            elif line[:3] == 'TER':
+            elif line[:3] == "TER":
                 # line_numbers.append(curr_line)
                 line_numbers.append([prev + 1, curr_line])
                 prev = curr_line
@@ -87,6 +90,7 @@ class Ligand:
     name (str): the name of the receptor
     file_path (str): the relative path to the receptors pdb file
     """
+
     def __init__(self, name: str, file_path: str):
         self.name = name
         self.file_path = file_path
@@ -112,15 +116,19 @@ def __init__(self, receptor: Receptor, ligand: Ligand, results_path: str):
         self.ligand_reserved_list = []
 
     def hex_docking(self):
-        """Run hex docking using the command line.
-        """
-        hex_output_file = open(self.results_path + 'hex_output.txt', "w")
-
-    # Function to call Hex, including hard coded settings
-
-    # max_docking_solutions set at 5 for testing
-        hex_command = """ open_receptor  """ + self.receptor.file_path + """
-                open_ligand  """ + self.ligand.file_path + """
+        """Run hex docking using the command line."""
+        hex_output_file = open(self.results_path + "hex_output.txt", "w")
+
+        # Function to call Hex, including hard coded settings
+
+        # max_docking_solutions set at 5 for testing
+        hex_command = (
+            """ open_receptor  """
+            + self.receptor.file_path
+            + """
+                open_ligand  """
+            + self.ligand.file_path
+            + """
                 docking_correlation 1
                 docking_score_threshold 0
                 max_docking_solutions 25
@@ -131,12 +139,13 @@ def hex_docking(self):
                 receptor_origin C-825:VAL-O
                 commit_edits
                 activate_docking
-                save_range 1 100 """ \
-        + self.results_path + """ %s pdb""" % (self.receptor.name + '_' + self.ligand.name)
-        subprocess.Popen(HEX_BIN_PATH,
-                         stdin=subprocess.PIPE,
-                         stderr=subprocess.STDOUT,
-                         stdout=hex_output_file).communicate(bytes(hex_command.encode('utf-8')))
+                save_range 1 100 """
+            + self.results_path
+            + """ %s pdb""" % (self.receptor.name + "_" + self.ligand.name)
+        )
+        subprocess.Popen(
+            HEX_BIN_PATH, stdin=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=hex_output_file
+        ).communicate(bytes(hex_command.encode("utf-8")))
         hex_output_file.close()
         ct = datetime.datetime.now()
         print("current time:-", ct)
@@ -152,7 +161,7 @@ def crte_ligand_reserved_attr(self):
         """
         line_numbers = []
         for filename in os.listdir(self.results_path):
-            if filename[-3:] == 'pdb':
+            if filename[-3:] == "pdb":
                 file = open(self.results_path + filename, "r")
                 lines = file.readlines()
                 for i in range(len(lines)):
@@ -167,7 +176,7 @@ def parse_hex_output(self):
         where its value is the total number of solutions.
         For example: {num_soln : 5, 1 : [2, 4], 2 : [1, 3, 5]}
         """
-        hex_output = open(self.results_path + 'hex_output.txt', "r")
+        hex_output = open(self.results_path + "hex_output.txt", "r")
         lines = hex_output.readlines()
         # line number where the clustering starts and ends
         result_start = 0
@@ -219,10 +228,10 @@ def result_dict_generator(self, monomer_number, threshold):
         reference = {}
         for line in receptor_file_lines:
             splitted_line = line.split()
-            if line[0:4] == 'ATOM':
+            if line[0:4] == "ATOM":
 
                 # check if chain name and residue are in the same column, e.g. A1000
-                if re.search(r'\d', splitted_line[4]) is None:
+                if re.search(r"\d", splitted_line[4]) is None:
                     residue = splitted_line[5]
                 else:
                     residue = splitted_line[4][1:]
@@ -239,7 +248,7 @@ def result_dict_generator(self, monomer_number, threshold):
                 if int(residue) in reference:
                     reference[int(residue)][int(splitted_line[1])] = tuple(coord)
                 else:
-                    reference[int(residue)] = {int(splitted_line[1]) : tuple(coord)}
+                    reference[int(residue)] = {int(splitted_line[1]): tuple(coord)}
 
         # here, the structure of the reference dict is is {residue: {atom_num :(x, y, z)}},
 
@@ -247,7 +256,7 @@ def result_dict_generator(self, monomer_number, threshold):
         ac = {}
         result_list = []
         for filename in os.listdir(self.results_path):
-            if filename[-3:] == 'pdb':
+            if filename[-3:] == "pdb":
                 result_list.append(filename)
 
         lowest_en = None  # to keep track of lowest energy
@@ -256,10 +265,10 @@ def result_dict_generator(self, monomer_number, threshold):
         cluster_dict = self.parse_hex_output()
 
         for i in range(len(result_list)):
-            energy = ''
+            energy = ""
 
             # get the ligand_reserved section of the result file
-            file = open(self.results_path + result_list[i], 'r')
+            file = open(self.results_path + result_list[i], "r")
             ligand_reserved_start = self.ligand_reserved_list[i]
             ligand_reserved_section = file.readlines()[ligand_reserved_start:]
 
@@ -267,18 +276,18 @@ def result_dict_generator(self, monomer_number, threshold):
             residue_set = set()
             coor = []
             for line in ligand_reserved_section:
-                if 'REMARK' in line.split(' ') and 'Energy' in line.split(' '):
+                if "REMARK" in line.split(" ") and "Energy" in line.split(" "):
                     cluster_size = len(cluster_dict[i + 1])
-                    total_solutions = cluster_dict['num_soln']
+                    total_solutions = cluster_dict["num_soln"]
 
                     # energy is weighed according to the number of solutions
                     # in that cluster
-                    energy = ((float(line.split(' ')[6][:-1]))/total_solutions) * cluster_size
+                    energy = ((float(line.split(" ")[6][:-1])) / total_solutions) * cluster_size
 
                     # record values if lowest energy
                     if lowest_en is None or energy < lowest_en:
                         lowest_en = energy
-                elif line[:4] == 'ATOM':
+                elif line[:4] == "ATOM":
                     # coordinates of one atom
                     coordinates = tuple(map(float, filter(None, line.split()[6:9])))
                     coor.append(coordinates)
@@ -292,9 +301,15 @@ def result_dict_generator(self, monomer_number, threshold):
                     for aa in reference[res].keys():  # for each atom of that amino acid
                         # check if the distance between atoms of the ligands
                         # and of the amino acid are lower than chosen threshold (5)
-                        distance = math.sqrt(sum([(reference[res][aa][0] - atom[0]) ** 2,
-                                                  (reference[res][aa][1] - atom[1]) ** 2,
-                                                  (reference[res][aa][2] - atom[2]) ** 2]))
+                        distance = math.sqrt(
+                            sum(
+                                [
+                                    (reference[res][aa][0] - atom[0]) ** 2,
+                                    (reference[res][aa][1] - atom[1]) ** 2,
+                                    (reference[res][aa][2] - atom[2]) ** 2,
+                                ]
+                            )
+                        )
 
                         distances.append(distance)
 
@@ -347,7 +362,7 @@ def best_result(self):
         pass
 
     def crte_receptor_dict(self, threshold):
-        """"Return a dictionary that contains the residue-energy
+        """ "Return a dictionary that contains the residue-energy
         dictionary of the monomer. This is not necessary, but maintains
         consistency between monomer and complex receptor dictionaries.
         """
@@ -435,14 +450,14 @@ def separate_results(self):
             line = result_file.readline()
             curr_line = 0
             prev = None
-            while line != '':
+            while line != "":
                 # the start of the first chain
                 if line.split()[0] == "ATOM" and line.split()[1] == "1":
                     # if line.startswith('ATOM      1  '):
                     prev = curr_line - 1
 
                 # the end of a chain
-                elif line[0:3] == 'TER':
+                elif line[0:3] == "TER":
                     line_numbers.append([prev + 1, curr_line])
                     prev = curr_line
 
@@ -462,7 +477,7 @@ def crte_receptor_dict(self, threshold):
             ligand_res = {}
             res_dict = self.result_dict_generator(i, threshold)
             ligand_res[self.ligand.name] = res_dict
-            all_monomers.append({self.receptor.name + '_' + self.receptor.monomers_list[i] : ligand_res})
+            all_monomers.append({self.receptor.name + "_" + self.receptor.monomers_list[i]: ligand_res})
         return all_monomers
 
     def normalize_results(self, threshold):
@@ -547,7 +562,7 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         elif docking == "Ligand file not found":
             return "Ligand file not found"
 
-        results_path = docking_pdb_path + docking.receptor.name + '_' + ligand + '/'
+        results_path = docking_pdb_path + docking.receptor.name + "_" + ligand + "/"
 
         # create folder to store docking results
         os.makedirs(results_path)
@@ -559,11 +574,11 @@ def start(receptor: str, ligand: str, docking_pdb_path: str):
         normalized_results = docking.normalize_results(5)
         final_json = {}
         final_json["energies_json"] = normalized_results
-        final_json["path"] = '//bar.utoronto.ca/HEX_RESULTS/' + docking.receptor.name + '_' + ligand + '/'
-        final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + '_' + ligand + '0001.pdb'
+        final_json["path"] = "//bar.utoronto.ca/HEX_RESULTS/" + docking.receptor.name + "_" + ligand + "/"
+        final_json["best_HEX_result_path"] = final_json["path"] + docking.receptor.name + "_" + ligand + "0001.pdb"
         final_json["date"] = ct_string
         new_json = docking.results_path + "final.json"
-        with open(new_json, 'w') as file:
+        with open(new_json, "w") as file:
             file.write(json.dumps(final_json))
         print("current time:-", datetime.datetime.now())
         return final_json
@@ -575,27 +590,24 @@ def create_receptor(receptor_name: str, receptor_file_path: str):
         with open(receptor_file_path) as f:
             is_monomer = True
             for line in f.readlines():
-                if re.match(r'COMPND   \d CHAIN: \w, \w*', line) is not None:
+                if re.match(r"COMPND   \d CHAIN: \w, \w*", line) is not None:
                     is_monomer = False
                     # if the receptor would be a monomer the regex would be
                     # r'COMPND   \d CHAIN: \w;'
 
                     # To make a list of the monomers' labels
-                    print(receptor_name + ' identified as a protein complex')
-                    if line[11:16] == 'CHAIN':
-                        monomers_list = line.split(': ')[-1].split(', ')
+                    print(receptor_name + " identified as a protein complex")
+                    if line[11:16] == "CHAIN":
+                        monomers_list = line.split(": ")[-1].split(", ")
                         # The COMPND line ends with ';' therefore it needs to be
                         # removed from the last label
                         monomers_list[-1] = monomers_list[-1][0]
-                        new_receptor = ComplexReceptor(receptor_name,
-                                                       receptor_file_path,
-                                                       monomers_list)
+                        new_receptor = ComplexReceptor(receptor_name, receptor_file_path, monomers_list)
                         return new_receptor
                     print("Unknown pdb structure, need further investigation")
 
             if is_monomer:
-                new_receptor = MonomerReceptor(receptor_name,
-                                               receptor_file_path)
+                new_receptor = MonomerReceptor(receptor_name, receptor_file_path)
                 return new_receptor
 
     def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
@@ -607,40 +619,39 @@ def create_docking(receptor_name: str, ligand_name: str, docking_pdb_path: str):
 
         # check that the docking combination has not been run before
         # results_path = docking_pdb_path + 'RESULTS/' + receptor_name + '_' + ligand_name + '/'
-        if '.' in receptor_name:
-            receptor_name = receptor_name[:receptor_name.index('.')]
-        command = ['ls ' + 'AF2_' + receptor_name + '*.pdb']
-        completed_process = subprocess.run(command,
-                                           shell=True,
-                                           cwd=receptor_folder,
-                                           stdout=subprocess.PIPE,
-                                           stderr=subprocess.PIPE,
-                                           text=True)
+        if "." in receptor_name:
+            receptor_name = receptor_name[: receptor_name.index(".")]
+        command = ["ls " + "AF2_" + receptor_name + "*.pdb"]
+        completed_process = subprocess.run(
+            command, shell=True, cwd=receptor_folder, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
         if completed_process.returncode != 0:
             return "Receptor file not found"
         receptor_file = completed_process.stdout[:-1]
 
         receptor_file_path = receptor_folder + receptor_file
-        receptor_name = receptor_file[4:(receptor_file.index('.') + 2)]
+        receptor_name = receptor_file[4 : (receptor_file.index(".") + 2)]
 
-        results_path = docking_pdb_path + receptor_name + '_' + ligand_name + '/'
+        results_path = docking_pdb_path + receptor_name + "_" + ligand_name + "/"
 
         if os.path.exists(results_path):
-            print("The docking between {0} and {1} has already been done.".format(receptor_name,
-                                                                                  ligand_name))
+            print("The docking between {0} and {1} has already been done.".format(receptor_name, ligand_name))
             return [None, results_path]
         receptor = Docker.create_receptor(receptor_name, receptor_file_path)
 
         # find ligand file and create ligand object
-        ligand_folder = '/DATA/HEX_API/HEX_SELECTED_LIGANDS/'
+        ligand_folder = "/DATA/HEX_API/HEX_SELECTED_LIGANDS/"
         ligand_file_found = False
 
         for ligand_file in os.listdir(ligand_folder):
-            if ligand_file[0] != '.' and len(ligand_file.split('.')) == 2 and \
-                ligand_file.split('.')[1] == 'sdf' and \
-                    ligand_file[:-4].lower() == ligand_name.lower():
+            if (
+                ligand_file[0] != "."
+                and len(ligand_file.split(".")) == 2
+                and ligand_file.split(".")[1] == "sdf"
+                and ligand_file[:-4].lower() == ligand_name.lower()
+            ):
                 ligand_file_found = True
-                ligand_file_path = ligand_folder + '/' + ligand_file
+                ligand_file_path = ligand_folder + "/" + ligand_file
                 ligand = Ligand(ligand_name, ligand_file_path)
 
         if not ligand_file_found:
@@ -697,10 +708,10 @@ def create_mapping_filtered(folder_path: str, results_path: str):
         sdf_files = os.listdir(folder_path)
         for file in sdf_files:
             if file[0] != "." and file[-4:] == ".sdf":
-                name = file[file.index("_") + 1:-4]
-                mapped_sdf.append({'value': file, 'text': name})
+                name = file[file.index("_") + 1 : -4]
+                mapped_sdf.append({"value": file, "text": name})
         json_file = results_path + "sdf_mapping_filtered.json"
-        with open(json_file, 'w') as file:
+        with open(json_file, "w") as file:
             file.write(json.dumps(mapped_sdf))
         return mapped_sdf
 
@@ -720,8 +731,8 @@ def create_mapping_unfiltered(self, folder_path: str, results_path: str):
             if file[0] != "." and file[-4:] == ".sdf":
                 names = self.get_substance_name(file, folder_path)
                 all_names = ",".join(names)
-                mapped_sdf.append({'value': file, 'text': all_names})
+                mapped_sdf.append({"value": file, "text": all_names})
         json_file = results_path + "sdf_mapping_unfiltered.json"
-        with open(json_file, 'w') as file:
+        with open(json_file, "w") as file:
             file.write(json.dumps(mapped_sdf))
         return mapped_sdf
diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index d5e0d6c..7caf703 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -27,9 +27,9 @@ def test_complex_receptor_init(self):
         """
 
         monomers_list = ["A", "B"]
-        complex_receptor = ComplexReceptor("test_complex_receptor",
-                                           "tests/data/AF2_AT8G88888_complex.pdb",
-                                           monomers_list)
+        complex_receptor = ComplexReceptor(
+            "test_complex_receptor", "tests/data/AF2_AT8G88888_complex.pdb", monomers_list
+        )
         self.assertEqual(complex_receptor.name, "test_complex_receptor")
         self.assertEqual(complex_receptor.file_path, "tests/data/AF2_AT8G88888_complex.pdb")
         self.assertEqual(complex_receptor.monomers_list, monomers_list)
@@ -134,10 +134,10 @@ def test_docking_complex_results(self):
 
         self.assertIsInstance(normalized_results, dict)
         self.assertIsNot(len(normalized_results), 0)
-        self.assertIn('AT8G88888_complex_A', normalized_results)
-        self.assertIn('AT8G88888_complex_B', normalized_results)
-        self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_A'])
-        self.assertIn('6325_Ethylene', normalized_results['AT8G88888_complex_B'])
+        self.assertIn("AT8G88888_complex_A", normalized_results)
+        self.assertIn("AT8G88888_complex_B", normalized_results)
+        self.assertIn("6325_Ethylene", normalized_results["AT8G88888_complex_A"])
+        self.assertIn("6325_Ethylene", normalized_results["AT8G88888_complex_B"])
 
     def test_docking_monomer_results(self):
         """Test that correct dictionary is created in normalized_results for
@@ -156,8 +156,8 @@ def test_docking_monomer_results(self):
 
         self.assertIsInstance(normalized_results, dict)
         self.assertIsNot(len(normalized_results), 0)
-        self.assertIn('AT9G99999_monomer', normalized_results)
-        self.assertIn('6325_Ethylene', normalized_results['AT9G99999_monomer'])
+        self.assertIn("AT9G99999_monomer", normalized_results)
+        self.assertIn("6325_Ethylene", normalized_results["AT9G99999_monomer"])
 
 
 class TestSDFMappingClass(unittest.TestCase):
@@ -167,7 +167,12 @@ def test_create_mapping_filtered(self):
         """Test that the correct mapping is returned"""
 
         mapping_results = SDFMapping.create_mapping_filtered("tests/data/sample_ligands/filtered/", "tests/data/")
-        correct_mapping = [{"value": "443453_Gibberellin_A15.sdf", "text": "Gibberellin_A15"}, {"value": "5984_D-(-)-Fructose.sdf", "text": "D-(-)-Fructose"}, {"value": "801_Auxin.sdf", "text": "Auxin"}, {"value": "73672_isoxaben.sdf", "text": "isoxaben"}]
+        correct_mapping = [
+            {"value": "443453_Gibberellin_A15.sdf", "text": "Gibberellin_A15"},
+            {"value": "5984_D-(-)-Fructose.sdf", "text": "D-(-)-Fructose"},
+            {"value": "801_Auxin.sdf", "text": "Auxin"},
+            {"value": "73672_isoxaben.sdf", "text": "isoxaben"},
+        ]
         self.assertEqual(mapping_results, correct_mapping)
         self.assertTrue(os.path.exists("tests/data/sdf_mapping_filtered.json"))
         if os.path.exists("tests/data/sdf_mapping_filtered.json"):
@@ -178,12 +183,23 @@ def test_create_mapping_unfiltered(self):
         """Test that the correct mapping is returned"""
         mapping = SDFMapping()
         mapping_results = mapping.create_mapping_unfiltered("tests/data/sample_ligands/unfiltered/", "tests/data/")
-        correct_mapping = [{"value": "135355153.sdf", "text": "F II (sugar fraction),LK41100000,NIOSH/LK4110000"}, {"value": "134970870.sdf", "text": "10597-68-9,149014-33-5,196419-06-4,3812-57-5,57-48-7,69-67-0,AI3-23514,Advantose FS 95,CCRIS 3335,D-(-)-Fructose,D-(-)-Levulose,D-Fructose,EINECS 200-333-3,Fructose,Fructose solution,Fructose, D-,Fructose, pure,Fruit sugar,Furucton,Hi-Fructo 970,Krystar 300,Levulose,Nevulose,Sugar, fruit,UNII-6YSS42VSEV,arabino-Hexulose"}, {"value": "103061392.sdf", "text": "C18210,Chorionic somatomammotropin hormone,PL,Placental lactogen"}, {"value": "135191341.sdf", "text": "73684-80-7,L-Leucinamide, 5-oxo-L-prolyl-L-seryl-,Pyr-ser-leu-NH2,Pyro-gln-ser-leu-amide,Pyroglutamine-serine-leucinamide,Pyroglutaminyl-seryl-leucinamide,Pyroglutamylserylleucinamide,Thyrotropin releasing hormone-AN,Trh-AN"}]
+        correct_mapping = [
+            {"value": "135355153.sdf", "text": "F II (sugar fraction),LK41100000,NIOSH/LK4110000"},
+            {
+                "value": "134970870.sdf",
+                "text": "10597-68-9,149014-33-5,196419-06-4,3812-57-5,57-48-7,69-67-0,AI3-23514,Advantose FS 95,CCRIS 3335,D-(-)-Fructose,D-(-)-Levulose,D-Fructose,EINECS 200-333-3,Fructose,Fructose solution,Fructose, D-,Fructose, pure,Fruit sugar,Furucton,Hi-Fructo 970,Krystar 300,Levulose,Nevulose,Sugar, fruit,UNII-6YSS42VSEV,arabino-Hexulose",
+            },
+            {"value": "103061392.sdf", "text": "C18210,Chorionic somatomammotropin hormone,PL,Placental lactogen"},
+            {
+                "value": "135191341.sdf",
+                "text": "73684-80-7,L-Leucinamide, 5-oxo-L-prolyl-L-seryl-,Pyr-ser-leu-NH2,Pyro-gln-ser-leu-amide,Pyroglutamine-serine-leucinamide,Pyroglutaminyl-seryl-leucinamide,Pyroglutamylserylleucinamide,Thyrotropin releasing hormone-AN,Trh-AN",
+            },
+        ]
         self.assertEqual(mapping_results, correct_mapping)
         self.assertTrue(os.path.exists("tests/data/sdf_mapping_unfiltered.json"))
         if os.path.exists("tests/data/sdf_mapping_unfiltered.json"):
             os.remove("tests/data/sdf_mapping_unfiltered.json")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

From e2b8045dce61b14b8887c1ebb361e819915533b4 Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 20:13:21 -0400
Subject: [PATCH 28/35] Minor updates.

---
 api/utils/docking_utils.py        | 15 ++++++++++++---
 tests/utils/test_docking_utils.py |  4 ----
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index f8cc951..024be5f 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -148,8 +148,6 @@ def hex_docking(self):
         ).communicate(bytes(hex_command.encode("utf-8")))
         hex_output_file.close()
         ct = datetime.datetime.now()
-        print("current time:-", ct)
-        print("Hex docking completed")
 
     def crte_ligand_reserved_attr(self):
         """This function populates the Docking instance's ligand_reserved_list attribute
@@ -160,14 +158,17 @@ def crte_ligand_reserved_attr(self):
         it begins at line 1499, and so on ...
         """
         line_numbers = []
+
         for filename in os.listdir(self.results_path):
             if filename[-3:] == "pdb":
                 file = open(self.results_path + filename, "r")
                 lines = file.readlines()
+
                 for i in range(len(lines)):
                     if "Docked ligand coordinates..." in lines[i]:
                         line_numbers.append(i)
                         break
+
         self.ligand_reserved_list = line_numbers
 
     def parse_hex_output(self):
@@ -181,28 +182,35 @@ def parse_hex_output(self):
         # line number where the clustering starts and ends
         result_start = 0
         result_end = 0
+
         for i in range(len(lines)):
             splitted_line = lines[i].split(" ")
             if len(splitted_line) > 8 and splitted_line[0] == "Clst":
                 result_start = i + 2
             if len(splitted_line) > 2 and "save_range" in splitted_line:
                 result_end = i - 2
+
         clustering_lines = lines[result_start:result_end]
         clusters = {}
         clusters["num_soln"] = len(clustering_lines)
+
         for line in clustering_lines:
             cleaned_line = line.strip().split(" ")
             res = []
+
             # only keep non-blank items in line
             for ch in cleaned_line:
                 if ch != "":
                     res.append(ch)
+
             clst = int(res[0])
             sln = int(res[1])
+
             if clst not in clusters:
                 clusters[clst] = [sln]
             else:
                 clusters[clst].append(sln)
+
         return clusters
 
     def result_dict_generator(self, monomer_number, threshold):
@@ -228,8 +236,8 @@ def result_dict_generator(self, monomer_number, threshold):
         reference = {}
         for line in receptor_file_lines:
             splitted_line = line.split()
-            if line[0:4] == "ATOM":
 
+            if line[0:4] == "ATOM":
                 # check if chain name and residue are in the same column, e.g. A1000
                 if re.search(r"\d", splitted_line[4]) is None:
                     residue = splitted_line[5]
@@ -255,6 +263,7 @@ def result_dict_generator(self, monomer_number, threshold):
         # The energy for each reference element will be stored in dictionary 'ac'
         ac = {}
         result_list = []
+
         for filename in os.listdir(self.results_path):
             if filename[-3:] == "pdb":
                 result_list.append(filename)
diff --git a/tests/utils/test_docking_utils.py b/tests/utils/test_docking_utils.py
index 7caf703..22b8f8d 100644
--- a/tests/utils/test_docking_utils.py
+++ b/tests/utils/test_docking_utils.py
@@ -199,7 +199,3 @@ def test_create_mapping_unfiltered(self):
         self.assertTrue(os.path.exists("tests/data/sdf_mapping_unfiltered.json"))
         if os.path.exists("tests/data/sdf_mapping_unfiltered.json"):
             os.remove("tests/data/sdf_mapping_unfiltered.json")
-
-
-if __name__ == "__main__":
-    unittest.main()

From 2c4bace4a7ce5706b9241f3a6d43116279f42b99 Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 20:20:00 -0400
Subject: [PATCH 29/35] Should work this time.

---
 api/utils/docking_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py
index 024be5f..68987ff 100755
--- a/api/utils/docking_utils.py
+++ b/api/utils/docking_utils.py
@@ -148,6 +148,8 @@ def hex_docking(self):
         ).communicate(bytes(hex_command.encode("utf-8")))
         hex_output_file.close()
         ct = datetime.datetime.now()
+        print("current time:-", ct)
+        print("Hex docking completed")
 
     def crte_ligand_reserved_attr(self):
         """This function populates the Docking instance's ligand_reserved_list attribute

From 37a3a040b75dee39e2c0f069c370b3a65690d80f Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 20:26:03 -0400
Subject: [PATCH 30/35] Updated dependencies.

---
 requirements.txt | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 2af6bf4..3de5643 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,31 +1,31 @@
 aniso8601==9.0.1
 async-timeout==4.0.3
 attrs==23.2.0
-black==24.3.0
+black==24.4.1
 blinker==1.7.0
 cachelib==0.9.0
 certifi==2024.2.2
 charset-normalizer==3.3.2
 click==8.1.7
-coverage==7.4.4
+coverage==7.5.0
 Deprecated==1.2.14
 flake8==7.0.0
-Flask==3.0.2
+Flask==3.0.3
 Flask-Caching==2.1.0
 Flask-Cors==4.0.0
-Flask-Limiter==3.5.1
-flask-marshmallow==1.2.0
+Flask-Limiter==3.6.0
+flask-marshmallow==1.2.1
 flask-restx==1.3.0
 Flask-SQLAlchemy==3.1.1
 greenlet==3.0.3
-idna==3.6
-importlib_resources==6.3.1
+idna==3.7
+importlib_resources==6.4.0
 iniconfig==2.0.0
-itsdangerous==2.1.2
+itsdangerous==2.2.0
 Jinja2==3.1.3
 jsonschema==4.21.1
 jsonschema-specifications==2023.12.1
-limits==3.10.1
+limits==3.11.0
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 marshmallow==3.21.1
@@ -36,8 +36,8 @@ mysqlclient==2.2.4
 ordered-set==4.1.0
 packaging==24.0
 pathspec==0.12.1
-platformdirs==4.2.0
-pluggy==1.4.0
+platformdirs==4.2.1
+pluggy==1.5.0
 pycodestyle==2.11.1
 pyflakes==3.2.0
 Pygments==2.17.2
@@ -45,14 +45,14 @@ pyrsistent==0.20.0
 pytest==8.1.1
 python-dateutil==2.9.0.post0
 pytz==2024.1
-redis==5.0.3
-referencing==0.34.0
+redis==5.0.4
+referencing==0.35.0
 requests==2.31.0
 rich==13.7.1
 rpds-py==0.18.0
 six==1.16.0
-SQLAlchemy==2.0.28
-typing_extensions==4.10.0
+SQLAlchemy==2.0.29
+typing_extensions==4.11.0
 urllib3==2.2.1
-Werkzeug==3.0.1
+Werkzeug==3.0.2
 wrapt==1.16.0

From 114b5e43331cd8d3feff50d416d213ad277b9e9d Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 20:31:56 -0400
Subject: [PATCH 31/35] Update GitHub Actions and Docker stuff (not tested).

---
 .github/workflows/bar-api.yml | 2 +-
 docker-compose.yml            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/bar-api.yml b/.github/workflows/bar-api.yml
index abd298e..ddc8dfb 100644
--- a/.github/workflows/bar-api.yml
+++ b/.github/workflows/bar-api.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: [3.8, 3.9, 3.10.13, 3.11, 3.12]
+        python-version: [3.8, 3.9, 3.10.14, 3.11, 3.12]
 
     services:
       redis:
diff --git a/docker-compose.yml b/docker-compose.yml
index a2fea40..e723e92 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,7 +3,7 @@ version: "3.7"
 services:
 
   mysqldb:
-    image: mysql:8.1.0
+    image: mysql:8.3.0
     container_name: BAR_mysqldb
     # Must use this for mariadb client to connect
     command: --default-authentication-plugin=mysql_native_password
@@ -12,7 +12,7 @@ services:
       - MYSQL_ROOT_PASSWORD=root
 
   redis:
-    image: redis:7.2.1
+    image: redis:7.2.4
     container_name: BAR_redis
     restart: always
     ports:

From 02e644696bfba477ab4ea9e844da3808a0cfc339 Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 20:46:07 -0400
Subject: [PATCH 32/35] Working on readthedocs.

---
 docs/source/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 96e422b..735e5c9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -18,7 +18,7 @@
 # -- Project information -----------------------------------------------------
 
 project = "BAR API"
-copyright = "2023, BAR Developers"
+copyright = "2024, BAR Developers"
 author = "BAR Developers"
 
 # The full version, including alpha/beta/rc tags

From f16eb4e7cb8540bb0e96fc5540564d5e016afdb3 Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 20:47:21 -0400
Subject: [PATCH 33/35] Added readthedocs.yaml

---
 .readthedocs.yaml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .readthedocs.yaml

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..789ff29
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,32 @@
+# Read the Docs configuration file for Sphinx projects
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.12"
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/source/conf.py
+  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
+  # builder: "dirhtml"
+  # Fail on all warnings to avoid broken references
+  fail_on_warning: true
+
+# Optionally build your docs in additional formats such as PDF and ePub
+formats:
+  - pdf
+
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+  install:
+    - requirements: docs/requirements.txt
+    - method: pip
+      path: .

From 50bacc68eaa26fc86a4af5747ce505fdb05c9f85 Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 20:59:07 -0400
Subject: [PATCH 34/35] Working on documentation.

---
 .readthedocs.yaml     |  3 +--
 docs/requirements.txt | 42 +++++++++++++++++++++---------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 789ff29..98f605d 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -21,6 +21,7 @@ sphinx:
 # Optionally build your docs in additional formats such as PDF and ePub
 formats:
   - pdf
+  - epub
 
 # Optional but recommended, declare the Python requirements required
 # to build your documentation
@@ -28,5 +29,3 @@ formats:
 python:
   install:
     - requirements: docs/requirements.txt
-    - method: pip
-      path: .
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 91f7292..8b37131 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,27 +1,27 @@
-alabaster==0.7.13
-Babel==2.12.1
-beautifulsoup4==4.12.2
-certifi==2023.7.22
-charset-normalizer==3.2.0
-docutils==0.20.1
-furo==2023.8.19
-idna==3.4
+alabaster==0.7.16
+Babel==2.14.0
+beautifulsoup4==4.12.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+docutils==0.21.2
+furo==2024.1.29
+idna==3.7
 imagesize==1.4.1
-Jinja2==3.1.2
-MarkupSafe==2.1.3
-packaging==23.1
-Pygments==2.16.1
-pytz==2023.3
+Jinja2==3.1.3
+MarkupSafe==2.1.5
+packaging==24.0
+Pygments==2.17.2
+pytz==2024.1
 requests==2.31.0
 snowballstemmer==2.2.0
-soupsieve==2.4.1
-Sphinx==7.2.4
+soupsieve==2.5
+Sphinx==7.3.7
 sphinx-basic-ng==1.0.0b1
 sphinx-copybutton==0.5.2
-sphinxcontrib-applehelp==1.0.7
-sphinxcontrib-devhelp==1.0.5
-sphinxcontrib-htmlhelp==2.0.4
+sphinxcontrib-applehelp==1.0.8
+sphinxcontrib-devhelp==1.0.6
+sphinxcontrib-htmlhelp==2.0.5
 sphinxcontrib-jsmath==1.0.1
-sphinxcontrib-qthelp==1.0.6
-sphinxcontrib-serializinghtml==1.1.9
-urllib3==2.0.4
+sphinxcontrib-qthelp==1.0.7
+sphinxcontrib-serializinghtml==1.1.10
+urllib3==2.2.1

From 1a9ac7c556955d7e4972de8b8b39581db9db2246 Mon Sep 17 00:00:00 2001
From: asherpasha <asher.pasha@utoronto.ca>
Date: Thu, 25 Apr 2024 21:01:16 -0400
Subject: [PATCH 35/35] Don't fail on warning.

---
 .readthedocs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 98f605d..a7ebce6 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -16,7 +16,7 @@ sphinx:
   # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
   # builder: "dirhtml"
   # Fail on all warnings to avoid broken references
-  fail_on_warning: true
+  # fail_on_warning: true
 
 # Optionally build your docs in additional formats such as PDF and ePub
 formats: