Skip to content

Commit

Permalink
Merge pull request #1 from delemottelab/more-hyper-params
Browse files Browse the repository at this point in the history
More hyper params
  • Loading branch information
oliverfleetwood authored Dec 9, 2019
2 parents ad870fe + 980da3b commit f0ed06c
Show file tree
Hide file tree
Showing 8 changed files with 477 additions and 196 deletions.
252 changes: 252 additions & 0 deletions ChangePDB.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
import argparse
import sys
import numpy as np
import re
import matplotlib.pyplot as plotter
import os

parser = argparse.ArgumentParser(epilog='Change PDB files, eg the beta field. Annie Westerlund 2017.');
parser.add_argument('-top','--topology_file',help='Input 1 topology file (.gro, .pdb, etc)',type=str,default='');
parser.add_argument('-beta','--change_beta',help='Flag for writing beta of residues (optional).',action='store_true');
parser.add_argument('-hel_beta','--change_helix_beta',help='Flag for writing beta of helices (optional).',action='store_true');
parser.add_argument('-rename','--rename_atoms',help='Flag for renaming residues from CHARMM-gui to Gromacs (optional).',action='store_true');

parser.add_argument('-renumber','--renumber_residues',help='Flag for renumbering residues from sequential to chain numbering. Need to enter number of chains or set first resID. (optional)',action='store_true');
parser.add_argument('-n_chains','--number_of_chains',help='Number of chains to use when renumbering the residues',default=1);
parser.add_argument('-first_resid','--first_resID',help='Start resID in the structure.',default=1);

parser.add_argument('-f','--in_file',help='Input file.',default='');
parser.add_argument('-f_conv','--helix_ind_conv_file',help='File containing residue number of helices (optional).',default=0);
parser.add_argument('-fe','--file_end_name',type=str,help='Output file end name (optional)', default='');
parser.add_argument('-od','--out_directory',type=str,help='The directory where data should be saved (optional)',default='');
parser.add_argument('-o','--out_file',help='File name (optional).',default='structure');

args = parser.parse_args();

# Put / at end of out directory if not present. Check so that folder extsts, otherwise construct it.
if args.out_directory !='':
if args.out_directory[-1] != '/':
args.out_directory += '/';

if not os.path.exists(args.out_directory):
os.makedirs(args.out_directory);

# Change residue names
if args.rename_atoms:
print('Changing residue names to match gromacs names');
fID1 = open(args.topology_file,'r');
fID2 = open(args.out_directory + args.out_file + '_gmx_' + args.file_end_name+'.pdb','w');
first_line = True;

for line in fID1:
if line[0:3] == 'END':
continue;

if first_line:
first_line = False;
fID2.write(line);
else:
atom = line[13:17];
residue = line[17:21];

#Change the names of atoms and residues
if atom == 'CL ':
atom = 'CLA ';
residue = 'CLA ';
elif atom == 'K ':
atom = 'POT ';
residue = 'POT ';
elif atom == 'OH2 ':
atom = 'OW ';
residue = 'SOL ';
elif atom == 'H1 ':
atom = 'HW1 ';
residue = 'SOL ';
elif atom == 'H2 ':
atom = 'HW2 ';
residue = 'SOL ';

tmpLine = line[0:13] + atom + residue + line[21::];
fID2.write(tmpLine);

fID1.close();
fID2.close();


# Change beta columns general
if args.change_beta:
print('Changing beta column ' + args.file_end_name);
values = np.loadtxt(args.in_file);
print('Max value: '+str(np.max(values)));
print('Min value: '+str(np.min(values)));


counter = 0;
resid_counter = -1;
current_resid = -1;
next_resid_write = -1;
fID1 = open(args.topology_file,'r');
fID2 = open(args.out_directory + args.out_file + '_beta_'+ args.file_end_name+'.pdb','w');
first_line = True;

for line in fID1:
if line[0:3] == 'END':
continue;

if first_line:
first_line = False;
fID2.write(line);
else:
if line[0:3] == 'END' or line[0:3]=='TER':
continue;
resid = int(line[23:26]);

if resid != current_resid:
current_resid = resid;
resid_counter += 1;

if resid_counter < len(values):
fID2.write(line[0:60]);
fID2.write(' ');
fID2.write('%.3f' % values[resid_counter]);
fID2.write(line[66::]);
else:
fID2.write(line[0:60]);
fID2.write(' ');
fID2.write('0.00');
fID2.write(line[66::]);


fID1.close();
fID2.close();

# Change beta column of helices
if args.change_helix_beta:
print('Changing beta column for each helix');
helix_resid_convert_ind = np.loadtxt(args.helix_ind_conv_file);

values = np.loadtxt(args.in_file);
values -= np.min(values);
values /= np.max(values);
values = np.floor(1000*values)/100;

counter = 0;
resid_counter = -1;
current_resid = -1;
current_resid_min = helix_resid_convert_ind[counter,0];
current_resid_max = helix_resid_convert_ind[counter,1];
fID1 = open(args.topology_file,'r');
fID2 = open(args.out_directory + args.out_file + '_helix_beta_'+ args.file_end_name+'.pdb','w');
first_line = True;

for line in fID1:
if line[0:3] == 'END' or line[0:3]=='TER':
continue;

if first_line:
first_line = False;
fID2.write(line);
else:
resid = int(line[23:26]);
if resid != current_resid:
current_resid = resid;
resid_counter += 1;

if resid_counter >= current_resid_min and resid_counter <= current_resid_max:
fID2.write(line[0:62]);
fID2.write(str(values[counter]));
fID2.write(line[66::]);
else:
fID2.write(line[0:62]);
fID2.write('0.00');
fID2.write(line[66::]);
if resid_counter > current_resid_max:
counter += 1;
if counter < len(helix_resid_convert_ind[::,0]):
current_resid_min = helix_resid_convert_ind[counter,0];
current_resid_max = helix_resid_convert_ind[counter,1];
else:
current_resid_min = -1;
current_resid_max = -1;


fID1.close();
fID2.close();

if args.renumber_residues:

chain_letters = 'ABCDEFGHIJKLMNOPQRSTUVXYZ'

n_chains = float(args.number_of_chains);
first_resid = int(args.first_resID);

fID1 = open(args.topology_file,'r');
fID2 = open(args.out_directory + args.out_file + '_renumbered_'+ args.file_end_name+'.pdb','w');

lines = [];
# Collect all lines
for line in fID1:
lines.append(line);

# Count the number of residues
counter = 0;
resid_counter = 0;
current_resid = -1;
first_line = True;


for i in range(1,len(lines)):
line = lines[i];

if line[0:3] == 'END' or line[0:3]=='TER':
continue;


resid = int(line[23:26]);
if resid != current_resid:
current_resid = resid;
resid_counter += 1;

# Compute total number of residues per chain
nResiduesPerChain = np.floor(resid_counter/n_chains);

# Renumber residues
counter = 0;
resid_counter = 0;
current_resid = -1;
first_line = True;
new_resid = first_resid-1;
chain_id = 0;

for i in range(0,len(lines)):
line = lines[i];
if line[0:3] == 'END' or line[0:3]=='TER':
fID2.write(line);
continue;

if first_line:
first_line = False;
fID2.write(line);
continue;

resid = int(line[22:26]);
if resid != current_resid:
current_resid = resid;
resid_counter += 1;
new_resid += 1;
if resid_counter > nResiduesPerChain:
new_resid = first_resid;
resid_counter = 1;
chain_id += 1;
fID2.write('TER\n');

if new_resid < 10:
fID2.write(line[0:21]+chain_letters[chain_id]+' '+str(new_resid)+line[26::]);
elif new_resid < 100:
fID2.write(line[0:21]+chain_letters[chain_id]+' '+str(new_resid)+line[26::]);
else:
fID2.write(line[0:21]+chain_letters[chain_id]+' '+str(new_resid)+line[26::]);

print(nResiduesPerChain)


22 changes: 14 additions & 8 deletions benchmarking/computing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def compute(extractor_type,
overwrite=False,
accuracy_method='mse',
displacement=1e-1,
visualize=True,
visualize=False,
natoms=100,
noise_level=1e-2, # [1e-2, 1e-2, 2e-1, 2e-1],
output_dir="output/benchmarking/"):
"""
Expand All @@ -49,31 +50,36 @@ def compute(extractor_type,
n_iterations=n_iterations)
n_extractors = len(extractor_names)
for iter in range(iterations_per_model):
modeldir = "{output_dir}/{extractor_type}/{feature_type}/{test_model}/noise-{noise_level}/iter-{iter}/".format(
modeldir = "{output_dir}/{extractor_type}/{feature_type}/{test_model}/noise-{noise_level}/atoms-{natoms}/iter-{iter}/".format(
output_dir=output_dir,
extractor_type=extractor_type,
feature_type=feature_type,
test_model=test_model,
noise_level=noise_level,
iter=iter)
natoms=natoms,
iter=iter
)

finished_extractors = []
for name in extractor_names:
if not overwrite and os.path.exists(modeldir):
if os.path.exists(modeldir):
filepath = "{}/{}/importance_per_residue.npy".format(modeldir, name)
existing_files = glob.glob(filepath)
if len(existing_files) > 0:
if overwrite and len(existing_files) > 0:
logger.debug("File %s already exists. overwriting", existing_files[0])
elif len(existing_files) > 0:
logger.debug("File %s already exists. skipping computations", existing_files[0])
finished_extractors.append(name)
else:
logger.debug("File %s does not exists. performing computations", filepath)
else:
os.makedirs(modeldir)
needs_computations = len(finished_extractors) < n_extractors
dg = DataGenerator(natoms=100,
atoms_per_cluster = max(int(natoms / 10 + 0.5), 1)
dg = DataGenerator(natoms=natoms,
nclusters=3,
natoms_per_cluster=[10, 10, 10],
nframes_per_cluster=1200 if needs_computations else 2,
natoms_per_cluster=[atoms_per_cluster, atoms_per_cluster, atoms_per_cluster],
nframes_per_cluster=12 * natoms if needs_computations else 2,
# Faster generation for postprocessing purposes when we don't need the frames
test_model=test_model,
noise_natoms=None,
Expand Down
Loading

0 comments on commit f0ed06c

Please sign in to comment.