Skip to content

Commit

Permalink
new tuning script for smad
Browse files Browse the repository at this point in the history
  • Loading branch information
antoineBarbez committed Apr 20, 2019
1 parent 07ad6b9 commit f998f5e
Show file tree
Hide file tree
Showing 90 changed files with 244 additions and 39 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified data/.DS_Store
Binary file not shown.
Binary file modified data/antipatterns/.DS_Store
Binary file not shown.
Empty file.
20 changes: 11 additions & 9 deletions experiments/training/train_smad.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@ def parse_args():
parser.add_argument('-dense_sizes', nargs='+', type=int, help="The sizes of each (dense) hidden layer in the network.")
parser.add_argument("-n_net", type=int, default=10, help="The number of distinct networks to be trained and saved.")
parser.add_argument("-n_step", type=int, default=300, help="The number of training steps.")
parser.add_argument("-decay_step", type=int, default=300, help="The number of training steps after which the learning rate is decayed")
parser.add_argument("-decay_step", type=int, default=100, help="The number of training steps after which the learning rate is decayed")
parser.add_argument("-lr_decay", type=float, default=0.5, help="The factor by which the learning rate is multiplied every 'decay_step' steps")
return parser.parse_args()

# Get the path of a trained model
def get_save_path(antipattern, test_system, net_number):
    """Return the checkpoint path of one trained network, creating its directory.

    Checkpoints are stored under
    ``<ROOT_DIR>/neural_networks/smad/trained_models/<antipattern>/<test_system>/``
    and are named ``network<net_number>``.

    Args:
        antipattern: Name of the antipattern, used as a directory name.
        test_system: Name of the held-out system, used as a sub-directory name.
        net_number: Index of the network; appended to the ``network`` prefix.

    Returns:
        The path (without extension) to hand to the saver.
    """
    directory = os.path.join(ROOT_DIR, 'neural_networks', 'smad', 'trained_models', antipattern, test_system)
    # The per-system directory does not exist before the first save.
    if not os.path.exists(directory):
        os.makedirs(directory)
    return os.path.join(directory, 'network' + str(net_number))

def build_dataset(antipattern, systems):
input_size = {'god_class':8, 'feature_envy':9}
Expand All @@ -60,14 +63,13 @@ def train(session, model, x_train, y_train, x_test, y_test, num_step, start_lr,
feed_dict_train = {
model.input_x: x_train,
model.input_y: y_train,
model.training: True,
model.learning_rate:learning_rate,
model.beta:beta}

session.run(model.learning_step, feed_dict=feed_dict_train)

loss_train = session.run(model.loss, feed_dict={model.input_x:x_train, model.input_y:y_train, model.training: False})
loss_test = session.run(model.loss, feed_dict={model.input_x:x_test, model.input_y:y_test, model.training: False})
loss_train = session.run(model.loss, feed_dict={model.input_x:x_train, model.input_y:y_train})
loss_test = session.run(model.loss, feed_dict={model.input_x:x_test, model.input_y:y_test})
losses_train.append(loss_train)
losses_test.append(loss_test)
return losses_train, losses_test
Expand All @@ -83,7 +85,7 @@ def train(session, model, x_train, y_train, x_test, y_test, num_step, start_lr,
# Create model
model = md.SMAD(
shape=args.dense_sizes,
input_size=x_train[0].shape[-1])
input_size=x_train.shape[-1])

# To save and restore a trained model
saver = tf.train.Saver(max_to_keep=args.n_net)
Expand Down Expand Up @@ -116,13 +118,13 @@ def train(session, model, x_train, y_train, x_test, y_test, num_step, start_lr,
all_losses_test.append(losses_test)

# Save the model
saver.save(sess=session, save_path=get_save_path(args.antipattern, i))
saver.save(sess=session, save_path=get_save_path(args.antipattern, args.test_system, i))


# Compute the ensemble prediction on the test system
ensemble_prediction = nnUtils.ensemble_prediction(
model=model,
save_paths=[get_save_path(args.antipattern, i) for i in range(args.n_net)],
save_paths=[get_save_path(args.antipattern, args.test_system, i) for i in range(args.n_net)],
input_x=x_test)

# Print Ensemble performances
Expand Down
9 changes: 9 additions & 0 deletions experiments/tuning/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Shared import context for the scripts in experiments/tuning.
import os
import sys

# Repository root: two directories above the directory containing this file.
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
# Put the root first on sys.path before the project imports below run
# (presumably so they resolve when a script is launched from this directory).
sys.path.insert(0, ROOT_DIR)

# Project modules re-exported under short names for the tuning scripts.
import utils.nnUtils as nnUtils
import experiments.training.train_smad as train_smad
import neural_networks.smad.model as md
101 changes: 101 additions & 0 deletions experiments/tuning/results/smad_god_class_jedit.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
Learning rate;Beta;Dense sizes;F-measure
0.187625311004;0.14675747497;[37];0.5205479452054794
0.242540118664;0.111539495108;[92, 30, 7];0.5165562913907285
0.0576728013128;0.0279349021;[73, 22, 5];0.5151515151515151
0.0518786345733;0.139677933123;[86, 83, 27];0.5142857142857143
0.0673408441308;0.0673230293947;[25, 8, 6];0.5070422535211269
0.0267080986256;0.0152451192183;[93, 26, 8];0.5033112582781457
0.0311032954781;0.0386569620065;[100, 100];0.5
0.213815471538;0.0304431998344;[28, 8, 8];0.4971098265895954
0.41141073225;0.00935042747354;[42, 36];0.49664429530201337
0.0767772225021;0.290650469376;[50, 46];0.49275362318840576
0.523062412575;0.00359283334364;[37, 19];0.48854961832061067
0.41922727536;0.0207480660613;[70, 26, 25];0.488095238095238
0.226675137981;0.0677445110781;[46];0.48780487804878053
0.233111227278;0.0109480536631;[16, 8];0.48366013071895425
0.300110153836;0.0825943663458;[91, 47, 27];0.48322147651006714
0.0327801077527;0.00397447772989;[88, 86, 82];0.48226950354609927
0.0500852749125;0.0421708196263;[84, 34];0.48226950354609927
0.157904542009;0.0489894863328;[93, 35, 20];0.481203007518797
0.235254308971;0.129223814791;[99, 6, 5];0.4806201550387597
0.349744734361;0.0895465088309;[61, 12];0.4696969696969697
0.85599017676;0.0049801835805;[68, 63, 19];0.4600000000000001
0.23820006473;0.00337728335754;[55];0.45925925925925926
0.696660351954;0.00605014468883;[90];0.45833333333333337
0.521710749067;0.0531599404738;[32];0.4578313253012048
0.581053195198;0.0065973177479;[95, 92, 72];0.45614035087719296
0.569910465955;0.012661999135;[42, 16, 10];0.4512820512820513
0.0849450790357;0.193346121571;[38, 8, 6];0.448
0.514930761237;0.116705112524;[39, 31];0.44670050761421315
0.20066702827;0.016933463796;[86, 6];0.4444444444444445
0.430029378442;0.0146752953346;[19, 13];0.4444444444444445
0.677078288941;0.116114004774;[96];0.4419889502762431
0.0387443705299;0.0952209864936;[64, 14, 7];0.43678160919540227
0.159563120414;0.0107310916222;[21, 20, 7];0.4264705882352941
0.367522228601;0.0041977616693;[17, 11, 6];0.4197530864197531
0.36551226629;0.0210275757725;[61];0.41666666666666663
0.0672510156293;0.265729230812;[15];0.4097560975609756
0.632926800839;0.200424526599;[44];0.38862559241706157
0.0771137301817;0.140917366343;[20];0.3374485596707819
0.0675845526487;0.575567813039;[15];0.3274021352313167
0.0315757784368;0.00873004644068;[29];0.3047619047619048
0.0806591303696;0.0299125069307;[30];0.29365079365079366
0.0358852263939;0.0077475104817;[30, 5];0.2336182336182336
0.0185634461312;0.94839319324;[46, 39];0.2292993630573248
0.00997371678202;0.00692451846682;[99, 90];0.19659735349716445
0.0172259419392;0.773520904708;[57];0.19063545150501673
0.0192220822374;0.0074529785417;[28];0.18791946308724833
0.00775506126011;0.208671948166;[72, 70, 41];0.17812499999999998
0.025782841518;0.263117989012;[48];0.15384615384615383
0.20164635998;0.00904945612594;[4];0.1491395793499044
0.0146668968654;0.132363178673;[54, 36];0.14825174825174825
0.0244971929407;0.0377305752096;[24, 9, 8];0.1364221364221364
0.043488731109;0.198525517026;[38, 6, 6];0.13553719008264464
0.0115054498015;0.0456030013482;[84];0.1322314049586777
0.0161597814023;0.75952092995;[25];0.12817679558011047
0.0291452447242;0.18302832017;[7];0.12468827930174566
0.0169354916449;0.18872179022;[37, 23];0.12311265969802555
0.0567147888183;0.00337946451529;[32, 18];0.1174863387978142
0.020165766387;0.198471337686;[40];0.1101511879049676
0.0288530570004;0.0232640207533;[21];0.10477299185098952
0.00637426286374;0.212162122731;[46, 22, 21];0.09975062344139651
0.488728898928;0.239170015427;[34, 18];0.09780439121756489
0.00881665610108;0.015779562476;[79, 21];0.09760425909494232
0.00629958971871;0.0807374795471;[73, 27];0.09741550695825049
0.0139390457484;0.0113725931467;[28, 25];0.09573542210617929
0.00353404588975;0.0184318170263;[89, 8];0.09416581371545546
0.0120791978773;0.183021301698;[66, 16];0.09075342465753423
0.00620167083462;0.207947728748;[95, 34];0.08849557522123895
0.0118407887201;0.00935544694392;[65, 45];0.0855614973262032
0.0055747321968;0.187588683955;[30, 25];0.08548707753479126
0.0330444112935;0.0450198146887;[4, 4];0.08482142857142858
0.00887710338536;0.0139892514367;[29, 27, 25];0.08235294117647057
0.00813959647552;0.00326927246043;[52, 35, 14];0.07971014492753625
0.0564239852881;0.0125519862372;[7, 4];0.07518796992481203
0.00927245329718;0.224079957675;[44];0.07396664249456128
0.038134999858;0.00428911519924;[10];0.0694980694980695
0.00553237978431;0.0223508358307;[96, 9, 7];0.06744666207845837
0.0257345627096;0.0320198266225;[16, 5];0.06525285481239804
0.0061013365831;0.00438961751565;[24, 13, 9];0.061538461538461535
0.450022324106;0.449015783908;[31];0.06070826306913996
0.010338295358;0.00464760878807;[83, 5];0.058452722063037255
0.00790175250821;0.00661985365521;[29, 12, 8];0.05751633986928104
0.0183609220109;0.0218678540961;[13];0.05656819610307982
0.170432453544;0.22164653729;[15, 13, 6];0.05593607305936072
0.00440426281999;0.0187248538163;[40, 30, 21];0.05402542372881356
0.014217128222;0.017224569319;[9];0.05366492146596858
0.00455953923729;0.213867004434;[42, 37, 17];0.053128689492325853
0.00932510571364;0.100131659847;[13];0.04398689751988769
0.00756230203124;0.0443855465015;[8];0.040201005025125636
0.00511983439921;0.00328902664239;[90, 39];0.04008908685968819
0.489651305922;0.280614674322;[9, 5, 4];0.03248924988055423
0.060735594726;0.978996697146;[86, 38, 13];0.03248924988055423
0.0938976874343;0.659811776069;[85, 15];0.03248924988055423
0.105161372625;0.735831832433;[15, 12];0.03248924988055423
0.161824541463;0.930964885054;[61];0.03248924988055423
0.485893474095;0.775913010563;[55];0.03248924988055423
0.508516969085;0.372844508898;[31, 22, 11];0.03248924988055423
0.48969476847;0.504910514286;[17];0.03248924988055423
0.00611084309339;0.0214893186361;[63];0.03002421307506053
0.00390184507626;0.0292952263152;[22, 18];0.026812313803376366
0.00339056311938;0.00371395072775;[11, 10];0.011393847322445878
113 changes: 113 additions & 0 deletions experiments/tuning/tune_smad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from context import ROOT_DIR, nnUtils, train_smad, md

import tensorflow as tf
import numpy as np

import argparse
import os
import progressbar
import random
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

def parse_args():
    """Parse the command-line arguments of the tuning script.

    Positional arguments are ``antipattern`` and ``test_system``; the
    optional integer settings are ``-n_fold`` (default 5), ``-n_step``
    (default 100) and ``-n_test`` (default 100).

    Returns:
        The ``argparse.Namespace`` built from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()

    # Mandatory positional arguments.
    cli.add_argument("antipattern", help="Either 'god_class' or 'feature_envy'")
    cli.add_argument(
        "test_system",
        help="The name of the system to be used for testing.\n Hence, the cross-validation will be performed using all the systems except this one.")

    # Optional integer settings, described as (flag, default, help text).
    int_options = [
        ("-n_fold", 5, "Number of folds (k) for a k-fold-cross-validation"),
        ("-n_step", 100, "Number of training steps (i.e., epochs) to be performed for each fold"),
        ("-n_test", 100, "Number of random hyper-parameters sets to be tested"),
    ]
    for flag, default, description in int_options:
        cli.add_argument(flag, type=int, default=default, help=description)

    return cli.parse_args()

def generateRandomHyperParameters():
    """Draw one random hyper-parameter set.

    Returns:
        A tuple ``(learning_rate, beta, dense_sizes)`` where
        ``learning_rate`` and ``beta`` are ``10 ** -u`` with ``u`` uniform in
        [0.0, 2.5], and ``dense_sizes`` is a list of 1 to 3 layer sizes in
        [4, 100], each no larger than the one before it.
    """
    # Sampling the exponent uniformly spreads the values evenly on a log scale.
    learning_rate = 10 ** -random.uniform(0.0, 2.5)
    beta = 10 ** -random.uniform(0.0, 2.5)

    smallest_layer = 4
    upper = 100
    layer_count = random.randint(1, 3)

    dense_sizes = []
    while len(dense_sizes) < layer_count:
        # Each new layer is capped by the size of the previous one.
        upper = random.randint(smallest_layer, upper)
        dense_sizes.append(upper)

    return learning_rate, beta, dense_sizes

def get_cross_validation_dataset(X, Y, fold_index, n_fold):
    """Build the train/validation split for one fold of a k-fold cross-validation.

    ``X`` and ``Y`` are cut into ``n_fold`` folds with ``nnUtils.split``;
    fold ``fold_index`` is held out and the remaining folds are stacked
    along axis 0 to form the training set.

    Returns:
        A tuple ``(x_train, y_train, x_held_out, y_held_out)``.
    """
    folds_x, folds_y = nnUtils.split(X, Y, n_fold)

    # Start from empty arrays so the training set is still a well-formed
    # (0, n_features) / (0, 1) array when no fold is left for training.
    train_parts_x = [np.empty(shape=[0, X.shape[-1]])]
    train_parts_y = [np.empty(shape=[0, 1])]
    for i in range(n_fold):
        if i == fold_index:
            continue
        train_parts_x.append(folds_x[i])
        train_parts_y.append(folds_y[i])

    x_train = np.concatenate(train_parts_x, axis=0)
    y_train = np.concatenate(train_parts_y, axis=0)
    return x_train, y_train, folds_x[fold_index], folds_y[fold_index]

def train(session, model, x_train, y_train, num_step, lr, beta):
    """Run ``model.learning_step`` ``num_step`` times on the whole training set.

    Every step feeds the same values: ``x_train`` and ``y_train`` for the
    model inputs, ``lr`` for ``model.learning_rate`` and ``beta`` for
    ``model.beta``. Nothing is returned; the session's variables are updated.
    """
    # The fed values never change between steps, so the mapping is built once.
    feed = {
        model.input_x: x_train,
        model.input_y: y_train,
        model.learning_rate: lr,
        model.beta: beta,
    }

    for _ in range(num_step):
        session.run(model.learning_step, feed_dict=feed)

def _write_results(output_file_path, params, perfs):
    """Write every tested hyper-parameter set to a ';'-separated file, best F-measure first.

    Args:
        output_file_path: Destination file; overwritten on every call.
        params: One ``[learning_rate, beta, dense_sizes]`` list per trial.
        perfs: F-measure of each trial, aligned with ``params``.
    """
    # argsort is ascending, so walk it backwards to list the best trial first.
    ranking = np.argsort(np.array(perfs))[::-1]
    with open(output_file_path, 'w') as results_file:
        results_file.write("Learning rate;Beta;Dense sizes;F-measure\n")
        for j in ranking:
            fields = [str(value) for value in params[j]] + [str(perfs[j])]
            results_file.write(';'.join(fields) + '\n')


if __name__ == "__main__":
    args = parse_args()

    # Remove the test system from the training set and build dataset
    train_smad.training_systems.remove(args.test_system)
    data_x, data_y = train_smad.build_dataset(args.antipattern, train_smad.training_systems)
    data_x, data_y = nnUtils.shuffle(data_x, data_y)

    bar = progressbar.ProgressBar(
        maxval=args.n_test,
        widgets=['Performing cross validation for ' + args.test_system + ': ', progressbar.Percentage()])
    bar.start()

    # Make sure the results directory exists before the first write.
    results_dir = os.path.join(ROOT_DIR, 'experiments', 'tuning', 'results')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    output_file_path = os.path.join(results_dir, 'smad_' + args.antipattern + '_' + args.test_system + '.csv')

    params = []
    perfs = []
    for i in range(args.n_test):
        learning_rate, beta, dense_sizes = generateRandomHyperParameters()
        params.append([learning_rate, beta, dense_sizes])

        # Held-out predictions of every fold, stacked in fold order so that
        # they line up with data_y once all folds have been processed.
        predictions = np.empty(shape=[0, 1])
        for j in range(args.n_fold):
            x_train, y_train, x_test, y_test = get_cross_validation_dataset(data_x, data_y, j, args.n_fold)

            # New graph
            tf.reset_default_graph()

            # Create model
            model = md.SMAD(
                shape=dense_sizes,
                input_size=x_train.shape[-1])

            with tf.Session() as session:
                # Initialize the variables of the TensorFlow graph.
                session.run(tf.global_variables_initializer())

                train(
                    session=session,
                    model=model,
                    x_train=x_train,
                    y_train=y_train,
                    num_step=args.n_step,
                    lr=learning_rate,
                    beta=beta)

                fold_predictions = session.run(model.inference, feed_dict={model.input_x: x_test})
                predictions = np.concatenate((predictions, fold_predictions), axis=0)

        perfs.append(nnUtils.f_measure(predictions, data_y))

        # The file is rewritten after every trial, so it always reflects the
        # trials completed so far.
        _write_results(output_file_path, params, perfs)
        bar.update(i + 1)
    bar.finish()
Binary file modified neural_networks/smad/.DS_Store
Binary file not shown.
1 change: 0 additions & 1 deletion neural_networks/smad/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def __init__(self, shape, input_size):
self.input_y = tf.placeholder(tf.float32,[None, 1], name="input_y")

# Placeholders for training parameters
self.training = tf.placeholder(tf.bool, name="training")
self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")
self.beta = tf.placeholder(tf.float32, name="beta")

Expand Down
Binary file modified neural_networks/smad/trained_models/.DS_Store
Binary file not shown.
11 changes: 0 additions & 11 deletions neural_networks/smad/trained_models/feature_envy/checkpoint

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

This file was deleted.

Binary file not shown.
Binary file not shown.
11 changes: 0 additions & 11 deletions neural_networks/smad/trained_models/god_class/checkpoint

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
5 changes: 1 addition & 4 deletions utils/dataUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,11 @@ def getCandidateFeatureEnvy(systemName):
methods = getMethods(systemName)
classes = getAllClasses(systemName)

candidates = []
with open(JDMetricFile, 'rb') as csvfile:
reader = csv.DictReader(csvfile, delimiter=';')

return [entityUtils.normalizeMethodName(row['Method']) + ';' + row['TargetClass'] for row in reader \
if (entityUtils.getEmbeddingClass(entityUtils.normalizeMethodName(row['Method']))!=row['TargetClass']) & (entityUtils.normalizeMethodName(row['Method']) in methods) & (row['TargetClass'] in classes)]

return candidates
if (entityUtils.getEmbeddingClass(entityUtils.normalizeMethodName(row['Method']))!=row['TargetClass']) & (entityUtils.normalizeMethodName(row['Method']) in methods) & (row['TargetClass'] in classes)]


### METRICS GETTERS FOR GOD CLASS ###
Expand Down
10 changes: 9 additions & 1 deletion utils/nnUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,22 @@ def shuffle(X, Y):

return shuffled_X, shuffled_Y

def split(X, Y, nb_split):
    """Cut X and Y into ``nb_split`` consecutive, aligned folds.

    The first ``nb_split - 1`` folds each hold ``len(X) // nb_split``
    elements; the last fold receives everything that remains.

    Args:
        X: Array of inputs.
        Y: Array of labels, with the same length as ``X``.
        nb_split: Number of folds to produce.

    Returns:
        A tuple ``(folds_of_X, folds_of_Y)``, each a list of ``nb_split`` arrays.

    Raises:
        AssertionError: If ``X`` and ``Y`` do not have the same length.
    """
    assert len(X) == len(Y), 'X and Y must have the same number of elements'

    fold_size = len(X) // nb_split
    # Cut points between consecutive folds; np.split leaves the leftover
    # elements in the final fold.
    boundaries = [fold_size * k for k in range(1, nb_split)]

    folds_x = np.split(X, boundaries)
    folds_y = np.split(Y, boundaries)
    return folds_x, folds_y

# Returns the Bayesian averaging between many network's predictions
def ensemble_prediction(model, save_paths, input_x):
    """Average the predictions of several saved networks on the same input.

    Each checkpoint in ``save_paths`` is restored in turn into a single
    session, ``model.inference`` is evaluated on ``input_x``, and the
    element-wise mean over all networks is returned.

    Args:
        model: Model exposing ``input_x`` and ``inference``.
        save_paths: Checkpoint paths, one per trained network.
        input_x: Input fed to ``model.input_x``.

    Returns:
        The mean of the individual predictions, taken over the network axis.
    """
    saver = tf.train.Saver(max_to_keep=len(save_paths))
    predictions = []
    with tf.Session() as session:
        for save_path in save_paths:
            # Load this network's variables before evaluating it.
            saver.restore(sess=session, save_path=save_path)
            # Only input_x is fed: the model no longer has a `training` placeholder.
            prediction = session.run(model.inference, feed_dict={model.input_x: input_x})
            predictions.append(prediction)

    return np.mean(np.array(predictions), axis=0)
Expand Down

0 comments on commit f998f5e

Please sign in to comment.