Skip to content

Commit

Permalink
Start refactoring the project to handle experiments related to both G…
Browse files Browse the repository at this point in the history
…od Class and Feature Envy together (using the same script files) in order to remove code duplication in the project
  • Loading branch information
antoineBarbez committed Apr 19, 2019
1 parent 2342cde commit d2fc346
Show file tree
Hide file tree
Showing 87 changed files with 229 additions and 42 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified data/antipatterns/.DS_Store
Binary file not shown.
Binary file modified data/metric_files/.DS_Store
Binary file not shown.
Binary file modified data/metric_files/jdeodorant/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file modified experiments/.DS_Store
Binary file not shown.
10 changes: 10 additions & 0 deletions experiments/training/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Shared import context: makes the repository root importable and exposes
# ROOT_DIR plus the project modules imported below to scripts that do
# `from context import ...`.
import os
import sys

# Absolute path two levels above this file's directory; it is put first on
# sys.path so that the `utils` and `neural_networks` imports below resolve.
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
sys.path.insert(0, ROOT_DIR)

import utils.dataUtils as dataUtils
import utils.nnUtils as nnUtils

import neural_networks.smad.model as md
135 changes: 135 additions & 0 deletions experiments/training/train_smad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
from context import ROOT_DIR, nnUtils, md

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

import argparse
import os

# Set TensorFlow's C++ minimum log level through its environment variable
# (presumably '2' hides INFO and WARNING output -- confirm against the TF docs).
# NOTE(review): this assignment runs after `import tensorflow` above; confirm
# the variable is still honoured when set at this point.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

# Names of the systems that can be used for training. This is a mutable set:
# the main script removes the system chosen for testing from it before the
# training data is built.
training_systems = {
'android-frameworks-opt-telephony',
'android-platform-support',
'apache-ant',
'lucene',
'apache-tomcat',
'argouml',
'jedit',
'xerces-2_7_0'
}

def parse_args():
    """Parse the command-line arguments of the training script.

    Positional arguments:
        antipattern: 'god_class' or 'feature_envy' (anything else is rejected).
        test_system: name of the system held out for testing.

    Required options:
        -lr: initial learning rate.
        -beta: L2 regularization scale.
        -dense_sizes: one or more hidden layer sizes.

    Optional options (with defaults): -n_net, -n_step, -decay_step, -lr_decay.

    Returns:
        The argparse.Namespace holding the parsed values. argparse exits the
        process (SystemExit) with a usage message on invalid input.
    """
    parser = argparse.ArgumentParser()
    # Restrict to the two antipatterns the rest of the script knows about, so a
    # typo fails here with a usage message instead of a KeyError later on.
    parser.add_argument("antipattern", choices=["god_class", "feature_envy"], help="Either 'god_class' or 'feature_envy'")
    parser.add_argument("test_system", help="The name of the system to be used for testing.\n Hence, the training will be performed using all the systems except this one.")
    # These three options have no sensible default: leaving them out used to
    # propagate None into the model and the training loop, so require them.
    parser.add_argument("-lr", type=float, required=True, help="The learning rate to be used for training.")
    parser.add_argument("-beta", type=float, required=True, help="The L2 regularization scale to be used for training.")
    parser.add_argument('-dense_sizes', nargs='+', type=int, required=True, help="The sizes of each (dense) hidden layer in the network.")
    parser.add_argument("-n_net", type=int, default=10, help="The number of distinct networks to be trained and saved.")
    parser.add_argument("-n_step", type=int, default=300, help="The number of training steps.")
    parser.add_argument("-decay_step", type=int, default=100, help="The number of training steps after which the learning rate is decayed")
    parser.add_argument("-lr_decay", type=float, default=0.5, help="The factor by which the learning rate is multiplied every 'decay_step' steps")
    return parser.parse_args()

# Build the checkpoint path of one trained network
def get_save_path(antipattern, net_number):
    """Return the save path of network number `net_number` for `antipattern`.

    The path is ROOT_DIR/neural_networks/smad/trained_models/<antipattern>/network<net_number>.
    """
    models_dir = os.path.join(ROOT_DIR, 'neural_networks', 'smad', 'trained_models', antipattern)
    checkpoint_name = 'network{}'.format(net_number)
    return os.path.join(models_dir, checkpoint_name)

def build_dataset(antipattern, systems):
    """Stack the instances and labels of several systems into two arrays.

    Args:
        antipattern: 'god_class' or 'feature_envy'; selects the instance width.
        systems: iterable of system names passed to nnUtils.getInstances and
            nnUtils.getLabels.

    Returns:
        A pair (X, Y) of arrays concatenated along axis 0. With no systems the
        result is an empty (0, width) instance array and an empty (0, 2) label
        array.
    """
    n_features = {'god_class':8, 'feature_envy':9}[antipattern]

    # Start from empty arrays of the right width so that an empty `systems`
    # still yields correctly shaped outputs.
    instance_parts = [np.empty(shape=[0, n_features])]
    label_parts = [np.empty(shape=[0, 2])]
    for name in systems:
        instance_parts.append(nnUtils.getInstances(name, antipattern))
        label_parts.append(nnUtils.getLabels(name, antipattern))

    return np.concatenate(instance_parts, axis=0), np.concatenate(label_parts, axis=0)

# Run the training loop of one network and record its loss curves
def train(session, model, x_train, y_train, x_test, y_test, num_step, start_lr, beta, decay_step, lr_decay):
    """Train `model` for `num_step` steps and return the per-step losses.

    Every step runs `model.learning_step` on the whole training set with the
    current learning rate and `beta`, then evaluates `model.loss` on the
    training set and on the test set (feeding only inputs and labels).

    The learning rate starts at `start_lr` and is multiplied by `lr_decay`
    at every positive step that is a multiple of `decay_step`.

    Returns:
        (losses_train, losses_test): two lists with one loss value per step.
    """
    train_inputs = {model.input_x: x_train, model.input_y: y_train}
    test_inputs = {model.input_x: x_test, model.input_y: y_test}

    current_lr = start_lr
    train_curve, test_curve = [], []
    for step in range(num_step):
        # Learning rate decay (never applied at step 0)
        if step % decay_step == 0 and step > 0:
            current_lr = current_lr * lr_decay

        # The optimisation step needs the hyper-parameters on top of the data
        step_feed = dict(train_inputs)
        step_feed[model.learning_rate] = current_lr
        step_feed[model.beta] = beta
        session.run(model.learning_step, feed_dict=step_feed)

        step_train_loss = session.run(model.loss, feed_dict=train_inputs)
        step_test_loss = session.run(model.loss, feed_dict=test_inputs)
        train_curve.append(step_train_loss)
        test_curve.append(step_test_loss)
    return train_curve, test_curve

# Script entry point: train `n_net` networks on every system except the test
# system, save each of them, then compute their ensemble prediction on the
# test system and plot the learning curves.
if __name__ == "__main__":
    args = parse_args()

    # Remove the test system from the training set and build dataset
    # (set.remove raises KeyError if test_system is not in training_systems)
    training_systems.remove(args.test_system)
    x_train, y_train = build_dataset(args.antipattern, training_systems)
    x_test, y_test = build_dataset(args.antipattern, [args.test_system])

    # Create model
    # The input size is read from the width of the first training instance.
    model = md.SMAD(
        shape=args.dense_sizes,
        input_size=x_train[0].shape[-1])

    # To save and restore a trained model
    # max_to_keep is set to n_net, the number of networks saved below.
    saver = tf.train.Saver(max_to_keep=args.n_net)

    # Train several neural networks
    # One list of per-step losses is collected per trained network.
    all_losses_train = []
    all_losses_test = []
    with tf.Session() as session:
        for i in range(args.n_net):
            print('Training Neural Network :' + str(i+1))

            # Initialize the variables of the TensorFlow graph.
            # Done inside the loop, so every network starts from a fresh
            # initialization of the same graph.
            session.run(tf.global_variables_initializer())

            # Train the model
            losses_train, losses_test = train(
                session=session,
                model=model,
                x_train=x_train,
                y_train=y_train,
                x_test=x_test,
                y_test=y_test,
                num_step=args.n_step,
                start_lr=args.lr,
                beta=args.beta,
                decay_step=args.decay_step,
                lr_decay=args.lr_decay)

            all_losses_train.append(losses_train)
            all_losses_test.append(losses_test)

            # Save the model
            saver.save(sess=session, save_path=get_save_path(args.antipattern, i))


    # Compute the ensemble prediction on the test system
    # The save paths are rebuilt with the same indices used when saving above.
    ensemble_prediction = nnUtils.ensemble_prediction(
        model=model,
        save_paths=[get_save_path(args.antipattern, i) for i in range(args.n_net)],
        input_x=x_test)

    # Print Ensemble performances
    # NOTE(review): only the labels are printed; no metric is computed from
    # ensemble_prediction and y_test here yet.
    print("\nPerformances on " + args.test_system + ": ")
    print('Precision :')
    print('Recall :')
    print('F-Mesure :')
    print('Accuracy :')

    # Plot learning curves
    nnUtils.plot_learning_curves(all_losses_train, all_losses_test)
23 changes: 4 additions & 19 deletions neural_networks/smad/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,22 @@


class SMAD(object):
def __init__(self, shape, input_size, constants_size):
output_size = 2
def __init__(self, shape, input_size):

# Placeholders for instances and labels
self.input_x = tf.placeholder(tf.float32,[None, input_size], name="input_x")
self.input_y = tf.placeholder(tf.float32,[None, output_size], name="input_y")

# Placeholders for batch constants
self.constants = tf.placeholder(tf.float32, [constants_size], name="batch_constants")
self.input_y = tf.placeholder(tf.float32,[None, 2], name="input_y")

# Placeholders for learning parameters
self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")
self.beta = tf.placeholder(tf.float32, name="beta")

# L2 regularization & initialization
l2_reg = tf.contrib.layers.l2_regularizer(scale=self.beta)
xavier = tf.contrib.layers.xavier_initializer()

# Add batch constants to the input
with tf.name_scope("input"):
batch_constants = tf.expand_dims(self.constants, 0)
batch_constants = tf.tile(batch_constants, [tf.shape(self.input_x)[0], 1])
x = tf.concat([self.input_x, batch_constants], axis=1)

# Dropout
with tf.name_scope("dropout"):
h_drop = tf.nn.dropout(x, self.dropout_keep_prob)

# Hidden layers
h_in = h_drop
h_in = self.input_x
for size in shape:
with tf.name_scope("hidden-%s" % size):
h_in = tf.layers.dense(h_in,
Expand All @@ -47,7 +32,7 @@ def __init__(self, shape, input_size, constants_size):
# Output layer
with tf.name_scope("output"):
self.logits = tf.layers.dense(h_in,
output_size,
2,
kernel_initializer=xavier,
kernel_regularizer=l2_reg,
bias_regularizer=l2_reg)
Expand Down
17 changes: 11 additions & 6 deletions neural_networks/smad/trained_models/feature_envy/checkpoint
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
model_checkpoint_path: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/feature_envy/network4"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/feature_envy/network0"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/feature_envy/network1"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/feature_envy/network2"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/feature_envy/network3"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/feature_envy/network4"
model_checkpoint_path: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network9"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network0"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network1"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network2"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network3"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network4"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network5"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network6"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network7"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network8"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/feature_envy/network9"
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network0.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network0.meta
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network1.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network1.meta
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network2.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network2.meta
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network3.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network3.meta
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network4.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/feature_envy/network4.meta
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
17 changes: 11 additions & 6 deletions neural_networks/smad/trained_models/god_class/checkpoint
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
model_checkpoint_path: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/god_class/network4"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/god_class/network0"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/god_class/network1"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/god_class/network2"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/god_class/network3"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/merged-detection/neural_networks/smad/trained_models/god_class/network4"
model_checkpoint_path: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network9"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network0"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network1"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network2"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network3"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network4"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network5"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network6"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network7"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network8"
all_model_checkpoint_paths: "/Users/antoinebarbez/Desktop/tensorflow/SMAD/neural_networks/smad/trained_models/god_class/network9"
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network0.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network0.meta
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network1.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network1.meta
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network2.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network2.meta
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network3.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network3.meta
Binary file not shown.
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network4.index
Binary file not shown.
Binary file modified neural_networks/smad/trained_models/god_class/network4.meta
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
)�����<;h_�Bo��S�&��:3�V/G=��>� ��ۮ����
>w��������j>�{>D�<cH�/�&�(>)��=3姻$���VW�<uꂻ��.��u.�] >=l!��FSۼ�;*�s�i�k=�Fi�9`���G�����g���&�;8� <���=N՜��H�;���mu1>y�=
�9=Dci�L튼�r�<n��=3�Q>��<f��=��W�|��;���=¶����_��`�8[ۧ:�g�=8@��Dq��`cZ��!t��=ӛ =�?j<��#���0�+Д=��,��1C��|6���t��f��m�뼄��=�㽻R�<����_��}�a�6������=�u��γ�g ���&�}��3fV=��Խ�D >rĽ`R����f�543=j�I>�o=�A�=}uE�\�>��ҽ�4>�b>%�>�l��tE`���!��4U���q���`��1�=^���_�B�=Z���䋽�j���h<�ȕ�C��;�L��/ O��ѿ<�Q�Y�<5��F�S=p��=����>�r���G>1��>ᘚ>�:��rK�s�M<:��=�;��'C>��ȽL2 ���Z>�~N=ˏT={k3�t��Ơ=�xi�0��<�Gٽ.J�=/�+=@0�!��=�9�<%-�<���:AB��H�<
��=��< t����N��U�� q�=I5�<v���d=��h����=����po��7�J�滖�>�w=$-B=�rj<ۼ7�5z۽�.�=�.>�Ya= ����ӑ����"�=�<�
v=��<K�=�v�=���=��ѽ��k��=�ڍ��ڍ>}Z�=�\*��"�j/�=V�R>�-�'���Te >;��>ĥ��\|�>�ѾT��>�5���h�����<���\lj>nW@�Gp=
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
64 changes: 53 additions & 11 deletions utils/nnUtils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from context import ROOT_DIR
from sklearn.preprocessing import StandardScaler

import numpy as np
import tensorflow as tf
import liuUtils
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import dataUtils
import random
Expand Down Expand Up @@ -40,16 +40,52 @@ def accuracy(output, labels):


### UTILS ###
def shuffle(X, Y):
    """Shuffle X and Y with one shared random permutation.

    Args:
        X: indexable sequence of instances.
        Y: indexable sequence of labels, same length as X.

    Returns:
        (shuffled_X, shuffled_Y): two numpy arrays in which element i of both
        arrays comes from the same original position.

    Raises:
        AssertionError: if X and Y do not have the same length.
    """
    assert len(X) == len(Y), 'X and Y must have the same number of elements'

    # random.shuffle permutes in place and therefore needs a mutable sequence;
    # on Python 3 a bare range() is immutable, so materialize it as a list.
    idx = list(range(len(X)))
    random.shuffle(idx)

    shuffled_X = np.array([X[i] for i in idx])
    shuffled_Y = np.array([Y[i] for i in idx])

    return shuffled_X, shuffled_Y

# Average the predictions of several saved networks
def ensemble_prediction(model, save_paths, input_x):
    """Return the mean prediction of the networks stored at `save_paths`.

    Each checkpoint is restored in turn into one session, `model.inference`
    is evaluated on `input_x`, and the results are averaged along the
    network axis (axis 0).
    """
    saver = tf.train.Saver(max_to_keep=len(save_paths))
    feed = {model.input_x: input_x}

    outputs = []
    with tf.Session() as sess:
        for path in save_paths:
            saver.restore(sess=sess, save_path=path)
            outputs.append(sess.run(model.inference, feed_dict=feed))

    stacked = np.array(outputs)
    return np.mean(stacked, axis=0)

def plot_learning_curves(losses_train, losses_test):
    """Plot the mean training and test loss curves with a 10-90 percentile band.

    Args:
        losses_train: one sequence of per-step training losses per network.
        losses_test: one sequence of per-step test losses per network.

    The statistics are taken across networks (axis 0). The figure is shown
    with plt.show(); nothing is returned.
    """
    plt.figure()
    plt.ylim((0.0, 1.0))
    plt.xlabel("Epochs")
    plt.ylabel("Loss")

    # (losses, colour, legend label) for each curve, training set first
    curves = [
        (losses_train, "r", 'Training set'),
        (losses_test, "g", 'Test set'),
    ]
    means = [np.mean(losses, axis=0) for losses, _, _ in curves]
    upper_bounds = [np.percentile(losses, 90, axis=0) for losses, _, _ in curves]
    lower_bounds = [np.percentile(losses, 10, axis=0) for losses, _, _ in curves]
    plt.grid()

    # Shaded percentile bands are drawn first, then the mean curves
    for (losses, colour, _), upper, lower in zip(curves, upper_bounds, lower_bounds):
        plt.fill_between(range(len(losses[0])), upper, lower, alpha=0.2, color=colour)
    for (losses, colour, label), mean in zip(curves, means):
        plt.plot(range(len(losses[0])), mean, color=colour, label=label)

    plt.legend(loc='best')
    plt.show()


### INSTANCES AND LABELS GETTERS ###
Expand Down Expand Up @@ -100,10 +136,16 @@ def getInstances(systemName, antipattern, normalized=True):
instances = np.array(instances).astype(float)

# Batch normalization
if normalized:
'''if normalized:
scaler = StandardScaler()
scaler.fit(instances)
return scaler.transform(instances)
return scaler.transform(instances)'''

scaler = StandardScaler()
scaler.fit(instances)
instances = scaler.transform(instances)

instances = np.concatenate((instances, np.tile(getSystemConstants(systemName), (instances.shape[0],1))), axis=1)

return instances

Expand Down

0 comments on commit d2fc346

Please sign in to comment.