Skip to content

Commit

Permalink
implemented new training, tuning and predict scripts for smad and asci
Browse files Browse the repository at this point in the history
  • Loading branch information
antoineBarbez committed Apr 23, 2019
1 parent f998f5e commit 3ce21cc
Show file tree
Hide file tree
Showing 36 changed files with 460 additions and 185 deletions.
Binary file modified .DS_Store
Binary file not shown.
16 changes: 15 additions & 1 deletion detection_tools/feature_envy/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,18 @@ def getSmells(systemName, alpha=2.6):
smells.append(m + ';' + c)


return smells
return smells


def predict(systemName):
    """Return this tool's binary verdict for every feature-envy entity of a system.

    The entities are read with dataUtils.getEntities('feature_envy', systemName)
    and the detected smells with getSmells(systemName).

    Args:
        systemName: name of the system to analyse.

    Returns:
        A float numpy array of shape (number of entities, 1). Row i holds 1.
        when the i-th entity is among the detected smells and 0. otherwise.
    """
    entities = dataUtils.getEntities('feature_envy', systemName)

    # Hash the detected smells once so each membership test is O(1) instead of
    # a linear scan of the list returned by getSmells.
    # NOTE(review): assumes entities and smells are hashable identifiers
    # (e.g. strings) -- confirm against dataUtils.getEntities.
    detected = set(getSmells(systemName))

    prediction = [[1.] if entity in detected else [0.] for entity in entities]

    # reshape keeps the (n, 1) column layout even when there is no entity, so
    # the result can always be concatenated row-wise with other predictions.
    return np.array(prediction, dtype=float).reshape(-1, 1)
16 changes: 16 additions & 0 deletions detection_tools/feature_envy/incode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import division
from context import ROOT_DIR, dataUtils, entityUtils

import numpy as np

import csv
import os

Expand Down Expand Up @@ -84,4 +86,18 @@ def getEnviedClasses(className, classAttributeMap, atfd, laa, fdp):
return enviedClass


def predict(systemName):
    """Return this tool's binary verdict for every feature-envy entity of a system.

    The entities are read with dataUtils.getEntities('feature_envy', systemName)
    and the detected smells with getSmells(systemName).

    Args:
        systemName: name of the system to analyse.

    Returns:
        A float numpy array of shape (number of entities, 1). Row i holds 1.
        when the i-th entity is among the detected smells and 0. otherwise.
    """
    entities = dataUtils.getEntities('feature_envy', systemName)

    # Hash the detected smells once so each membership test is O(1) instead of
    # a linear scan of the list returned by getSmells.
    # NOTE(review): assumes entities and smells are hashable identifiers
    # (e.g. strings) -- confirm against dataUtils.getEntities.
    detected = set(getSmells(systemName))

    prediction = [[1.] if entity in detected else [0.] for entity in entities]

    # reshape keeps the (n, 1) column layout even when there is no entity, so
    # the result can always be concatenated row-wise with other predictions.
    return np.array(prediction, dtype=float).reshape(-1, 1)



15 changes: 15 additions & 0 deletions detection_tools/feature_envy/jdeodorant.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import division
from context import ROOT_DIR, dataUtils, entityUtils

import numpy as np

import os

def getSmells(systemName):
Expand All @@ -23,3 +25,16 @@ def getSmells(systemName):

return list(set(smells))

def predict(systemName):
    """Return this tool's binary verdict for every feature-envy entity of a system.

    The entities are read with dataUtils.getEntities('feature_envy', systemName)
    and the detected smells with getSmells(systemName).

    Args:
        systemName: name of the system to analyse.

    Returns:
        A float numpy array of shape (number of entities, 1). Row i holds 1.
        when the i-th entity is among the detected smells and 0. otherwise.
    """
    entities = dataUtils.getEntities('feature_envy', systemName)

    # Hash the detected smells once so each membership test is O(1) instead of
    # a linear scan of the list returned by getSmells.
    # NOTE(review): assumes entities and smells are hashable identifiers
    # (e.g. strings) -- confirm against dataUtils.getEntities.
    detected = set(getSmells(systemName))

    prediction = [[1.] if entity in detected else [0.] for entity in entities]

    # reshape keeps the (n, 1) column layout even when there is no entity, so
    # the result can always be concatenated row-wise with other predictions.
    return np.array(prediction, dtype=float).reshape(-1, 1)

18 changes: 17 additions & 1 deletion detection_tools/god_class/decor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import division
from context import ROOT_DIR
from context import ROOT_DIR, dataUtils

import numpy as np

import csv
import os
Expand Down Expand Up @@ -31,3 +33,17 @@ def getSmells(systemName):
smells.append(row['ClassName'])

return smells


def predict(systemName):
    """Return this tool's binary verdict for every god-class entity of a system.

    The entities are read with dataUtils.getEntities('god_class', systemName)
    and the detected smells with getSmells(systemName).

    Args:
        systemName: name of the system to analyse.

    Returns:
        A float numpy array of shape (number of entities, 1). Row i holds 1.
        when the i-th entity is among the detected smells and 0. otherwise.
    """
    entities = dataUtils.getEntities('god_class', systemName)

    # Hash the detected smells once so each membership test is O(1) instead of
    # a linear scan of the list returned by getSmells.
    # NOTE(review): assumes entities and smells are hashable identifiers
    # (e.g. strings) -- confirm against dataUtils.getEntities.
    detected = set(getSmells(systemName))

    prediction = [[1.] if entity in detected else [0.] for entity in entities]

    # reshape keeps the (n, 1) column layout even when there is no entity, so
    # the result can always be concatenated row-wise with other predictions.
    return np.array(prediction, dtype=float).reshape(-1, 1)
13 changes: 13 additions & 0 deletions detection_tools/god_class/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,16 @@ def getSmells(systemName, alpha=8.0):

return smells

def predict(systemName):
    """Return this tool's binary verdict for every god-class entity of a system.

    The entities are read with dataUtils.getEntities('god_class', systemName)
    and the detected smells with getSmells(systemName).

    Args:
        systemName: name of the system to analyse.

    Returns:
        A float numpy array of shape (number of entities, 1). Row i holds 1.
        when the i-th entity is among the detected smells and 0. otherwise.
    """
    entities = dataUtils.getEntities('god_class', systemName)

    # Hash the detected smells once so each membership test is O(1) instead of
    # a linear scan of the list returned by getSmells.
    # NOTE(review): assumes entities and smells are hashable identifiers
    # (e.g. strings) -- confirm against dataUtils.getEntities.
    detected = set(getSmells(systemName))

    prediction = [[1.] if entity in detected else [0.] for entity in entities]

    # reshape keeps the (n, 1) column layout even when there is no entity, so
    # the result can always be concatenated row-wise with other predictions.
    return np.array(prediction, dtype=float).reshape(-1, 1)

17 changes: 16 additions & 1 deletion detection_tools/god_class/jdeodorant.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from context import ROOT_DIR
from context import ROOT_DIR, dataUtils

import numpy as np

import os

Expand All @@ -8,3 +10,16 @@ def getSmells(systemName):

with open(JDBlobFile, 'r') as file:
return list(set([line.split()[0] for line in file]))

def predict(systemName):
    """Return this tool's binary verdict for every god-class entity of a system.

    The entities are read with dataUtils.getEntities('god_class', systemName)
    and the detected smells with getSmells(systemName).

    Args:
        systemName: name of the system to analyse.

    Returns:
        A float numpy array of shape (number of entities, 1). Row i holds 1.
        when the i-th entity is among the detected smells and 0. otherwise.
    """
    entities = dataUtils.getEntities('god_class', systemName)

    # Hash the detected smells once so each membership test is O(1) instead of
    # a linear scan of the list returned by getSmells.
    # NOTE(review): assumes entities and smells are hashable identifiers
    # (e.g. strings) -- confirm against dataUtils.getEntities.
    detected = set(getSmells(systemName))

    prediction = [[1.] if entity in detected else [0.] for entity in entities]

    # reshape keeps the (n, 1) column layout even when there is no entity, so
    # the result can always be concatenated row-wise with other predictions.
    return np.array(prediction, dtype=float).reshape(-1, 1)
Binary file modified experiments/.DS_Store
Binary file not shown.
Binary file added experiments/training/.DS_Store
Binary file not shown.
5 changes: 3 additions & 2 deletions experiments/training/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
sys.path.insert(0, ROOT_DIR)

import utils.dataUtils as dataUtils
import utils.nnUtils as nnUtils
import utils.nnUtils as nnUtils

import neural_networks.smad.model as md
import neural_networks.asci.predict as asci
import neural_networks.smad.model as md
101 changes: 101 additions & 0 deletions experiments/training/train_asci.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from context import nnUtils, asci
from sklearn import tree

import numpy as np

import argparse
import pickle

# Names of all the systems available for the experiment. The script removes
# the system chosen for testing from this set and trains on the remaining ones.
training_systems = {
    'android-frameworks-opt-telephony',
    'android-platform-support',
    'apache-ant',
    'lucene',
    'apache-tomcat',
    'argouml',
    'jedit',
    'xerces-2_7_0',
}

def parse_args():
    """Parse the command-line arguments of the ASCI training script.

    Two positional arguments are required: the antipattern and the name of
    the system kept aside for testing. The optional flags set the number of
    trees to train and the hyper-parameters handed to the decision trees.

    Returns:
        The argparse namespace built from sys.argv.
    """
    arg_parser = argparse.ArgumentParser()

    # Positional arguments, in the order they must appear on the command line.
    positional_arguments = (
        ("antipattern", "Either 'god_class' or 'feature_envy'."),
        ("test_system", "The name of the system to be used for testing.\n Hence, the training will be performed using all the systems except this one."),
    )
    for name, description in positional_arguments:
        arg_parser.add_argument(name, help=description)

    # Optional flags, each with the keyword options given to add_argument.
    optional_arguments = (
        ("-n_tree", {"type": int, "default": 10, "help": "The number of distinct trees to be trained and saved."}),
        ("-min_samples_split", {"type": int, "default": 5}),
        ("-max_features", {"default": 'log2'}),
        ("-max_depth", {"type": int, "default": None}),
        ("-min_samples_leaf", {"type": int, "default": 2}),
    )
    for flag, options in optional_arguments:
        arg_parser.add_argument(flag, **options)

    return arg_parser.parse_args()

def build_asci_dataset(antipattern, systems):
    """Build the dataset for ASCI.

    The inputs are the real instances returned by nnUtils.build_dataset. The
    label of each instance is the index of the best tool for that instance.
    The order of the tools is given by asci.getToolsPredictions(...):
        idx = 0: DECOR, InCode
        idx = 1: HIST
        idx = 2: JDeodorant

    Args:
        antipattern: the antipattern, forwarded to nnUtils.build_dataset and
            asci.getToolsPredictions.
        systems: iterable of the names of the systems to include.

    Returns:
        A tuple (instances, tool_indexes): the instances returned by
        nnUtils.build_dataset and a numpy array holding, for each instance,
        the index of the tool selected for it.
    """
    # The systems are walked twice (once for the real labels, once for the
    # tools' predictions). Freezing them into a list guarantees both walks see
    # the same systems in the same order, even for a one-shot iterable.
    systems = list(systems)

    # Get real instances and labels
    instances, labels = nnUtils.build_dataset(antipattern, systems)

    # Gather, for each tool, its predictions over all the systems (stacked in
    # the same order as the labels).
    nb_tools = 3
    toolsOverallPredictions = [np.empty(shape=[0, 1]) for _ in range(nb_tools)]
    for system in systems:
        toolsPredictions = asci.getToolsPredictions(antipattern, system)
        for toolIndex in range(nb_tools):
            toolsOverallPredictions[toolIndex] = np.concatenate(
                (toolsOverallPredictions[toolIndex], toolsPredictions[toolIndex]),
                axis=0)

    # Overall performance of each tool, used to rank them
    toolsPerformances = [nnUtils.f_measure(pred, labels) for pred in toolsOverallPredictions]

    # Indexes of the tools, from the best to the worst on the training set
    # (argsort is ascending, hence the reversal).
    toolsBestFirst = np.argsort(np.array(toolsPerformances))[::-1]

    # Assign to each instance the index of the tool that correctly predicted
    # its label. When several tools are right, keep the one that performed
    # best overall; when none is right, fall back to the best tool overall.
    toolsIndexes = [toolsBestFirst[0]] * len(instances)
    for i, label in enumerate(labels):
        for toolIndex in toolsBestFirst:
            if toolsOverallPredictions[toolIndex][i] == label:
                toolsIndexes[i] = toolIndex
                # Tools are visited best first: the first hit is the answer.
                break

    return instances, np.array(toolsIndexes)

if __name__ == "__main__":
    # Entry point: train args.n_tree decision trees that choose, for each
    # instance, which detection tool to trust, then report the performances
    # of the resulting ensemble on the test system.
    args = parse_args()

    # The test system is held out: train on every other system.
    training_systems.remove(args.test_system)
    x_train, y_train = build_asci_dataset(args.antipattern, training_systems)

    # Test dataset. Here y_test contains the real labels whereas y_train
    # contains tools' indexes.
    x_test, y_test = nnUtils.build_dataset(args.antipattern, [args.test_system])
    toolsPredictions = asci.getToolsPredictions(args.antipattern, args.test_system)

    # One slice of predictions per trained tree
    predictions = np.zeros((args.n_tree, x_test.shape[0], 1))
    for tree_number in range(args.n_tree):
        classifier = tree.DecisionTreeClassifier(
            min_samples_split=args.min_samples_split,
            max_features=args.max_features,
            max_depth=args.max_depth,
            min_samples_leaf=args.min_samples_leaf)
        classifier = classifier.fit(x_train, y_train)

        # Save the tree
        save_path = nnUtils.get_save_path('asci', args.antipattern, args.test_system, tree_number)
        with open(save_path, 'wb') as save_file:
            pickle.dump(classifier, save_file)

        # The tree predicts a tool index for each test instance; the tree's
        # prediction is then the one made by that tool for the instance.
        for instance_number, toolIndex in enumerate(classifier.predict(x_test)):
            predictions[tree_number, instance_number, 0] = toolsPredictions[toolIndex][instance_number]

    # The ensemble prediction is the mean over the trees
    ensemble_prediction = np.mean(predictions, axis=0)

    # Print Ensemble performances
    print("\nPerformances on " + args.test_system + ": ")
    reported_metrics = (
        ('Precision: ', nnUtils.precision),
        ('Recall   : ', nnUtils.recall),
        ('F-Mesure : ', nnUtils.f_measure),
    )
    for title, metric in reported_metrics:
        print(title + str(metric(ensemble_prediction, y_test)))
25 changes: 4 additions & 21 deletions experiments/training/train_smad.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,6 @@ def parse_args():
parser.add_argument("-lr_decay", type=float, default=0.5, help="The factor by which the learning rate is multiplied every 'decay_step' steps")
return parser.parse_args()

def get_save_path(antipattern, test_system, net_number):
    """Get the path of a trained model, creating its directory if needed.

    Args:
        antipattern: name of the antipattern, used as a sub-directory.
        test_system: name of the test system, used as a sub-directory.
        net_number: number of the network, appended to the file name.

    Returns:
        The path 'network<net_number>' located in the directory
        ROOT_DIR/neural_networks/smad/trained_models/<antipattern>/<test_system>.

    Raises:
        OSError: if the directory does not exist and cannot be created.
    """
    directory = os.path.join(ROOT_DIR, 'neural_networks', 'smad', 'trained_models', antipattern, test_system)

    # Create first and tolerate "already exists" rather than checking first:
    # another process may create the directory between a check and makedirs.
    try:
        os.makedirs(directory)
    except OSError:
        if not os.path.isdir(directory):
            raise

    return os.path.join(directory, 'network' + str(net_number))

def build_dataset(antipattern, systems):
    """Stack the instances and the labels of several systems.

    Args:
        antipattern: 'god_class' (8 input features) or 'feature_envy'
            (9 input features).
        systems: iterable of the names of the systems to include.

    Returns:
        A tuple (X, Y) of numpy arrays: the instances and the labels of all
        the systems, concatenated row-wise in the order of `systems`.
    """
    feature_count = {'god_class':8, 'feature_envy':9}[antipattern]

    # Start from empty arrays so the result is well-formed without any system
    instances = np.empty(shape=[0, feature_count])
    labels = np.empty(shape=[0, 1])

    for name in systems:
        system_instances = nnUtils.getInstances(name, antipattern)
        instances = np.concatenate((instances, system_instances), axis=0)

        system_labels = nnUtils.getLabels(name, antipattern)
        labels = np.concatenate((labels, system_labels), axis=0)

    return instances, labels

# Train a single network
def train(session, model, x_train, y_train, x_test, y_test, num_step, start_lr, beta, decay_step, lr_decay):
learning_rate = start_lr
Expand Down Expand Up @@ -79,8 +62,8 @@ def train(session, model, x_train, y_train, x_test, y_test, num_step, start_lr,

# Remove the test system from the training set and build dataset
training_systems.remove(args.test_system)
x_train, y_train = build_dataset(args.antipattern, training_systems)
x_test, y_test = build_dataset(args.antipattern, [args.test_system])
x_train, y_train = nnUtils.build_dataset(args.antipattern, training_systems)
x_test, y_test = nnUtils.build_dataset(args.antipattern, [args.test_system])

# Create model
model = md.SMAD(
Expand Down Expand Up @@ -118,13 +101,13 @@ def train(session, model, x_train, y_train, x_test, y_test, num_step, start_lr,
all_losses_test.append(losses_test)

# Save the model
saver.save(sess=session, save_path=get_save_path(args.antipattern, args.test_system, i))
saver.save(sess=session, save_path=nnUtils.get_save_path('smad', args.antipattern, args.test_system, i))


# Compute the ensemble prediction on the test system
ensemble_prediction = nnUtils.ensemble_prediction(
model=model,
save_paths=[get_save_path(args.antipattern, args.test_system, i) for i in range(args.n_net)],
save_paths=[nnUtils.get_save_path('smad', args.antipattern, args.test_system, i) for i in range(args.n_net)],
input_x=x_test)

# Print Ensemble performances
Expand Down
Binary file added experiments/tuning/.DS_Store
Binary file not shown.
5 changes: 3 additions & 2 deletions experiments/tuning/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
sys.path.insert(0, ROOT_DIR)

import utils.nnUtils as nnUtils
import experiments.training.train_smad as train_smad
import neural_networks.smad.model as md

import neural_networks.asci.predict as asci
import neural_networks.smad.model as md
Binary file added experiments/tuning/results/.DS_Store
Binary file not shown.
Binary file added experiments/tuning/results/asci/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file added experiments/tuning/results/smad/.DS_Store
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 3ce21cc

Please sign in to comment.