Skip to content

Commit

Permalink
add experiments directory + compare_approaches
Browse files Browse the repository at this point in the history
  • Loading branch information
antoineBarbez committed Sep 21, 2018
1 parent ae3f6bd commit 5760c7c
Show file tree
Hide file tree
Showing 20 changed files with 181 additions and 37 deletions.
Binary file modified .DS_Store
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
7 changes: 6 additions & 1 deletion data_construction/oracle_feature_envy/createLabels.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
import fnmatch
import os

# This script is used to automatically create the labels from the answers collected by our survey.
# It implements a vote decision between the different answers for the same instance.
# For each system, it creates a file "data/labels/feature_envy/system-name.csv" containing all the
# instances of feature envy detected by our survey.

systems = [
{
'name' : 'android-frameworks-opt-telephony',
Expand Down Expand Up @@ -82,7 +87,7 @@ def getCandidates():
smells = [candidates[i] for i in range(startIndex, endIndex) if getScore(answers[i]) >= 0.5]
startIndex = endIndex

labelFile = os.path.join(ROOT_DIR, 'data/labels/feature_envy/' + system['name'] + '.csv')
labelFile = os.path.join(ROOT_DIR, 'data/labels/feature_envy/' + system['name'] + '.txt')

with open(labelFile, 'w') as file:
for smell in smells:
Expand Down
2 changes: 1 addition & 1 deletion detection_tools/confidence_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def getHistFECM(systemName):

FECM = {}
for i, j in zip(*np.where(coOcc > 0)):
if classes[j] != entityUtils.getEmbeddingClass(methods[i])
if classes[j] != entityUtils.getEmbeddingClass(methods[i]):
instanceName = methods[i] + ';' + classes[j]
FECM[instanceName] = coOcc[i,j]

Expand Down
33 changes: 0 additions & 33 deletions detection_tools/replication/evaluate.py

This file was deleted.

Binary file added experiments/.DS_Store
Binary file not shown.
Empty file added experiments/__init__.py
Empty file.
27 changes: 27 additions & 0 deletions experiments/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from __future__ import division


def recall(detected, true):
    """Return the recall of `detected` with respect to `true`.

    The result is the number of items of `detected` that also appear in
    `true`, divided by the number of items in `true`. When `true` is empty
    the ratio is undefined and NaN is returned.
    """
    # Count the hits first, then guard the division.
    hits = sum(1 for candidate in detected if candidate in true)
    expected = len(true)

    if expected == 0:
        return float('nan')

    return hits / expected

def precision(detected, true):
    """Return the precision of `detected` with respect to `true`.

    The result is the number of items of `detected` that also appear in
    `true`, divided by the number of items in `detected`. When `detected`
    is empty the ratio is undefined and NaN is returned.
    """
    # Count the hits first, then guard the division.
    hits = sum(1 for candidate in detected if candidate in true)
    reported = len(detected)

    if reported == 0:
        return float('nan')

    return hits / reported

def f_measure(detected, true, alpha=0.5):
    """Return the weighted harmonic mean of precision and recall.

    Computes pre * rec / (alpha * rec + (1 - alpha) * pre), where `pre` and
    `rec` come from precision() and recall(). With the default alpha of 0.5
    this is 2 * pre * rec / (pre + rec).

    Returns 0.0 when both precision and recall are zero. A NaN precision or
    recall is not special-cased and propagates through the arithmetic.
    """
    p = precision(detected, true)
    r = recall(detected, true)

    # Both zero would make the expression below 0/0, so answer directly.
    if p == 0 and r == 0:
        return 0.0

    weighted_sum = alpha*r + (1-alpha)*p
    return p*r/weighted_sum
57 changes: 57 additions & 0 deletions experiments/feature_envy/compare_approaches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from context import dataUtils, evaluate, hist, incode, jdeodorant


# This script is used to compare the performances of various feature envy detection approaches
# on the systems considered in this study.

# Arguments forwarded to hist.getSmells() and incode.getSmells() below
# (presumably detection thresholds -- confirm against those modules).
parameters = {'hist': 2.0, 'incode': (3.0, 3.0, 3.0)}

systems = [
    'android-frameworks-opt-telephony',
    'android-platform-support',
    'apache-ant',
    'apache-tomcat',
    'lucene',
    'argouml',
    'jedit',
    'xerces-2_7_0'
]

SEPARATOR = '-------------------------------------------------'


def _scores(detected, true):
    """Return (precision, recall, f_measure) of `detected` against `true`."""
    return (
        evaluate.precision(detected, true),
        evaluate.recall(detected, true),
        evaluate.f_measure(detected, true),
    )


def _format_row(label, scores):
    """Format one table row: the label followed by each score with 3 decimals."""
    return ' |'.join([label] + ["{0:.3f}".format(score) for score in scores])


for system in systems:
    # Get occurrences manually detected on the considered system
    true = dataUtils.getLabels(system, 'feature_envy')

    # Compute performances for each approach, in display order
    results = [
        ('HIST', _scores(hist.getSmells(system, parameters['hist']), true)),
        ('InCode', _scores(incode.getSmells(system, *parameters['incode']), true)),
        ('JDeodorant', _scores(jdeodorant.getSmells(system), true)),
    ]

    # Output results, labelled with the system they belong to
    print(system)
    print(' |precision |recall |f_measure')
    for label, scores in results:
        print(SEPARATOR)
        print(_format_row(label, scores))
    print('\n\n')


12 changes: 12 additions & 0 deletions experiments/feature_envy/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import os
import sys

# Absolute path of the directory two levels above the one containing this file.
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
# Put ROOT_DIR first on sys.path so the absolute package imports below resolve
# when a script in this directory is run directly.
sys.path.insert(0, ROOT_DIR)

# Re-exported so sibling scripts can write `from context import ...`.
import utils.dataUtils as dataUtils
import experiments.evaluate as evaluate

import detection_tools.replication.feature_envy.hist as hist
import detection_tools.replication.feature_envy.incode as incode
import detection_tools.replication.feature_envy.jdeodorant as jdeodorant
2 changes: 2 additions & 0 deletions tests/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import utils.dataUtils as dataUtils

import experiments.evaluate as evaluate

import detection_tools.confidence_metrics as cm

import detection_tools.replication.feature_envy.hist as hist_fe
Expand Down
65 changes: 65 additions & 0 deletions tests/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from context import evaluate

import math
import unittest

class TestDecorGCCM(unittest.TestCase):
    """Unit tests for evaluate.precision, evaluate.recall and evaluate.f_measure.

    NOTE(review): the class name does not mention `evaluate`; it is kept
    unchanged so existing test selectors keep working.
    """

    def setUp(self):
        # Small label lists with known overlaps; set_4 covers the empty case.
        self.set_1 = ['a', 'b', 'c', 'd', 'e']
        self.set_2 = ['b', 'e', 'f', 'g']
        self.set_3 = ['c']
        self.set_4 = []

    def tearDown(self):
        del self.set_1
        del self.set_2
        del self.set_3
        del self.set_4

    def test_precision(self):
        self.assertEqual(evaluate.precision(self.set_1, self.set_1), 1.0)
        self.assertEqual(evaluate.precision(self.set_1, self.set_2), 0.4)
        self.assertEqual(evaluate.precision(self.set_1, self.set_3), 0.2)
        self.assertEqual(evaluate.precision(self.set_1, self.set_4), 0.0)
        self.assertEqual(evaluate.precision(self.set_2, self.set_1), 0.5)
        self.assertEqual(evaluate.precision(self.set_2, self.set_2), 1.0)
        self.assertEqual(evaluate.precision(self.set_2, self.set_3), 0.0)
        self.assertEqual(evaluate.precision(self.set_3, self.set_1), 1.0)
        self.assertEqual(evaluate.precision(self.set_3, self.set_4), 0.0)
        # An empty detection list makes precision undefined (NaN).
        self.assertTrue(math.isnan(evaluate.precision(self.set_4, self.set_1)))
        self.assertTrue(math.isnan(evaluate.precision(self.set_4, self.set_3)))
        self.assertTrue(math.isnan(evaluate.precision(self.set_4, self.set_4)))

    def test_recall(self):
        self.assertEqual(evaluate.recall(self.set_1, self.set_1), 1.0)
        self.assertEqual(evaluate.recall(self.set_1, self.set_2), 0.5)
        self.assertEqual(evaluate.recall(self.set_1, self.set_3), 1.0)
        # An empty ground-truth list makes recall undefined (NaN).
        self.assertTrue(math.isnan(evaluate.recall(self.set_1, self.set_4)))
        self.assertEqual(evaluate.recall(self.set_2, self.set_1), 0.4)
        self.assertEqual(evaluate.recall(self.set_2, self.set_2), 1.0)
        self.assertEqual(evaluate.recall(self.set_2, self.set_3), 0.0)
        self.assertEqual(evaluate.recall(self.set_3, self.set_1), 0.2)
        self.assertTrue(math.isnan(evaluate.recall(self.set_3, self.set_4)))
        self.assertEqual(evaluate.recall(self.set_4, self.set_1), 0.0)
        self.assertEqual(evaluate.recall(self.set_4, self.set_3), 0.0)
        self.assertTrue(math.isnan(evaluate.recall(self.set_4, self.set_4)))

    def test_f_measure(self):
        # The f-measure is the result of several float operations, so the
        # non-trivial values are compared approximately rather than bit-for-bit.
        self.assertEqual(evaluate.f_measure(self.set_1, self.set_1), 1.0)
        self.assertAlmostEqual(evaluate.f_measure(self.set_1, self.set_2), 0.4444444444444445)
        self.assertAlmostEqual(evaluate.f_measure(self.set_1, self.set_3), 0.33333333333333337)
        self.assertTrue(math.isnan(evaluate.f_measure(self.set_1, self.set_4)))
        self.assertAlmostEqual(evaluate.f_measure(self.set_2, self.set_1), 0.4444444444444445)
        self.assertEqual(evaluate.f_measure(self.set_2, self.set_2), 1.0)
        self.assertEqual(evaluate.f_measure(self.set_2, self.set_3), 0.0)
        self.assertAlmostEqual(evaluate.f_measure(self.set_3, self.set_1), 0.33333333333333337)
        self.assertTrue(math.isnan(evaluate.f_measure(self.set_3, self.set_4)))
        self.assertTrue(math.isnan(evaluate.f_measure(self.set_4, self.set_1)))
        self.assertTrue(math.isnan(evaluate.f_measure(self.set_4, self.set_3)))
        self.assertTrue(math.isnan(evaluate.f_measure(self.set_4, self.set_4)))



# Run the test cases defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
13 changes: 11 additions & 2 deletions utils/dataUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,22 @@ def getMethods(systemName):

methods = []
with open(systemMethodsFile, 'rb') as csvfile:
rdr = csv.reader(csvfile, delimiter=';')
reader = csv.reader(csvfile, delimiter=';')

for row in rdr:
for row in reader:
methods.append(row[0])

return methods

# Get the hand-validated occurrences reported in the considered system for antipattern in [god_class, feature_envy].
def getLabels(systemName, antipattern):
    """Return the hand-validated occurrences of `antipattern` in a system.

    Reads ROOT_DIR/data/labels/<antipattern>/<systemName>.txt and returns its
    lines, without line endings, as a list of strings.

    `antipattern` must be 'god_class' or 'feature_envy'. For any other value
    a message is printed and None is returned.
    """
    if antipattern not in ('god_class', 'feature_envy'):
        print(str(antipattern) + ' not valid antipattern name. Choose "god_class" or "feature_envy" instead')
        return None

    labelFile = os.path.join(ROOT_DIR, 'data', 'labels', antipattern, systemName + '.txt')
    with open(labelFile, 'r') as labels:
        return labels.read().splitlines()


##### MERGED DETECTION INSTANCES GETTERS #####
Expand Down

0 comments on commit 5760c7c

Please sign in to comment.