add asci + cleanup

antoineBarbez · Jan 29, 2019 · 676b035 · 676b035
1 parent d6b7237
commit 676b035
Show file tree

Hide file tree

Showing 41 changed files with 202 additions and 85 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/README.md b/README.md
@@ -9,11 +9,11 @@ The key idea behind SMAD is to combine various anti-pattern detection tools by c
 their core-metrics for each input instance and use these metrics to feed a machine-learning based classifier. 
 First, for each anti-pattern considered in this study, we selected three state-of-the-art detection tools. 
 These tools respectively rely on:
-* **Rule Card**s: Affected entities are identified using a combination of source-code metrics designed to reflect the formal definition of the anti-patterns.
+* **Rule Cards:** Affected entities are identified using a combination of source-code metrics designed to reflect the formal definition of the anti-patterns.
 For this category, we selected DECOR for God Class and InCode for Feature Envy detection.
-* **Historical** Information: Affected entities are identified via an analysis of change history information derived from versioning systems. 
+* **Historical Information:** Affected entities are identified via an analysis of change history information derived from versioning systems. 
 For this category, we used HIST for both God Class and Feature Envy detection.
-* **Refactoring Opportunities**: Anti-patterns are detected by identifying the opportunities to apply their corresponding refactoring operations. 
+* **Refactoring Opportunities:** Anti-patterns are detected by identifying the opportunities to apply their corresponding refactoring operations. 
 For this category, we used the refactoring operations Extract Class and Move Method provided by JDeodorant, respectively for God Class and Feature Envy detection.
 
 Then, we selected the core-metrics, i.e., metrics that reflect best the internal decision process of each tool, as input metrics for our model. 

diff --git a/detection_tools/.DS_Store b/detection_tools/.DS_Store
diff --git a/experiments/.DS_Store b/experiments/.DS_Store
diff --git a/experiments/feature_envy/asci.py b/experiments/feature_envy/asci.py
@@ -0,0 +1,49 @@
+from context import experimentUtils, dataUtils, nnUtils, hist, incode, jdeodorant
+from sklearn import tree
+
+import numpy as np
+
+
+training_systems = ['xerces-2_7_0', 'lucene', 'apache-ant', 'argouml', 'android-frameworks-opt-telephony']
+
+def getToolIndexes(system):
+	# Build the ASCI training labels for `system`: one tool index per candidate entity,
+	# pointing at a tool (position in `detected`) whose verdict matches the entity's label.
+	# Returns a list of ints with one element per entry of getCandidateFeatureEnvy(system).
+	entities = dataUtils.getCandidateFeatureEnvy(system)
+	# Presumably the ground-truth Feature Envy occurrences for the system; confirm in dataUtils.
+	antipatterns = dataUtils.getAntipatterns(system, 'feature_envy')
+	# The list order fixes the meaning of the indexes: 0 = hist, 1 = jdeodorant, 2 = incode.
+	detected = [hist.getSmells(system), jdeodorant.getSmells(system), incode.getSmells(system)]
+
+	# Default label is 2 (the last tool); it is kept when no tool agrees with the label.
+	toolIndexes = [2 for e in entities]
+	for i, e in enumerate(entities):
+		if e in antipatterns:
+			# Entity is an antipattern: select a tool that reported it.
+			# There is no break, so the highest-index tool that reported it wins.
+			for toolIdx, detected_tool in enumerate(detected):
+				if e in detected_tool:
+					toolIndexes[i] = toolIdx
+
+		else:
+			# Entity is not an antipattern: select a tool that did NOT report it
+			# (again, the highest-index such tool wins).
+			for toolIdx, detected_tool in enumerate(detected):
+				if e not in detected_tool:
+					toolIndexes[i] = toolIdx
+
+	return toolIndexes
+
+# Assemble the training set over all training systems:
+# X collects the rows returned by nnUtils.getInstances, Y the tool indexes from getToolIndexes.
+# NOTE(review): assumes the rows of getInstances are in the same order as the entities
+# used by getToolIndexes; confirm in nnUtils/dataUtils.
+X = []
+Y = []
+for system in training_systems:
+	X += nnUtils.getInstances(system, 'feature_envy', False).tolist()
+	Y += getToolIndexes(system)
+
+# The tree is fitted at module level, i.e. every time this module is imported or run;
+# getSmells() reads the resulting module-level `clf`.
+clf = tree.DecisionTreeClassifier()
+clf = clf.fit(X, Y)
+
+def getSmells(system):
+	# Return the list of entities of `system` flagged by ASCI: the fitted tree picks one
+	# tool per entity, and the entity is kept only if that tool reported it.
+	instances = nnUtils.getInstances(system, 'feature_envy', False)
+	predictedToolIndexes = clf.predict(instances)
+
+	# NOTE(review): assumes instances[i] describes entities[i]; confirm in nnUtils/dataUtils.
+	entities = dataUtils.getCandidateFeatureEnvy(system)
+	# Same tool order as in getToolIndexes, so predicted indexes refer to the same tools.
+	detected = [hist.getSmells(system), jdeodorant.getSmells(system), incode.getSmells(system)]
+
+	smells = []
+	for i, e in enumerate(entities):
+		# Keep the entity when the tool selected for it reported it as a smell.
+		if e in detected[predictedToolIndexes[i]]:
+			smells.append(e)
+
+	return smells
diff --git a/experiments/feature_envy/compare_approaches.py b/experiments/feature_envy/compare_approaches.py
@@ -2,11 +2,12 @@
 
 import numpy as np
 
-import smad_fe
+import smad_fe, asci
 
 # This script is used to compare the performances of:
 # - The detection tools aggregated through SMAD
-# - The voting technique with k in {1, 2, 3}
+# - The voting technique with k=1
+# - ASCI
 # - SMAD
 # for Feature Envy detection on the three subject systems.
 
@@ -28,13 +29,9 @@
 r_v1 = []
 f_v1 = []
 
-p_v2 = []
-r_v2 = []
-f_v2 = []
-
-p_v3 = []
-r_v3 = []
-f_v3 = []
+p_asci = []
+r_asci = []
+f_asci = []
 
 p_smad = []
 r_smad = []
@@ -96,27 +93,18 @@
 	r_v1.append(recall_vote_1)
 	f_v1.append(f_measure_vote_1)
 
-	# Compute vote for k = 2
-	detected_vote_2 = experimentUtils.vote(tools_outputs, 2)
-
-	precision_vote_2 = experimentUtils.precision(detected_vote_2, true)
-	recall_vote_2 = experimentUtils.recall(detected_vote_2, true)
-	f_measure_vote_2 = experimentUtils.f_measure(detected_vote_2, true)
 
-	p_v2.append(precision_vote_2)
-	r_v2.append(recall_vote_2)
-	f_v2.append(f_measure_vote_2)
+	# ASCI
+	detected_asci = asci.getSmells(system)
 
-	# Compute vote for k = 3
-	detected_vote_3 = experimentUtils.vote(tools_outputs, 3)
+	precision_asci = experimentUtils.precision(detected_asci, true)
+	recall_asci = experimentUtils.recall(detected_asci, true)
+	f_measure_asci = experimentUtils.f_measure(detected_asci, true)
 
-	precision_vote_3 = experimentUtils.precision(detected_vote_3, true)
-	recall_vote_3 = experimentUtils.recall(detected_vote_3, true)
-	f_measure_vote_3 = experimentUtils.f_measure(detected_vote_3, true)
+	p_asci.append(precision_asci)
+	r_asci.append(recall_asci)
+	f_asci.append(f_measure_asci)
 
-	p_v3.append(precision_vote_3)
-	r_v3.append(recall_vote_3)
-	f_v3.append(f_measure_vote_3)
 
 	# SMAD
 	detected_smad = smad_fe.getSmells(system)
@@ -141,9 +129,7 @@
 	print('-------------------------------------------------')
 	print('Vote 1     |' + "{0:.3f}".format(precision_vote_1) + '      |' + "{0:.3f}".format(recall_vote_1) + '   |' + "{0:.3f}".format(f_measure_vote_1))
 	print('-------------------------------------------------')
-	print('Vote 2     |' + "{0:.3f}".format(precision_vote_2) + '      |' + "{0:.3f}".format(recall_vote_2) + '   |' + "{0:.3f}".format(f_measure_vote_2))
-	print('-------------------------------------------------')
-	print('Vote 3     |' + "{0:.3f}".format(precision_vote_3) + '      |' + "{0:.3f}".format(recall_vote_3) + '   |' + "{0:.3f}".format(f_measure_vote_3))
+	print('ASCI       |' + "{0:.3f}".format(precision_asci) + '      |' + "{0:.3f}".format(recall_asci) + '   |' + "{0:.3f}".format(f_measure_asci))
 	print('-------------------------------------------------')
 	print('SMAD       |' + "{0:.3f}".format(precision_smad) + '      |' + "{0:.3f}".format(recall_smad) + '   |' + "{0:.3f}".format(f_measure_smad))
 	print('-------------------------------------------------')
@@ -163,9 +149,7 @@
 print('-------------------------------------------------')
 print('Vote 1     |' + "{0:.3f}".format(np.mean(p_v1)) + '      |' + "{0:.3f}".format(np.mean(r_v1)) + '   |' + "{0:.3f}".format(np.mean(f_v1)))
 print('-------------------------------------------------')
-print('Vote 2     |' + "{0:.3f}".format(np.mean(p_v2)) + '      |' + "{0:.3f}".format(np.mean(r_v2)) + '   |' + "{0:.3f}".format(np.mean(f_v2)))
-print('-------------------------------------------------')
-print('Vote 3     |' + "{0:.3f}".format(np.mean(p_v3)) + '      |' + "{0:.3f}".format(np.mean(r_v3)) + '   |' + "{0:.3f}".format(np.mean(f_v3)))
+print('ASCI       |' + "{0:.3f}".format(np.mean(p_asci)) + '      |' + "{0:.3f}".format(np.mean(r_asci)) + '   |' + "{0:.3f}".format(np.mean(f_asci)))
 print('-------------------------------------------------')
 print('SMAD       |' + "{0:.3f}".format(np.mean(p_smad)) + '      |' + "{0:.3f}".format(np.mean(r_smad)) + '   |' + "{0:.3f}".format(np.mean(f_smad)))
 print('-------------------------------------------------')

diff --git a/experiments/feature_envy/parameters_calibration/context.py b/experiments/feature_envy/parameters_calibration/context.py
@@ -10,8 +10,9 @@
 import utils.liuUtils        as liuUtils
 import utils.nnUtils		 as nnUtils
 
-import detection_tools.feature_envy.hist   as hist
-import detection_tools.feature_envy.incode as incode
+import detection_tools.feature_envy.hist       as hist
+import detection_tools.feature_envy.incode     as incode
+import detection_tools.feature_envy.jdeodorant as jdeodorant
 
 import experiments.feature_envy.smad_fe as smad_fe 
 

diff --git a/.../parameters_calibration/calibrate_hist.py → ...ature_envy/parameters_calibration/hist.py b/.../parameters_calibration/calibrate_hist.py → ...ature_envy/parameters_calibration/hist.py
diff --git a/...arameters_calibration/calibrate_incode.py → ...ure_envy/parameters_calibration/incode.py b/...arameters_calibration/calibrate_incode.py → ...ure_envy/parameters_calibration/incode.py
diff --git a/...eters_calibration/liu_cross_validation.py → ...eature_envy/parameters_calibration/liu.py b/...eters_calibration/liu_cross_validation.py → ...eature_envy/parameters_calibration/liu.py
diff --git a/...ters_calibration/smad_cross_validation.py → ...ature_envy/parameters_calibration/smad.py b/...ters_calibration/smad_cross_validation.py → ...ature_envy/parameters_calibration/smad.py
diff --git a/experiments/feature_envy/parameters_calibration/vote.py b/experiments/feature_envy/parameters_calibration/vote.py
@@ -0,0 +1,21 @@
+from context import experimentUtils, dataUtils, hist, incode, jdeodorant
+from sklearn import tree
+
+import numpy as np
+
+# Systems on which the voting threshold k is compared.
+validation_systems = ['xerces-2_7_0', 'lucene', 'apache-ant', 'argouml', 'android-frameworks-opt-telephony']
+
+# Per-system F-measures of the vote, for k = 1, 2 and 3 respectively.
+fm_v1 = []
+fm_v2 = []
+fm_v3 = []
+for system in validation_systems:
+	antipatterns = dataUtils.getAntipatterns(system, 'feature_envy')
+	detected = [hist.getSmells(system), incode.getSmells(system), jdeodorant.getSmells(system)]
+
+	# NOTE(review): vote(detected, k) presumably keeps entities reported by at least k tools;
+	# confirm in experimentUtils.
+	fm_v1.append(experimentUtils.f_measure(experimentUtils.vote(detected, 1), antipatterns))
+	fm_v2.append(experimentUtils.f_measure(experimentUtils.vote(detected, 2), antipatterns))
+	fm_v3.append(experimentUtils.f_measure(experimentUtils.vote(detected, 3), antipatterns))
+
+# Print the mean F-measure over the validation systems for each k.
+print('V1: ' + str(np.mean(np.array(fm_v1))))
+print('V2: ' + str(np.mean(np.array(fm_v2))))
+print('V3: ' + str(np.mean(np.array(fm_v3))))
diff --git a/experiments/feature_envy/train_liu.py b/experiments/feature_envy/train_liu.py
@@ -8,6 +8,8 @@
 import os
 import progressbar
 
+# Train Liu's model on instances detected by SMAD
+
 def get_save_path():
 	return os.path.join(ROOT_DIR, 'neural_networks/liu_replication/trained_models/generated/network')
 

diff --git a/experiments/god_class/asci.py b/experiments/god_class/asci.py
@@ -0,0 +1,50 @@
+from context import experimentUtils, dataUtils, nnUtils, hist, decor, jdeodorant
+from sklearn import tree
+
+import numpy as np
+
+
+training_systems = ['xerces-2_7_0', 'lucene', 'apache-ant', 'argouml', 'android-frameworks-opt-telephony']
+
+def getToolIndexes(system):
+	# Build the ASCI training labels for `system`: one tool index per class,
+	# pointing at a tool (position in `detected`) whose verdict matches the class's label.
+	# Returns a list of ints with one element per entry of dataUtils.getClasses(system).
+	entities = dataUtils.getClasses(system)
+	# Presumably the ground-truth God Class occurrences for the system; confirm in dataUtils.
+	antipatterns = dataUtils.getAntipatterns(system, 'god_class')
+	# The list order fixes the meaning of the indexes: 0 = jdeodorant, 1 = decor, 2 = hist.
+	detected = [jdeodorant.getSmells(system), decor.getSmells(system), hist.getSmells(system)]
+
+	# Default label is 2 (the last tool); it is kept when no tool agrees with the label.
+	toolIndexes = [2 for e in entities]
+	for i, e in enumerate(entities):
+		if e in antipatterns:
+			# Class is an antipattern: select a tool that reported it.
+			# There is no break, so the highest-index tool that reported it wins.
+			for toolIdx, detected_tool in enumerate(detected):
+				if e in detected_tool:
+					toolIndexes[i] = toolIdx
+
+		else:
+			# Class is not an antipattern: select a tool that did NOT report it
+			# (again, the highest-index such tool wins).
+			for toolIdx, detected_tool in enumerate(detected):
+				if e not in detected_tool:
+					toolIndexes[i] = toolIdx
+
+	return toolIndexes
+
+# Assemble the training set over all training systems:
+# X collects the rows returned by nnUtils.getInstances, Y the tool indexes from getToolIndexes.
+# NOTE(review): assumes the rows of getInstances are in the same order as the classes
+# used by getToolIndexes; confirm in nnUtils/dataUtils.
+X = []
+Y = []
+for system in training_systems:
+	X += nnUtils.getInstances(system, 'god_class', False).tolist()
+	Y += getToolIndexes(system)
+
+# The tree is fitted at module level, i.e. every time this module is imported or run;
+# getSmells() reads the resulting module-level `clf`.
+clf = tree.DecisionTreeClassifier()
+clf = clf.fit(X, Y)
+
+
+def getSmells(system):
+	# Return the list of classes of `system` flagged by ASCI: the fitted tree picks one
+	# tool per class, and the class is kept only if that tool reported it.
+	instances = nnUtils.getInstances(system, 'god_class', False)
+	predictedToolIndexes = clf.predict(instances)
+
+	# NOTE(review): assumes instances[i] describes entities[i]; confirm in nnUtils/dataUtils.
+	entities = dataUtils.getClasses(system)
+	# Same tool order as in getToolIndexes, so predicted indexes refer to the same tools.
+	detected = [jdeodorant.getSmells(system), decor.getSmells(system), hist.getSmells(system)]
+
+	smells = []
+	for i, e in enumerate(entities):
+		# Keep the class when the tool selected for it reported it as a smell.
+		if e in detected[predictedToolIndexes[i]]:
+			smells.append(e)
+
+	return smells
diff --git a/experiments/god_class/compare_approaches.py b/experiments/god_class/compare_approaches.py
@@ -2,11 +2,12 @@
 
 import numpy as np
 
-import smad_gc
+import smad_gc, asci
 
 # This script is used to compare the performances of:
 # - The detection tools aggregated through SMAD
-# - The voting technique with k in {1, 2, 3}
+# - The voting technique with k=2
+# - ASCI
 # - SMAD
 # for God Class detection on the three subject systems.
 
@@ -24,17 +25,13 @@
 r_jd = []
 f_jd = []
 
-p_v1 = []
-r_v1 = []
-f_v1 = []
-
 p_v2 = []
 r_v2 = []
 f_v2 = []
 
-p_v3 = []
-r_v3 = []
-f_v3 = []
+p_asci = []
+r_asci = []
+f_asci = []
 
 p_smad = []
 r_smad = []
@@ -82,17 +79,6 @@
 
 	tools_outputs = [detected_hist, detected_decor, detected_jdeodorant]
 
-	# Compute vote for k = 1
-	detected_vote_1 = experimentUtils.vote(tools_outputs, 1)
-
-	precision_vote_1 = experimentUtils.precision(detected_vote_1, true)
-	recall_vote_1 = experimentUtils.recall(detected_vote_1, true)
-	f_measure_vote_1 = experimentUtils.f_measure(detected_vote_1, true)
-
-	p_v1.append(precision_vote_1)
-	r_v1.append(recall_vote_1)
-	f_v1.append(f_measure_vote_1)
-
 	# Compute vote for k = 2
 	detected_vote_2 = experimentUtils.vote(tools_outputs, 2)
 
@@ -104,16 +90,18 @@
 	r_v2.append(recall_vote_2)
 	f_v2.append(f_measure_vote_2)
 
-	# Compute vote for k = 3
-	detected_vote_3 = experimentUtils.vote(tools_outputs, 3)
 
-	precision_vote_3 = experimentUtils.precision(detected_vote_3, true)
-	recall_vote_3 = experimentUtils.recall(detected_vote_3, true)
-	f_measure_vote_3 = experimentUtils.f_measure(detected_vote_3, true)
+	# ASCI
+	detected_asci = asci.getSmells(system)
+
+	precision_asci = experimentUtils.precision(detected_asci, true)
+	recall_asci = experimentUtils.recall(detected_asci, true)
+	f_measure_asci = experimentUtils.f_measure(detected_asci, true)
+
+	p_asci.append(precision_asci)
+	r_asci.append(recall_asci)
+	f_asci.append(f_measure_asci)
 
-	p_v3.append(precision_vote_3)
-	r_v3.append(recall_vote_3)
-	f_v3.append(f_measure_vote_3)
 
 	# SMAD
 	detected_smad = smad_gc.getSmells(system)
@@ -138,11 +126,9 @@
 	print('JDeodorant |' + "{0:.3f}".format(precision_jdeodorant) + '      |' + "{0:.3f}".format(recall_jdeodorant) + '   |' + "{0:.3f}".format(f_measure_jdeodorant))
 	print('-------------------------------------------------')
 	print('-------------------------------------------------')
-	print('Vote 1     |' + "{0:.3f}".format(precision_vote_1) + '      |' + "{0:.3f}".format(recall_vote_1) + '   |' + "{0:.3f}".format(f_measure_vote_1))
-	print('-------------------------------------------------')
 	print('Vote 2     |' + "{0:.3f}".format(precision_vote_2) + '      |' + "{0:.3f}".format(recall_vote_2) + '   |' + "{0:.3f}".format(f_measure_vote_2))
 	print('-------------------------------------------------')
-	print('Vote 3     |' + "{0:.3f}".format(precision_vote_3) + '      |' + "{0:.3f}".format(recall_vote_3) + '   |' + "{0:.3f}".format(f_measure_vote_3))
+	print('ASCI       |' + "{0:.3f}".format(precision_asci) + '      |' + "{0:.3f}".format(recall_asci) + '   |' + "{0:.3f}".format(f_measure_asci))
 	print('-------------------------------------------------')
 	print('SMAD       |' + "{0:.3f}".format(precision_smad) + '      |' + "{0:.3f}".format(recall_smad) + '   |' + "{0:.3f}".format(f_measure_smad))
 	print('-------------------------------------------------')
@@ -160,11 +146,9 @@
 print('JDeodorant |' + "{0:.3f}".format(np.mean(p_jd)) + '      |' + "{0:.3f}".format(np.mean(r_jd)) + '   |' + "{0:.3f}".format(np.mean(f_jd)))
 print('-------------------------------------------------')
 print('-------------------------------------------------')
-print('Vote 1     |' + "{0:.3f}".format(np.mean(p_v1)) + '      |' + "{0:.3f}".format(np.mean(r_v1)) + '   |' + "{0:.3f}".format(np.mean(f_v1)))
-print('-------------------------------------------------')
 print('Vote 2     |' + "{0:.3f}".format(np.mean(p_v2)) + '      |' + "{0:.3f}".format(np.mean(r_v2)) + '   |' + "{0:.3f}".format(np.mean(f_v2)))
 print('-------------------------------------------------')
-print('Vote 3     |' + "{0:.3f}".format(np.mean(p_v3)) + '      |' + "{0:.3f}".format(np.mean(r_v3)) + '   |' + "{0:.3f}".format(np.mean(f_v3)))
+print('ASCI       |' + "{0:.3f}".format(np.mean(p_asci)) + '      |' + "{0:.3f}".format(np.mean(r_asci)) + '   |' + "{0:.3f}".format(np.mean(f_asci)))
 print('-------------------------------------------------')
 print('SMAD       |' + "{0:.3f}".format(np.mean(p_smad)) + '      |' + "{0:.3f}".format(np.mean(r_smad)) + '   |' + "{0:.3f}".format(np.mean(f_smad)))
 print('-------------------------------------------------')

diff --git a/experiments/god_class/parameters_calibration/context.py b/experiments/god_class/parameters_calibration/context.py
@@ -8,6 +8,8 @@
 import utils.experimentUtils as experimentUtils
 import utils.nnUtils as nnUtils
 
-import detection_tools.god_class.hist as hist
+import detection_tools.god_class.hist       as hist
+import detection_tools.god_class.decor      as decor
+import detection_tools.god_class.jdeodorant as jdeodorant
 
 import neural_networks.smad.model as md
diff --git a/.../parameters_calibration/calibrate_hist.py → .../god_class/parameters_calibration/hist.py b/.../parameters_calibration/calibrate_hist.py → .../god_class/parameters_calibration/hist.py
diff --git a/...ters_calibration/smad_cross_validation.py → .../god_class/parameters_calibration/smad.py b/...ters_calibration/smad_cross_validation.py → .../god_class/parameters_calibration/smad.py
diff --git a/experiments/god_class/parameters_calibration/vote.py b/experiments/god_class/parameters_calibration/vote.py
@@ -0,0 +1,21 @@
+from context import experimentUtils, dataUtils, hist, decor, jdeodorant
+from sklearn import tree
+
+import numpy as np
+
+# Systems on which the voting threshold k is compared.
+validation_systems = ['xerces-2_7_0', 'lucene', 'apache-ant', 'argouml', 'android-frameworks-opt-telephony']
+
+# Per-system F-measures of the vote, for k = 1, 2 and 3 respectively.
+fm_v1 = []
+fm_v2 = []
+fm_v3 = []
+for system in validation_systems:
+	antipatterns = dataUtils.getAntipatterns(system, 'god_class')
+	detected = [hist.getSmells(system), decor.getSmells(system), jdeodorant.getSmells(system)]
+
+	# NOTE(review): vote(detected, k) presumably keeps entities reported by at least k tools;
+	# confirm in experimentUtils.
+	fm_v1.append(experimentUtils.f_measure(experimentUtils.vote(detected, 1), antipatterns))
+	fm_v2.append(experimentUtils.f_measure(experimentUtils.vote(detected, 2), antipatterns))
+	fm_v3.append(experimentUtils.f_measure(experimentUtils.vote(detected, 3), antipatterns))
+
+# Print the mean F-measure over the validation systems for each k.
+print('V1: ' + str(np.mean(np.array(fm_v1))))
+print('V2: ' + str(np.mean(np.array(fm_v2))))
+print('V3: ' + str(np.mean(np.array(fm_v3))))
diff --git a/assets/jar/.DS_Store → images/.DS_Store b/assets/jar/.DS_Store → images/.DS_Store
diff --git a/images/SMAD.png b/images/SMAD.png
diff --git a/assets/.DS_Store → java/.DS_Store b/assets/.DS_Store → java/.DS_Store
diff --git a/java/jar/.DS_Store b/java/jar/.DS_Store
diff --git a/assets/jar/DecorMetricsFileCreator.jar → java/jar/DecorMetricsFileCreator.jar b/assets/jar/DecorMetricsFileCreator.jar → java/jar/DecorMetricsFileCreator.jar
diff --git a/assets/jar/InCodeMetricsFileCreator.jar → java/jar/InCodeMetricsFileCreator.jar b/assets/jar/InCodeMetricsFileCreator.jar → java/jar/InCodeMetricsFileCreator.jar
diff --git a/assets/jar/JDMetricsFileCreator.jar → java/jar/JDMetricsFileCreator.jar b/assets/jar/JDMetricsFileCreator.jar → java/jar/JDMetricsFileCreator.jar
diff --git a/assets/jar/diffj-1.6.3.jar → java/jar/diffj-1.6.3.jar b/assets/jar/diffj-1.6.3.jar → java/jar/diffj-1.6.3.jar
diff --git a/assets/src/.DS_Store → java/src/.DS_Store b/assets/src/.DS_Store → java/src/.DS_Store
diff --git a/...ts/src/decor/DecorMetricsFileCreator.java → java/src/decor/DecorMetricsFileCreator.java b/...ts/src/decor/DecorMetricsFileCreator.java → java/src/decor/DecorMetricsFileCreator.java
diff --git a/assets/src/incode/ATFDFileVisitor.java → java/src/incode/ATFDFileVisitor.java b/assets/src/incode/ATFDFileVisitor.java → java/src/incode/ATFDFileVisitor.java
diff --git a/.../src/incode/InCodeMetricsFileCreator.java → .../src/incode/InCodeMetricsFileCreator.java b/.../src/incode/InCodeMetricsFileCreator.java → .../src/incode/InCodeMetricsFileCreator.java
diff --git a/assets/src/jdeodorant/ASTReader.java → java/src/jdeodorant/ASTReader.java b/assets/src/jdeodorant/ASTReader.java → java/src/jdeodorant/ASTReader.java
diff --git a/assets/src/jdeodorant/ClassVisitor.java → java/src/jdeodorant/ClassVisitor.java b/assets/src/jdeodorant/ClassVisitor.java → java/src/jdeodorant/ClassVisitor.java
diff --git a/assets/src/jdeodorant/FileVisitor.java → java/src/jdeodorant/FileVisitor.java b/assets/src/jdeodorant/FileVisitor.java → java/src/jdeodorant/FileVisitor.java
diff --git a/.../src/jdeodorant/JDMetricsFileCreator.java → .../src/jdeodorant/JDMetricsFileCreator.java b/.../src/jdeodorant/JDMetricsFileCreator.java → .../src/jdeodorant/JDMetricsFileCreator.java
diff --git a/assets/src/jdeodorant/MethodVisitor.java → java/src/jdeodorant/MethodVisitor.java b/assets/src/jdeodorant/MethodVisitor.java → java/src/jdeodorant/MethodVisitor.java