Skip to content

Commit

Permalink
final history Extraction + advisors
Browse files Browse the repository at this point in the history
  • Loading branch information
antoineBarbez committed Mar 31, 2018
1 parent 7e91150 commit 12a1aed
Show file tree
Hide file tree
Showing 218 changed files with 210,971 additions and 279,115 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified historical_anti-pattern_detection/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

Large diffs are not rendered by default.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from reader import *

import math
import dataConstruction.systems as systems
import matplotlib.pyplot as plt
import numpy as np

def precision(detected, true):
truePos = 0
Expand All @@ -27,16 +30,18 @@ def recall(detected, true):

return truePos / len(detected)

def f_mesure(detected, true):
def f_mesure(detected, true, alpha):
pre = precision(detected, true)
rec = recall(detected, true)

if (pre + rec) ==0:
return 0

return 2*pre*rec/(pre+rec)
return pre*rec/(alpha*rec + (1-alpha)*pre)

def blob(systemName, alpha=8.0):
#return 2*pre*rec/(pre+rec)

def blob(systemName, beta, nbcst):
historyFile = './data/history/class_changes/' + systemName + '.csv'
systemClassesFile = './data/instances/classes/' + systemName + '.csv'

Expand All @@ -61,41 +66,59 @@ def blob(systemName, alpha=8.0):
commitNumber = change['Snapshot']

commit.append(change['Class'])

if i == len(changes)-1:
data.append(set(commit))

nbCommit = [0 for _ in xrange(len(classes))]
occurences = [0 for _ in xrange(len(classes))]
for commit in data:
nbCommit = [i+1 for i in nbCommit]
if len(commit) > 1:
if nbcst:
nbCommit = len(data)
occurences = [0 for _ in xrange(len(classes))]
for commit in data:
for className in commit:
if className in classes:
idx = reverseDictionnary[className]
occurences[idx] = occurences[idx] + 1

else:
className = list(commit)[0]
if className in classes:
idx = reverseDictionnary[className]
nbCommit[idx] = nbCommit[idx] - 1
smells = []
for i, nbOcc in enumerate(occurences):
threshold = nbCommit * beta / len(classes)
if nbOcc > threshold:

smells.append(classes[i])

return smells

else:
nbCommit = [0 for _ in xrange(len(classes))]
occurences = [0 for _ in xrange(len(classes))]
for commit in data:
nbCommit = [i+1 for i in nbCommit]
if len(commit) > 1:
for className in commit:
if className in classes:
idx = reverseDictionnary[className]
occurences[idx] = occurences[idx] + 1

else:
className = list(commit)[0]
if className in classes:
idx = reverseDictionnary[className]
nbCommit[idx] = nbCommit[idx] - 1

smells = []
for i, nbOcc in enumerate(occurences):
threshold = nbCommit[i] * beta / len(classes)
if nbOcc > threshold:

smells = []
for i, nbOcc in enumerate(occurences):
threshold = nbCommit[i] * alpha / 100
if nbOcc > threshold:
#print(nbOcc, nbCommit[i])
#print(classes[i])
smells.append(classes[i])
smells.append(classes[i])

return smells
return smells



def test(systemName, alpha):
print(systemName, alpha)
trueFile = './data/labels/valid/' + systemName + '.csv'
def test(systemName, alpha, nbcst=False):
#print(systemName, alpha)
trueFile = './data/labels/Blob/test/' + systemName + '.csv'
systemClassesFile = './data/instances/classes/' + systemName + '.csv'

#Get Smells occurences
Expand All @@ -106,11 +129,11 @@ def test(systemName, alpha):
for row in reader:
true.append(row[0])

detected = blob(systemName, alpha)
detected = blob(systemName, alpha, nbcst)

pre = precision(detected, true)
rec = recall(detected, true)
f_m = f_mesure(detected, true)
f_m = f_mesure(detected, true, 0.5)

#print('Precision :', "{0:.3f}".format(pre))
#print('Recall :', "{0:.3f}".format(rec))
Expand All @@ -121,38 +144,55 @@ def test(systemName, alpha):


if __name__ == "__main__":
test("ApacheAnt", 1.5)

systems = [
"android-frameworks-opt-telephony",
"android-frameworks-sdk",
"android-platform-support",
"apache-ant",
"apache-tomcat",
"jedit"
]
for system in systems.hist:
f_m = test(system['name'], 10, True)
print(system['name'] + " : " + str(f_m))


'''alphas = 3 + 0.2*np.array(range(150))
f_m = []
std = []
i = 0
for alpha in alphas:
i = i + 1
print (str(i))
s = []
for system in systems.test:
s.append(test(system['name'], alpha, True))
f_m.append(np.mean(s))
std.append(np.std(s))
plt.plot(alphas, f_m, 'ro', alphas, std)
plt.show()'''


'''
s = 0
alphas = [1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0]
for system in systems:
alphas = 3 + 0.2*np.array(range(150))
for system in systems.hist:
print(system['name'])
bestAL = 0
bestFM = 0
f_m = 0
for alpha in alphas:
f_m = test(system, int(alpha))
print(f_m)
f_m = test(system['name'], int(alpha), True)
#print(f_m)
if f_m == None:
f_m = 0
if f_m > bestFM:
bestFM = f_m
bestAL = alpha
f_m = test(system, bestAL)
f_m = test(system['name'], bestAL, True)
print(f_m)
print(bestAL)
if f_m == None:
f_m = 0
s = s + f_m
print(s/len(systems))
print(s/len(systems.hist))'''
195 changes: 195 additions & 0 deletions historical_anti-pattern_detection/advisors/detection/Hist/hist3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
from __future__ import print_function
from __future__ import division
from sklearn.preprocessing import StandardScaler

from reader import *

import math
import dataConstruction.systems as systems
import matplotlib.pyplot as plt
import numpy as np

def _count_true_positives(detected, true):
    """Count the entries of `detected` that also appear in `true`."""
    # Build the set once so each membership test is O(1) instead of a list scan.
    expected = set(true)
    return sum(1 for className in detected if className in expected)


def precision(detected, true):
    """Return the share of detected classes that are actual smells.

    Precision is true positives divided by the number of detections. The
    previous implementation divided by len(true), which is recall.

    Args:
        detected: class names reported by the detector.
        true: class names labelled as smells.

    Returns:
        truePos / len(detected), or 0 when nothing was detected.
    """
    if len(detected) == 0:
        return 0

    return _count_true_positives(detected, true) / len(detected)


def recall(detected, true):
    """Return the share of actual smells that were detected.

    Recall is true positives divided by the number of labelled smells. The
    previous implementation divided by len(detected), which is precision.

    Args:
        detected: class names reported by the detector.
        true: class names labelled as smells.

    Returns:
        truePos / len(true), or 0 when there are no labelled smells.
    """
    if len(true) == 0:
        return 0

    return _count_true_positives(detected, true) / len(true)

def f_mesure(detected, true, alpha):
    """Combine precision() and recall() into a weighted F-measure.

    The result is pre * rec / (alpha * rec + (1 - alpha) * pre). With
    alpha = 0.5 this equals the harmonic mean 2 * pre * rec / (pre + rec).

    Args:
        detected: class names reported by the detector.
        true: class names labelled as smells.
        alpha: weight applied to the recall term of the denominator;
            (1 - alpha) is applied to the precision term.

    Returns:
        The weighted F-measure, or 0 when precision and recall sum to zero.
    """
    p = precision(detected, true)
    r = recall(detected, true)

    # Both scores being zero would otherwise lead to a division by zero.
    if p + r == 0:
        return 0

    denominator = alpha * r + (1 - alpha) * p
    return p * r / denominator

def getRescaledOccurences(systemName):
    """Return the standardized change count of every class of a system.

    Reads the class list from ./data/instances/classes/<systemName>.csv and
    the change history from ./data/history/class_changes/<systemName>.csv,
    counts in how many commits each listed class was changed, and rescales
    those counts with scikit-learn's StandardScaler.

    Consecutive history entries sharing the same 'Snapshot' value are treated
    as one commit. A history with no entries yields zero commits (all counts
    are 0) instead of raising IndexError.

    Args:
        systemName: base name (without extension) of the system's CSV files.

    Returns:
        A dict mapping each class name to its rescaled occurrence count.
    """
    historyFile = './data/history/class_changes/' + systemName + '.csv'
    systemClassesFile = './data/instances/classes/' + systemName + '.csv'

    # The first column of each ';'-separated row is the class name. Blank
    # lines come back from csv.reader as empty lists and are skipped.
    with open(systemClassesFile, 'rb') as csvfile:
        classes = [row[0] for row in csv.reader(csvfile, delimiter=';') if row]

    # Class name -> position in `classes`, used for O(1) lookups below.
    reverseDictionnary = {name: i for i, name in enumerate(classes)}
    changes = readHistory2(historyFile)

    # Group consecutive changes of the same snapshot into one set per commit.
    data = []
    commit = set()
    commitNumber = None
    for change in changes:
        snapshot = change['Snapshot']
        # `commit` is only empty before the first change, so the first
        # iteration never closes a commit.
        if commit and snapshot != commitNumber:
            data.append(commit)
            commit = set()
        commitNumber = snapshot
        commit.add(change['Class'])
    if commit:
        data.append(commit)

    # Number of commits touching each listed class; classes that appear in
    # the history but not in the class file are ignored.
    occurences = [0] * len(classes)
    for commit in data:
        for className in commit:
            idx = reverseDictionnary.get(className)
            if idx is not None:
                occurences[idx] += 1

    # StandardScaler expects a 2-D array: one sample per row, one feature.
    column = np.array(occurences).reshape(-1, 1)
    rescaledOcc = StandardScaler().fit_transform(column).reshape(-1)

    return dict(zip(classes, rescaledOcc))


def blob(systemName, alpha):
    """List the classes whose rescaled change count is strictly above alpha.

    Args:
        systemName: name passed on to getRescaledOccurences().
        alpha: threshold compared against each class's rescaled count.

    Returns:
        The names of the classes whose value exceeds `alpha`.
    """
    scores = getRescaledOccurences(systemName)
    return [name for name, score in scores.items() if score > alpha]



def test(systemName, alpha):
    """Score blob() for one system against its labelled smells.

    Reads the expected class names from the first column of
    ./data/labels/Blob/test/<systemName>.csv, runs blob(systemName, alpha)
    and compares the two lists.

    Args:
        systemName: base name (without extension) of the system's CSV files.
        alpha: detection threshold forwarded to blob().

    Returns:
        f_mesure(detected, true, 0.5) for the detected and labelled classes.
    """
    trueFile = './data/labels/Blob/test/' + systemName + '.csv'

    # Labelled smells: first column of each ';'-separated row. Blank lines
    # come back from csv.reader as empty lists and are skipped instead of
    # failing on row[0].
    with open(trueFile, 'rb') as csvfile:
        rows = csv.reader(csvfile, delimiter=';')
        true = [row[0] for row in rows if row]

    detected = blob(systemName, alpha)

    # f_mesure derives precision and recall itself, so they are not
    # computed separately here.
    return f_mesure(detected, true, 0.5)



if __name__ == "__main__":
    # The triple-quoted blocks below are bare string literals: Python
    # evaluates and discards them, so the experiments they contain are
    # disabled. Only the final loop runs.

    # Disabled: print the F-measure of each system in systems.hist for a
    # fixed threshold of 2.3.
    '''for system in systems.hist:
f_m = test(system['name'], 2.3)
print(system['name'] + " : " + str(f_m))'''


    # Disabled: sweep the threshold over 50 values starting at 0.4 in steps
    # of 0.1, then plot the mean and standard deviation of the F-measure
    # across systems.test.
    '''alphas = 0.4 + 0.1*np.array(range(50))
f_m = []
std = []
i = 0
for alpha in alphas:
i = i + 1
print (str(i))
s = []
for system in systems.test:
s.append(test(system['name'], alpha))
f_m.append(np.mean(s))
std.append(np.std(s))
plt.plot(alphas, f_m, 'ro', alphas, std)
plt.show()'''


    # Disabled: for each system in systems.test, search the threshold giving
    # the best F-measure, print it, and finally print the average of those
    # best F-measures.
    '''
s = 0
alphas = 1 + 0.1*np.array(range(60))
for system in systems.test:
print(system['name'])
bestAL = 0
bestFM = 0
f_m = 0
for alpha in alphas:
f_m = test(system['name'], alpha)
#print(f_m)
if f_m == None:
f_m = 0
if f_m > bestFM:
bestFM = f_m
bestAL = alpha
f_m = test(system['name'], bestAL)
print(f_m)
print(bestAL)
if f_m == None:
f_m = 0
s = s + f_m
print(s/len(systems.test))'''

    # Active code: compute and print the rescaled occurrences of every entry
    # of systems.systems_git (presumably dicts with a 'name' key; verify
    # against dataConstruction.systems).
    for system in systems.systems_git:
        roDictionnary = getRescaledOccurences(system['name'])

        # NOTE(review): the original indentation was lost; this print is
        # assumed to belong to the loop body. Confirm against the repository.
        print(roDictionnary)
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 12a1aed

Please sign in to comment.