Skip to content

Commit

Permalink
add method level history extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
antoineBarbez committed Feb 11, 2018
1 parent 3247540 commit b752cc9
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 51 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified historical_anti-pattern_detection/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified historical_anti-pattern_detection/data/.DS_Store
Binary file not shown.
63 changes: 63 additions & 0 deletions historical_anti-pattern_detection/hist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from __future__ import print_function
from __future__ import division

from reader import *

def blob(systemName, alpha=8.0):
    '''Print the classes whose change history makes them Blob candidates.

    The class list is the first column of
    ./data/systems_methods/<systemName>.csv (';' delimited) and the history
    rows come from readHistory() on ./data/systems_history/<systemName>.csv.
    Consecutive history rows sharing the same 'Snapshot' value are grouped
    into one commit. A class is printed when the number of commits in which
    it changed together with at least one other entity is greater than
    alpha percent of the commits counted for it, i.e. all commits except
    those in which it was the only entity changed.

    Args:
        systemName: base name (without extension) of the two csv files.
        alpha: threshold, expressed as a percentage of the counted commits.

    Returns:
        None. Matching class names are printed, one per line. Nothing is
        printed when the history is empty.
    '''
    historyFile = './data/systems_history/' + systemName + '.csv'
    systemClassesFile = './data/systems_methods/' + systemName + '.csv'

    with open(systemClassesFile, 'rb') as csvfile:
        classes = [row[0] for row in csv.reader(csvfile, delimiter=';')]

    # Class name -> position in `classes`, used for O(1) membership and lookup.
    classIndex = {name: idx for idx, name in enumerate(classes)}

    changes = readHistory(historyFile)
    if len(changes) == 0:
        # No history rows: there is no first snapshot to start grouping from.
        return

    # Group consecutive rows of the same snapshot into one set of entities.
    commits = []
    currentCommit = set()
    currentSnapshot = changes[0]['Snapshot']
    for change in changes:
        if change['Snapshot'] != currentSnapshot:
            commits.append(currentCommit)
            currentCommit = set()
            currentSnapshot = change['Snapshot']
        currentCommit.add(change['Class'])
    commits.append(currentCommit)

    coChanges = [0] * len(classes)    # commits where the class changed with others
    soloCommits = [0] * len(classes)  # commits where the class changed alone
    for commit in commits:
        if len(commit) > 1:
            for className in commit:
                idx = classIndex.get(className)
                if idx is not None:
                    coChanges[idx] += 1
        else:
            idx = classIndex.get(next(iter(commit)))
            if idx is not None:
                soloCommits[idx] += 1

    totalCommits = len(commits)
    for idx, nbOcc in enumerate(coChanges):
        # Commits in which the class was the only change are not counted for it.
        threshold = (totalCommits - soloCommits[idx]) * alpha / 100.0
        if nbOcc > threshold:
            print(classes[idx])


def featureEnvy(systemName, Blob):
    '''Placeholder for a Feature Envy detection; there is no implementation yet.

    Args:
        systemName: presumably the same system base name blob() takes -- TODO confirm.
        Blob: intended meaning not established yet -- TODO document once implemented.

    Raises:
        NotImplementedError: always.
    '''
    raise NotImplementedError("featureEnvy is not implemented yet")



if __name__ == "__main__":
    # When run as a script, report the Blob candidates of this one system.
    blob("android-frameworks-opt-telephony")
175 changes: 128 additions & 47 deletions historical_anti-pattern_detection/historyExtractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,123 @@
import urllib2
import json, ast
import progressbar
import re


''' Methods used to extract history information from the versioning systems of the different software systems,
and to extract the anti-pattern occurrences from the Landfill database (http://www.sesa.unisa.it/landfill/).
All this data is stored in csv files. '''

def getClassName(classPath, mainDirectory):
    '''Turn the path of a .java file into a dotted name relative to mainDirectory.

    The first len(mainDirectory) characters and the last len('.java')
    characters of classPath are dropped without checking what they are,
    then every '/' is replaced by '.'.
    '''
    relativePath = classPath[len(mainDirectory):]
    withoutExtension = relativePath[:len(relativePath) - len('.java')]
    return withoutExtension.replace('/', '.')


def getMethodeName(methodePath, mainDirectory):
    '''Former name of getClassName, kept so that existing callers keep working.'''
    return getClassName(methodePath, mainDirectory)

def getClassChange(SHA, date, filePath, changeType, mainDirectory):
    '''Build the history row describing a change at class granularity.

    Returns one newline-terminated line of the form
    SHA;date;CLASS;<class name>;changeType, where the class name is
    derived from filePath by getClassName().
    '''
    fields = [SHA, date, 'CLASS', getClassName(filePath, mainDirectory), changeType]
    return ';'.join(fields) + '\n'

def createHistoryFile(mainDirectory, historyFilePath):
def updateWorkingFile(wFilePath, filePath, SHA):
    '''Write the content of filePath at revision SHA into wFilePath.

    Runs `git show SHA:filePath` in the current working directory and stores
    whatever the command printed on stdout; wFilePath is overwritten.

    Args:
        wFilePath: path of the working file to (re)write.
        filePath: path of the file inside the repository.
        SHA: revision to read the file from (e.g. a commit hash, or hash + "^").
    '''
    # Pass the arguments as a list: splitting a command string on whitespace
    # would break any repository path that contains a space.
    showCommand = ["git", "show", SHA + ":" + filePath]
    process = subprocess.Popen(showCommand, stdout=subprocess.PIPE)
    content, _ = process.communicate()

    # communicate() returns the raw bytes of the file, so write them in binary
    # mode; the with-block closes the file even if the write fails.
    with open(wFilePath, "wb") as workingFile:
        workingFile.write(content)

def getMethodsInFile(filePath):
    '''Return the signatures of the method declarations found in a Java file.

    Declarations are located with a regular expression (a modifier or a
    whitespace character, a return type, a name, a parenthesised parameter
    list and an opening brace), so this is a textual approximation and not a
    real Java parser.

    Args:
        filePath: path of the Java source file to read.

    Returns:
        A list of strings "name(params)", in order of appearance, where every
        run of whitespace inside the parameter list is collapsed to one space.
    '''
    # Raw strings keep the regex escapes (\s, \w, \<, ...) out of Python's own
    # string-escape processing.
    declaration = re.compile(
        r'((public|protected|private|static|\s) +[\w\<\>\[\]]+\s+(\w+) *\([^\)]*\)\s*(\{))')
    # One pattern captures both the name and the parameter list.
    signature = re.compile(r'(\w+) *(\([^\)]*\))')
    whitespace = re.compile(r'\s+')

    with open(filePath, 'r') as javaFile:
        content = javaFile.read()

    methods = []
    for found in declaration.findall(content):
        # found[0] is the text of the whole declaration.
        name, params = signature.search(found[0]).groups()
        methods.append(name + whitespace.sub(' ', params))

    return methods

def parseLine(line):
    '''Extract the method signature and the kind of change from one diff output line.

    Returns a tuple (method, changeType):
      * method is the first "name(...)" occurrence in the line, or "" if none;
      * changeType is "BODY_MODIFIED", "ADDED" or "REMOVED" depending on the
        phrase found in the line, or None when no known phrase is present.
    '''
    found = re.search(r'\w+ *\([^\)]*\)', line)
    method = found.group(0) if found is not None else ""

    # Tested from highest to lowest precedence: when several phrases occur in
    # the same line, 'code changed' wins over 'method added', which wins over
    # 'method removed'.
    if 'code changed' in line:
        changeType = "BODY_MODIFIED"
    elif 'method added' in line:
        changeType = "ADDED"
    elif 'method removed' in line:
        changeType = "REMOVED"
    else:
        changeType = None

    return method, changeType

def getMethodeChange(SHA, date, filePath, changeType, mainDirectory):
    '''Build the history rows describing the method-level changes of one file in one commit.

    Depending on changeType:
      * "A": every method found in the file at SHA is reported as ADDED;
      * "D": every method found in the file at SHA^ is reported as DELETED;
      * "M": the versions at SHA^ and SHA are compared with the DiffJ jar and
        each output line recognised by parseLine() yields one change.
    Any other changeType yields no change.

    Returns a string holding one line SHA;date;METHOD;<class>.<method>;<change>
    per distinct change (empty string when there is none). The order of the
    lines is the iteration order of a set.
    '''
    actualFile = "../actualFile.java"
    previousFile = "../previousFile.java"
    changes = []

    if changeType == "A":
        updateWorkingFile(actualFile, filePath, SHA)
        for method in getMethodsInFile(actualFile):
            changes.append(method + ";" + "ADDED")

    elif changeType == "D":
        updateWorkingFile(previousFile, filePath, SHA + "^")
        for method in getMethodsInFile(previousFile):
            changes.append(method + ";" + "DELETED")

    elif changeType == "M":
        updateWorkingFile(actualFile, filePath, SHA)
        updateWorkingFile(previousFile, filePath, SHA + "^")

        diffjCommand = "java -jar ../assets/frameworks/diffj-1.6.3.jar --brief ../previousFile.java ../actualFile.java"
        process = subprocess.Popen(diffjCommand.split(), stdout=subprocess.PIPE)
        output, _ = process.communicate()

        for diffLine in output.split('\n'):
            method, ct = parseLine(diffLine)
            if ct is not None:
                # "method;change" strings are hashable, so duplicates can be
                # removed with a set below.
                changes.append(method + ";" + ct)

    className = getClassName(filePath, mainDirectory)
    return ''.join(
        SHA + ';' + date + ';' + 'METHOD' + ';' + className + '.' + change + '\n'
        for change in set(changes)
    )

'''
Creates the change history file of the repository that is the cwd,
extracting history information only for the files contained in mainDirectory.
Set granularity to "C" to extract information at file-level granularity,
set granularity to "M" to extract information at method-level granularity.
'''

def createHistoryFile(mainDirectory, historyFilePath, granularity):
F = open(historyFilePath, 'w')
F.write('Snapshot;Date;Entity;Code;ChangeType\n')

Expand All @@ -33,6 +135,8 @@ def createHistoryFile(mainDirectory, historyFilePath):
widgets=['writing history file : ' ,progressbar.Percentage()])
bar.start()

options = {"C": getClassChange, "M": getMethodeChange}

for line in commits:
commit = line.split('_')
SHA = commit[0]
Expand All @@ -48,74 +152,50 @@ def createHistoryFile(mainDirectory, historyFilePath):
for fileChange in output2.split('\n'):
if fileChange.split('.')[-1] == 'java':
if fileChange.split()[1].startswith(mainDirectory):
methode = getMethodeName(fileChange.split()[1], mainDirectory)
changeType = fileChange.split()[0]
filePath = fileChange.split()[1]

line = SHA + ';' + date + ';' + 'CLASS' + ';' + methode + ';' + fileChange.split()[0] + '\n'
F.write(line)
change = options[granularity](SHA, date, filePath, changeType,mainDirectory)
F.write(change)

subprocess.call("rm -f ../previousFile.java", shell=True)
subprocess.call("rm -f ../actualFile.java", shell=True)

bar.finish()
F.close()

#create a csv file containing all the classes contained in the main directory
def createClassesFile(mainDirectory, classesFilePath):
    '''Write the dotted name of every .java file found under mainDirectory.

    The tree rooted at './' + mainDirectory is walked and one name per line,
    as computed by getClassName(), is written to classesFilePath (which is
    overwritten).
    '''
    # The with-block closes the output file even if the walk fails midway.
    with open(classesFilePath, 'w') as classesFile:
        for path, dirs, files in os.walk('./' + mainDirectory):
            for f in fnmatch.filter(files, '*.java'):
                # [2:] drops the './' that was prepended for os.walk.
                className = getClassName(os.path.join(path, f)[2:], mainDirectory)
                classesFile.write(className + '\n')


def createMethodsFile(mainDirectory, methodsFilePath):
    '''Former name of createClassesFile, kept so that existing callers keep working.'''
    createClassesFile(mainDirectory, methodsFilePath)


def extractChangeHistory(repositoryURL, systemName, snapshot, mainDirectory, granularity = "C"):
    '''Clone a repository, write its change history file, then delete the clone.

    The repository is cloned into ./<systemName>, checked out at snapshot,
    and createHistoryFile() writes <cwd>/data/systems_history/<systemName>.csv.

    Args:
        repositoryURL: URL given to `git clone`.
        systemName: name of the clone directory and of the csv file.
        snapshot: revision given to `git checkout -f`.
        mainDirectory: passed through to createHistoryFile().
        granularity: passed through to createHistoryFile(); defaults to "C".
    '''
    # Commands are passed as argument lists so that no value is interpreted
    # by a shell.
    subprocess.call(['git', 'clone', repositoryURL, systemName])

    cwd = os.getcwd()
    os.chdir(systemName)
    try:
        subprocess.call(['git', 'checkout', '-f', snapshot])

        # NOTE: generation of the class list (createClassesFile) is currently
        # disabled; only the history file is produced.
        historyFile = cwd + '/data/systems_history/' + systemName + '.csv'
        createHistoryFile(mainDirectory, historyFile, granularity)

        subprocess.call(['git', 'checkout', 'master'])
    finally:
        # Always go back to the starting directory, even if the extraction failed.
        os.chdir(cwd)

    subprocess.call(['rm', '-rf', systemName])

def createSmellFile(systemName, systemId, smell):
    '''Download the occurrences of one smell type for one system and store them in a csv file.

    The Landfill GetBadSmells service is queried with systemId and smell, and
    the 'instance' value of every entry of the 'data' list of the response is
    written, one per line, to
    ./data/anti-pattern_occurences/<smell>/<systemName>.csv.
    '''
    url = 'http://www.sesa.unisa.it/landfill/GetBadSmells?system=' + str(systemId) +'&type=' + smell
    response = urllib2.urlopen(url)
    try:
        # NOTE(review): the dumps + literal_eval round trip is kept as is;
        # presumably it is there to get plain str values instead of unicode
        # ones -- confirm before simplifying.
        data = ast.literal_eval(json.dumps(json.load(response)))
    finally:
        # Release the connection whether or not the payload could be parsed.
        response.close()

    fileName = './data/anti-pattern_occurences/'+ smell + '/' + systemName + '.csv'
    with open(fileName, 'w') as smellFile:
        for occurence in data['data']:
            smellFile.write(occurence['instance'] + '\n')

# Currently unused: the Landfill database contains many mistakes, and some
# occurrences are not even classes of the corresponding system.
def extractSmellOccurences():
    '''Create one smell file per (system, smell type) pair listed by Landfill.

    The list of systems is fetched from the GetSystems service (datasetId=1).
    For each smell type of each system, the directory
    ./data/anti-pattern_occurences/<type> is created if missing and
    createSmellFile() is called. The system name used for the files is the
    lower-cased Landfill name with whitespace runs replaced by '-'.
    '''
    response = urllib2.urlopen('http://www.sesa.unisa.it/landfill/GetSystems?datasetId=1')
    try:
        data = ast.literal_eval(json.dumps(json.load(response)))
    finally:
        # Release the connection whether or not the payload could be parsed.
        response.close()

    for system in data:
        systemName = '-'.join(system['name'].lower().split())
        for smell in system['types']:
            directoryPath = './data/anti-pattern_occurences/'+ smell['type']
            if not os.path.exists(directoryPath):
                os.makedirs(directoryPath)

            createSmellFile(systemName, system['id'], smell['type'])



if __name__ == "__main__":
Expand Down Expand Up @@ -143,3 +223,4 @@ def extractSmellOccurences():
#extractChangeHistory('https://github.com/apache/pig.git', 'apache-pig', 'a8c680cf28ad4c2ab824c268a3dbe2783667dd94', '')
#extractChangeHistory('https://github.com/apache/struts.git', 'apache-struts', '9ad9404bfac2b936e1b5f0f5e828335bc5a51b48', 'core/src/main/')


2 changes: 1 addition & 1 deletion historical_anti-pattern_detection/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, input, n_in, n_out, activation=tf.tanh):


class Model(object):
def __init__(self, instances, labels=None, shape=[32,16,8], starter_learning_rate=0.28, beta=0):
def __init__(self, instances, labels=None, shape=[32,16,8], starter_learning_rate=0.26, beta=0):
self.instances = instances
self.labels = labels

Expand Down
6 changes: 3 additions & 3 deletions historical_anti-pattern_detection/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
model = Model(p_x, p_y)

# To save and restore a trained model
saver = tf.train.Saver()
#saver = tf.train.Saver()

losses_train = []
losses_valid = []
Expand All @@ -79,8 +79,8 @@
bestLossStep = step

# Save the model
save_path = saver.save(session, "./data/trained_models/model", global_step=num_steps)
print("Model saved in path: %s" % save_path)
#save_path = saver.save(session, "./data/trained_models/model", global_step=num_steps)
#print("Model saved in path: %s" % save_path)

# Evaluate the model on the validation set
output = session.run(model.inference, feed_dict=feed_dict_valid)
Expand Down

0 comments on commit b752cc9

Please sign in to comment.