diff --git a/.DS_Store b/.DS_Store index c65d70e..93d1702 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/historical_anti-pattern_detection/.DS_Store b/historical_anti-pattern_detection/.DS_Store index ec900ca..11979c4 100644 Binary files a/historical_anti-pattern_detection/.DS_Store and b/historical_anti-pattern_detection/.DS_Store differ diff --git a/historical_anti-pattern_detection/assets/.DS_Store b/historical_anti-pattern_detection/assets/.DS_Store new file mode 100644 index 0000000..0315032 Binary files /dev/null and b/historical_anti-pattern_detection/assets/.DS_Store differ diff --git a/historical_anti-pattern_detection/assets/frameworks/diffj-1.6.3.jar b/historical_anti-pattern_detection/assets/frameworks/diffj-1.6.3.jar new file mode 100644 index 0000000..91d5b78 Binary files /dev/null and b/historical_anti-pattern_detection/assets/frameworks/diffj-1.6.3.jar differ diff --git a/historical_anti-pattern_detection/data/.DS_Store b/historical_anti-pattern_detection/data/.DS_Store index c14dde0..3da246e 100644 Binary files a/historical_anti-pattern_detection/data/.DS_Store and b/historical_anti-pattern_detection/data/.DS_Store differ diff --git a/historical_anti-pattern_detection/hist.py b/historical_anti-pattern_detection/hist.py new file mode 100644 index 0000000..5165831 --- /dev/null +++ b/historical_anti-pattern_detection/hist.py @@ -0,0 +1,63 @@ +from __future__ import print_function +from __future__ import division + +from reader import * + +def blob(systemName, alpha=8.0): + historyFile = './data/systems_history/' + systemName + '.csv' + systemClassesFile = './data/systems_methods/' + systemName + '.csv' + + classes = [] + with open(systemClassesFile, 'rb') as csvfile: + reader = csv.reader(csvfile, delimiter=';') + + for row in reader: + classes.append(row[0]) + + reverseDictionnary = {classes[i]: i for i in xrange(len(classes))} + changes = readHistory(historyFile) + + + data = [] + commit = [] + commitNumber = changes[0]['Snapshot'] + for i, change in enumerate(changes): + if commitNumber != change['Snapshot']: + data.append(set(commit)) + commit = [] + commitNumber = change['Snapshot'] + + commit.append(change['Class']) + if i == len(changes)-1: + data.append(set(commit)) + + nbCommit = [0 for _ in xrange(len(classes))] + occurences = [0 for _ in xrange(len(classes))] + for commit in data: + nbCommit = [i+1 for i in nbCommit] + if len(commit) > 1: + for className in commit: + if className in classes: + idx = reverseDictionnary[className] + occurences[idx] = occurences[idx] + 1 + + else: + className = list(commit)[0] + if className in classes: + idx = reverseDictionnary[className] + nbCommit[idx] = nbCommit[idx] - 1 + + + + for i, nbOcc in enumerate(occurences): + threshold = nbCommit[i] * alpha / 100 + if nbOcc > threshold: + print(classes[i]) + + +def featureEnvy(systemName, Blob): + + + +if __name__ == "__main__": + blob("android-frameworks-opt-telephony") \ No newline at end of file diff --git a/historical_anti-pattern_detection/historyExtractor.py b/historical_anti-pattern_detection/historyExtractor.py index 6d5b6e0..f91dfd8 100644 --- a/historical_anti-pattern_detection/historyExtractor.py +++ b/historical_anti-pattern_detection/historyExtractor.py @@ -5,21 +5,123 @@ import urllib2 import json, ast import progressbar +import re ''' Methods used to extract history information from the versionning systems of the differents sofwares, and extract the anti-pattern occurences from the landfill database (http://www.sesa.unisa.it/landfill/). all this data will be stored in csv files. ''' -def getMethodeName(methodePath, mainDirectory): - methode = methodePath[len(mainDirectory):] - methode = methode[:len(methode)-len('.java')] - methode = '.'.join(methode.split('/')) +def getClassName(classPath, mainDirectory): + className = classPath[len(mainDirectory):] + className = className[:len(className)-len('.java')] + className = '.'.join(className.split('/')) - return methode + return className +def getClassChange(SHA, date, filePath, changeType, mainDirectory): + className = getClassName(filePath, mainDirectory) + line = SHA + ';' + date + ';' + 'CLASS' + ';' + className + ';' + changeType + '\n' + return line -def createHistoryFile(mainDirectory, historyFilePath): +def updateWorkingFile(wFilePath, filePath, SHA): + F = open(wFilePath, "w") + + fileCommand = "git show " + SHA + ":" + filePath + ps = subprocess.Popen(fileCommand.split(), stdout=subprocess.PIPE) + file, error = ps.communicate() + + F.write(file) + F.close() + +def getMethodsInFile(filePath): + regex = '((public|protected|private|static|\s) +[\w\<\>\[\]]+\s+(\w+) *\([^\)]*\)\s*(\{))' + + methods = [] + with open(filePath, 'r') as javaFile: + content = javaFile.read() + m = re.findall(regex, content) + for method in m: + name = re.search('(\w+) *\([^\)]*\)', method[0]).groups()[0] + params = re.search('\w+ *(\([^\)]*\))', method[0]).groups()[0] + params = re.sub('\s+', ' ', params) + + methodName = name + params + methods.append(methodName) + + return methods + +def parseLine(line): + match = re.search('\w+ *\([^\)]*\)', line) + method = "" + if match is not None: + method = match.group(0) + ct = None + + if re.search('method removed', line) is not None: + ct = "REMOVED" + + if re.search('method added', line) is not None: + ct = "ADDED" + + if re.search('code changed', line) is not None: + ct = "BODY_MODIFIED" + + return method, ct + +def getMethodeChange(SHA, date, filePath, changeType, mainDirectory): + changes = [] + + if changeType == "A": + updateWorkingFile("../actualFile.java", filePath, SHA) + methods = getMethodsInFile("../actualFile.java") + + for method in methods: + change = method + ";" + "ADDED" + changes.append(change) + + if changeType == "D": + updateWorkingFile("../previousFile.java", filePath, SHA + "^") + methods = getMethodsInFile("../previousFile.java") + + for method in methods: + change = method + ";" + "DELETED" + changes.append(change) + + if changeType == "M": + updateWorkingFile("../actualFile.java", filePath, SHA) + updateWorkingFile("../previousFile.java", filePath, SHA + "^") + + diffjCommand = "java -jar ../assets/frameworks/diffj-1.6.3.jar --brief ../previousFile.java ../actualFile.java" + ps = subprocess.Popen(diffjCommand.split(), stdout=subprocess.PIPE) + output, error = ps.communicate() + + diffs = output.split('\n') + + for line in diffs: + method, ct = parseLine(line) + + if ct is not None: + # store change like that, so it is hashable + change = method + ";" + ct + changes.append(change) + + + lines = "" + className = getClassName(filePath, mainDirectory) + for change in set(changes): + lines = lines + SHA + ';' + date + ';' + 'METHOD' + ';' + className + '.' + change + '\n' + + return lines + +''' + creates the changes history file of the repository that is the cwd, + extract history information of files contained only in mainDirectory, + set granularity to "C" to extract information at a file level granularity, + set granularity to "M" to extract information at a methode level granularity. +''' + +def createHistoryFile(mainDirectory, historyFilePath, granularity): F = open(historyFilePath, 'w') F.write('Snapshot;Date;Entity;Code;ChangeType\n') @@ -33,6 +135,8 @@ def createHistoryFile(mainDirectory, historyFilePath): widgets=['writing history file : ' ,progressbar.Percentage()]) bar.start() + options = {"C": getClassChange, "M": getMethodeChange} + for line in commits: commit = line.split('_') SHA = commit[0] @@ -48,27 +152,32 @@ def createHistoryFile(mainDirectory, historyFilePath): for fileChange in output2.split('\n'): if fileChange.split('.')[-1] == 'java': if fileChange.split()[1].startswith(mainDirectory): - methode = getMethodeName(fileChange.split()[1], mainDirectory) + changeType = fileChange.split()[0] + filePath = fileChange.split()[1] - line = SHA + ';' + date + ';' + 'CLASS' + ';' + methode + ';' + fileChange.split()[0] + '\n' - F.write(line) + change = options[granularity](SHA, date, filePath, changeType,mainDirectory) + F.write(change) + + subprocess.call("rm -f ../previousFile.java", shell=True) + subprocess.call("rm -f ../actualFile.java", shell=True) bar.finish() F.close() -#create a csv file containing all the methods contained in the main directory -def createMethodsFile(mainDirectory, methodsFilePath): - F = open(methodsFilePath, 'w') + +#create a csv file containing all the classes contained in the main directory +def createClassesFile(mainDirectory, classesFilePath): + F = open(classesFilePath, 'w') for path,dirs,files in os.walk('./' + mainDirectory): for f in fnmatch.filter(files,'*.java'): - methode = getMethodeName(os.path.join(path,f)[2:], mainDirectory) - F.write(methode + '\n') + className = getClassName(os.path.join(path,f)[2:], mainDirectory) + F.write(className + '\n') F.close() -def extractChangeHistory(repositoryURL, systemName, snapshot, mainDirectory): +def extractChangeHistory(repositoryURL, systemName, snapshot, mainDirectory, granularity = "C"): cloneCommand = 'git clone ' + repositoryURL + ' ' + systemName subprocess.call(cloneCommand, shell=True) @@ -76,10 +185,10 @@ def extractChangeHistory(repositoryURL, systemName, snapshot, mainDirectory): os.chdir(systemName) subprocess.call('git checkout -f '+ snapshot, shell=True) - methodsFile = cwd + '/data/systems_methods/' + systemName + '.csv' + classFile = cwd + '/data/systems_methods/' + systemName + '.csv' historyFile = cwd + '/data/systems_history/' + systemName + '.csv' - createMethodsFile(mainDirectory, methodsFile) - createHistoryFile(mainDirectory, historyFile) + #createClassesFile(mainDirectory, classFile) + createHistoryFile(mainDirectory, historyFile, granularity) subprocess.call('git checkout master', shell=True) os.chdir(cwd) @@ -87,35 +196,6 @@ def extractChangeHistory(repositoryURL, systemName, snapshot, mainDirectory): removeDirCommand = "rm -rf " + systemName subprocess.call(removeDirCommand, shell=True) -def createSmellFile(systemName, systemId, smell): - url = 'http://www.sesa.unisa.it/landfill/GetBadSmells?system=' + str(systemId) +'&type=' + smell - response = urllib2.urlopen(url) - data = ast.literal_eval(json.dumps(json.load(response))) - - fileName = './data/anti-pattern_occurences/'+ smell + '/' + systemName + '.csv' - F = open(fileName, 'w') - - for occurence in data['data']: - methode = occurence['instance'] - F.write(methode + '\n') - - F.close() - -#Not used actually, because there is lots of mistakes in this database. -#Some occurences are not even classes of the corresponding system. -def extractSmellOccurences(): - response = urllib2.urlopen('http://www.sesa.unisa.it/landfill/GetSystems?datasetId=1') - data = ast.literal_eval(json.dumps(json.load(response))) - - for system in data: - systemName = '-'.join(system['name'].lower().split()) - for smell in system['types']: - directoryPath = './data/anti-pattern_occurences/'+ smell['type'] - if not os.path.exists(directoryPath): - os.makedirs(directoryPath) - - createSmellFile(systemName, system['id'], smell['type']) - if __name__ == "__main__": @@ -143,3 +223,4 @@ def extractSmellOccurences(): #extractChangeHistory('https://github.com/apache/pig.git', 'apache-pig', 'a8c680cf28ad4c2ab824c268a3dbe2783667dd94', '') #extractChangeHistory('https://github.com/apache/struts.git', 'apache-struts', '9ad9404bfac2b936e1b5f0f5e828335bc5a51b48', 'core/src/main/') + diff --git a/historical_anti-pattern_detection/model.py b/historical_anti-pattern_detection/model.py index aad6191..c8e8815 100644 --- a/historical_anti-pattern_detection/model.py +++ b/historical_anti-pattern_detection/model.py @@ -14,7 +14,7 @@ def __init__(self, input, n_in, n_out, activation=tf.tanh): class Model(object): - def __init__(self, instances, labels=None, shape=[32,16,8], starter_learning_rate=0.28, beta=0): + def __init__(self, instances, labels=None, shape=[32,16,8], starter_learning_rate=0.26, beta=0): self.instances = instances self.labels = labels diff --git a/historical_anti-pattern_detection/train.py b/historical_anti-pattern_detection/train.py index ca70d25..9b4f265 100644 --- a/historical_anti-pattern_detection/train.py +++ b/historical_anti-pattern_detection/train.py @@ -53,7 +53,7 @@ model = Model(p_x, p_y) # To save and restore a trained model -saver = tf.train.Saver() +#saver = tf.train.Saver() losses_train = [] losses_valid = [] @@ -79,8 +79,8 @@ bestLossStep = step # Save the model - save_path = saver.save(session, "./data/trained_models/model", global_step=num_steps) - print("Model saved in path: %s" % save_path) + #save_path = saver.save(session, "./data/trained_models/model", global_step=num_steps) + #print("Model saved in path: %s" % save_path) # Evaluate the model on the validation set output = session.run(model.inference, feed_dict=feed_dict_valid)