
Commit

removed files and reformat the model to be more reusable
antoineBarbez committed Jan 30, 2018
1 parent e392df5 commit f26320a
Showing 13 changed files with 388 additions and 307 deletions.
Binary file modified .DS_Store
Binary file modified historical_anti-pattern_detection/.DS_Store
149 changes: 0 additions & 149 deletions historical_anti-pattern_detection/MLP.py

This file was deleted.

203 changes: 190 additions & 13 deletions historical_anti-pattern_detection/NN.py
@@ -6,33 +6,35 @@
import sys
import reader
import math
import os

os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
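# TF_CPP_MIN_LOG_LEVEL='2' hides TensorFlow's C++ INFO and WARNING messages; errors still print.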

import transformData as td
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

starter_learning_rate = 0.30
beta = 0.0
layers = [32,16,8]

#starter_learning_rate = 0.30
#beta = 0.0
#layers = [32,16,8]

def layer(x, n_in, n_out):
low = -4*np.sqrt(6.0/(n_in + n_out))
high = 4*np.sqrt(6.0/(n_in + n_out))
w = tf.Variable(tf.random_uniform(shape=[n_in, n_out], minval=low, maxval=high, dtype=tf.float32))
#w = tf.Variable(tf.truncated_normal(shape=[n_in, n_out]))
#w = tf.Variable(tf.random_uniform(shape=[n_in, n_out], minval=low, maxval=high, dtype=tf.float32))
w = tf.Variable(tf.truncated_normal(shape=[n_in, n_out]))
b = tf.Variable(tf.zeros([n_out]))
return w, tf.matmul(x,w) + b, tf.sigmoid(tf.matmul(x,w) + b)
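# The uniform bounds above are the Glorot/Xavier limits scaled by 4, the factor
# Glorot & Bengio (2010) suggest for sigmoid units; this commit switches the live
# initializer to truncated_normal and keeps the uniform one commented out.
# A quick check of the bound's magnitude (illustrative, not part of the commit):
#   4 * sqrt(6 / (32 + 16)) = 4 * sqrt(0.125) ~= 1.414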


''' Differentiable approximation of the F-measure achieved by the model on a given dataset.
Based on Martin Jansche's article "Maximum Expected F-Measure Training of Logistic Regression Models".
true_positive sum(sigmoid(gamma*logits)) for label = +1
detected sum(sigmoid(gamma*logits))
true_positive -> sum(sigmoid(gamma*logits)) for label = +1
detected -> sum(sigmoid(gamma*logits))
for gamma -> +
for gamma -> + infinity
'''
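# In Jansche's construction the hard counts are replaced by soft ones:
#   tp(gamma)  = sum_i sigmoid(gamma * s_i) for the examples with label = +1
#   det(gamma) = sum_i sigmoid(gamma * s_i) over all examples
#   F(gamma)   = 2 * tp / (det + #positives)
# and sigmoid(gamma * s) approaches a 0/1 step function as gamma -> +infinity,
# recovering the exact F-measure. (Editorial sketch of the intent; the function
# body below is partly collapsed in this diff.)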
def f_mesure_approx(logits, labels, gamma):
param = tf.constant([1,0], tf.float32,shape=[1,2])
@@ -44,10 +46,10 @@ def f_mesure_approx(logits, labels, gamma):


def evaluate_model(logits, labels):
true_positive = tf.cast(tf.equal(tf.argmax(logits,1) + tf.argmax(y_,1), 0), tf.float32)
positive = tf.cast(tf.equal(tf.argmax(y_,1), 0), tf.float32)
true_positive = tf.cast(tf.equal(tf.argmax(logits,1) + tf.argmax(labels,1), 0), tf.float32)
positive = tf.cast(tf.equal(tf.argmax(labels,1), 0), tf.float32)
detected = tf.cast(tf.equal(tf.argmax(logits,1), 0), tf.float32)
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_,1))
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels,1))

precision = tf.reduce_sum(true_positive)/tf.reduce_sum(detected)
recall = tf.reduce_sum(true_positive)/tf.reduce_sum(positive)
@@ -56,6 +58,102 @@ def evaluate_model(logits, labels):

return precision, recall, f_mesure, accuracy
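# The argmax trick in evaluate_model: with the positive class at index 0,
# argmax(logits,1) + argmax(labels,1) == 0 exactly when prediction and label are
# both positive. Illustrative NumPy check (not part of the commit):
#   logits = np.array([[.9,.1],[.2,.8],[.7,.3]]); labels = np.array([[1,0],[1,0],[0,1]])
#   (logits.argmax(1) + labels.argmax(1)) == 0  ->  [True, False, False]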

def test_model(x_train, y_train, x_valid, y_valid, layers, starter_learning_rate, beta, name = "yo", i=0):
graph = tf.Graph()
with graph.as_default():

input_size = len(x_train[0]) # 4 features
output_size = len(y_train[0]) # 2 classes
layers.append(output_size)
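# NOTE: append mutates the caller's list, so repeated calls to test_model with the
# same `layers` object keep growing it; copying first (layers = list(layers)) would avoid this.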

# Input data. For the training data, we use a placeholder that will be fed
# at run time with a training minibatch.
x = tf.placeholder(tf.float32,[None,input_size])
y_ = tf.placeholder(tf.float32,[None,output_size])


# Construct model
h = x
logits = x
regularizers = 0
n_in = input_size
for size in layers:
w, logits, h = layer(h, n_in, size)
regularizers = regularizers + tf.nn.l2_loss(w)
n_in = size

# Loss function with L2 Regularization
loss = 1 - f_mesure_approx(logits, y_, 2)
loss = tf.reduce_mean(loss + beta * regularizers)
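# Maximizing the soft F-measure is cast as minimizing 1 - F; beta scales the summed
# L2 penalty over all weight matrices (beta = 0.0 disables regularization here).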


# Learning mechanism
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
500, 0.9, staircase=True)
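# With staircase=True this is starter_learning_rate * 0.9 ** (global_step // 500),
# i.e. a 10% rate cut every 500 steps.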
# Passing global_step to minimize() will increment it at each step.
learning_step = (
tf.train.GradientDescentOptimizer(learning_rate)
.minimize(loss, global_step=global_step)
)

# Softmax predictions for the training data
train_prediction = tf.nn.softmax(logits)

precision, recall, f_mesure, accuracy = evaluate_model(logits, y_)



num_steps = 2000
losses = []
losses1 = []
fm = []
lrates = []
bestLossStep = 0
bestLoss = 100
bestFMStep = 0
bestFM = 0

with tf.Session(graph=graph) as session:
session.run(tf.global_variables_initializer())
#print("Initialized")

for step in range(num_steps):
batch_data, batch_labels = td.shuffle(x_train, y_train)
feed_dict = {x: batch_data, y_: batch_labels}
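# td.shuffle appears to return the entire training set reshuffled, so each step is
# full-batch gradient descent rather than a minibatch update, despite the comment above.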

session.run(learning_step, feed_dict=feed_dict)
l1 = session.run(loss, feed_dict={x:x_train, y_:y_train})
l, f, lr = session.run([loss, f_mesure, learning_rate], feed_dict={x:x_valid, y_:y_valid})
losses1.append(l1)
losses.append(l)
fm.append(f)
lrates.append(lr)

#print(step)
if l < bestLoss:
bestLoss = l
bestLossStep = step

if f > bestFM:
bestFM = f
bestFMStep = step

precision, recall, f_mesure, accuracy = session.run([precision, recall, f_mesure, accuracy], feed_dict={x: x_valid, y_: y_valid})
F = open("results.txt", 'a')
F.write(str(name) + ":"+str(f_mesure)+ " "+str(bestFM)+"\n")
F.close()

plt.figure(i)
plt.plot(range(num_steps), losses, range(num_steps), losses1, range(num_steps), fm, range(num_steps), lrates)
plt.savefig("./data/fig/"+name+".png")






'''
graph = tf.Graph()
with graph.as_default():
@@ -160,4 +258,83 @@ def evaluate_model(logits, labels):
print('Best f-mesure :',bestFM,' at step :',bestFMStep)
plt.plot(range(num_steps), losses,range(num_steps), fm, range(num_steps), lrates)
plt.show()
plt.show()'''

if __name__ == "__main__":
instances , labels = reader.constructDataset2()
dataset_x , dataset_y = td.shuffle(instances , labels)
dataset_x = td.standardizeData(dataset_x)

validSet_start_idx = int(math.ceil(len(dataset_x)*0.7))
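# First ~70% of the shuffled dataset trains the model; the remaining ~30% is held out for validation.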

x_train = dataset_x[:validSet_start_idx,:]
y_train = dataset_y[:validSet_start_idx,:]
x_valid = dataset_x[validSet_start_idx:,:]
y_valid = dataset_y[validSet_start_idx:,:]

nonZeroTotal = np.nonzero(dataset_y[:,0])[0].size
nonZeroTrain = np.nonzero(y_train[:,0])[0].size
nonZeroValid = np.nonzero(y_valid[:,0])[0].size

print('nonzero :', nonZeroTotal, nonZeroTrain, nonZeroValid)

starter_learning_rate = 0.26
beta = 0.0
layers = [32,16,8]

test_model(x_train, y_train, x_valid, y_valid, layers, starter_learning_rate, beta)

'''starter_learning_rate = 0.22
beta = 0.0
layers = [32,16,8]
systems = [
'android-frameworks-opt-telephony',
'android-frameworks-sdk',
'android-frameworks-tool-base',
'android-platform-support',
'apache-ant',
'apache-cassandra',
'apache-commons-codec',
'apache-commons-io',
'apache-commons-lang',
'apache-commons-logging',
'apache-derby',
'apache-ivy',
'apache-karaf',
'apache-log4j',
'apache-pig',
'apache-struts',
'apache-tomcat',
'eclipse-jdt-core',
'google-guava',
'jedit',
'mongodb'
]
for i in range(len(systems)):
sys = list(systems)
sys.pop(i)
for j in range(3):
print(i,"-",j)
x_valid, y_valid = reader.system2Data(systems[i])
x_train = np.empty(shape=[0,4])
y_train = np.empty(shape=[0,2])
for system in sys:
systemInstances , systemLabels = reader.system2Data(system)
x_train = np.concatenate((x_train, systemInstances), axis=0)
y_train = np.concatenate((y_train, systemLabels), axis=0)
idx = len(x_train)
x_train = np.concatenate((x_train, x_valid), axis=0)
x_train = td.standardizeData(x_train)
x_valid = x_train[idx:,:]
x_train = x_train[:idx,:]
test_model(x_train, y_train, x_valid, y_valid, layers, starter_learning_rate, beta, systems[i], i)'''
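# Note on the commented-out leave-one-system-out loop above: the held-out system's
# features are standardized together with the training systems (x_valid is concatenated
# into x_train before td.standardizeData, then split back out); presumably deliberate,
# so train and validation data share the same scaling statistics.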
Binary file modified historical_anti-pattern_detection/data/.DS_Store
