-
Notifications
You must be signed in to change notification settings - Fork 0
/
naiveBayesianHelper.py
67 lines (49 loc) · 2.47 KB
/
naiveBayesianHelper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import probabilityCalculation as pc
import driverFile as df
import readingData as rd
import pickle
def do_training(directory, mname, pname, path, splitParam):
    """Fit the naive Bayes tables, print training accuracy, and pickle the results.

    Args:
        directory: Mapping providing 'paths', 'featuretuple' and 'dimensions'.
            'paths'[0] is handed to the data reader and 'paths'[1] to the
            label loader and the prior calculation.
        mname: File name for the pickled likelihood table.
        pname: File name for the pickled prior table.
        path: Prefix that is concatenated directly (no separator is added)
            with ``mname`` and ``pname`` to form the output file names.
        splitParam: Forwarded unchanged to ``df.createDataWithLabel``.
    """
    label_path = directory['paths'][1]
    true_labels = rd.load_label(label_path)
    num_labels = len(set(true_labels))
    sample_count = len(true_labels)

    print("===========================")
    print("Reading data")
    data_path = directory['paths'][0]
    feature_tuple = directory['featuretuple']
    dim_a = directory['dimensions'][1]
    dim_b = directory['dimensions'][0]
    labelled_data, kept_labels = df.createDataWithLabel(
        'training', data_path, true_labels, sample_count,
        feature_tuple, dim_a, dim_b, splitParam,
    )
    # The reader may return fewer labels than were loaded, so recount.
    sample_count = len(kept_labels)

    print("===========================")
    print("Initializing training")
    likelihoods = pc.training_Bayesian(
        labelled_data, num_labels, feature_tuple, dim_a, dim_b,
    )

    # One (value, total) pair per label index 0..num_labels-1.
    priors = {}
    for label in range(num_labels):
        value, total = pc.calculatePrior(label_path, label)
        priors[label] = (value, total)

    predictions = pc.posteriorProbability(
        labelled_data, num_labels, priors, likelihoods,
    )

    # Count positions where the prediction disagrees with the kept label.
    mismatches = sum(
        1
        for idx in range(sample_count)
        if int(kept_labels[idx]) != predictions[idx]
    )
    accuracy = 100 - ((mismatches * 100) / sample_count)
    print("Training done with accuracy ", accuracy)

    with open(path + mname, 'wb') as model_file:
        pickle.dump(likelihoods, model_file)
    with open(path + pname, 'wb') as prior_file:
        pickle.dump(priors, prior_file)
def do_testing(mname, pname, directory, splitParam):
    """Load a pickled model, classify the test set, and print error count and accuracy.

    Args:
        mname: Path of the pickled likelihood table to load.
        pname: Path of the pickled prior table to load.
        directory: Mapping providing 'paths', 'featuretuple' and 'dimensions'.
            'paths'[2] is handed to the data reader and 'paths'[3] to the
            label loader.
        splitParam: Forwarded unchanged to ``df.createDataWithLabel``.
    """
    # NOTE: unpickling can execute arbitrary code; only load model files
    # that come from a trusted source.
    # Context managers make sure both file handles are closed once loaded.
    with open(mname, "rb") as model_file:
        learnedLikelihood = pickle.load(model_file)
    with open(pname, "rb") as prior_file:
        priorProb = pickle.load(prior_file)

    true_labels = rd.load_label(directory['paths'][3])
    allLabels = len(set(true_labels))
    total_images = len(true_labels)

    print("===========================")
    print("Reading data")
    # NOTE(review): do_training unpacks this helper's result into
    # (data, labels) for 'training'; here the result is used whole --
    # confirm that 'testing' mode returns only the data.
    processedData = df.createDataWithLabel(
        'testing', directory['paths'][2], true_labels, total_images,
        directory['featuretuple'], directory['dimensions'][1],
        directory['dimensions'][0], splitParam,
    )

    print("===========================")
    print("Initializing testing")
    predicted_value = pc.posteriorProbability(
        processedData, allLabels, priorProb, learnedLikelihood,
    )

    # Count positions where the prediction disagrees with the true label.
    diff = sum(
        1
        for i in range(total_images)
        if int(true_labels[i]) != predicted_value[i]
    )
    accuracy = 100 - ((diff * 100) / total_images)
    print("Total error in testing data: ", diff)
    print("Accuracy: ", accuracy)