"""
@Filename: AdaptiveBoost.py
@Author: Ryuk
@Create Date: 2019-05-03
@Update Date: 2019-05-24
@Description: Implement of Adaptive Boosting
"""
import numpy as np
import preProcess
import pickle
import copy
import SVM, KNN, DecisionTree, Logistic, Perceptron
import math


class Adaboost:
    def __init__(self, norm_type="Normalization", iterations=5, base_classifier="SVM"):
        self.iterations = iterations
        self.norm_type = norm_type
        self.prediction = None
        self.probability = None
        self.classifier_set = None
        if base_classifier == "SVM":
            self.base_classifier = SVM.SVMClassifier()
        elif base_classifier == "KNN":
            self.base_classifier = KNN.KNNClassifier()
        elif base_classifier == "DecisionTree":
            self.base_classifier = DecisionTree.DecisionTreeClassifier()
        elif base_classifier == "Logistic":
            self.base_classifier = Logistic.LogisticRegressionClassifier()
        elif base_classifier == "Perceptron":
            self.base_classifier = Perceptron.PerceptronClassifier()
        else:
            raise ValueError("Unknown base classifier: %s" % base_classifier)

    '''
    Function: baseClassifier
    Description: train a weak classifier and compute its weighted error
    Input:  train_data          dataType: ndarray  description: training features
            train_label         dataType: ndarray  description: training labels
            w                   dataType: ndarray  description: sample weights
    Output: clf                 dataType: object   description: weak classifier
            weighted_error      dataType: float    description: weighted error
            base_predictions    dataType: ndarray  description: predictions of the weak classifier
    '''
    def baseClassifier(self, train_data, train_label, w):
        sample_num = len(train_data)
        error_index = np.ones([sample_num, 1])
        # train a fresh copy so that each boosting round keeps its own classifier
        clf = copy.deepcopy(self.base_classifier)
        clf.train(train_data, train_label)
        # reshape to a column vector to match the shapes of w and train_label
        base_predictions = np.sign(clf.predict(train_data)).reshape([-1, 1])
        for i in range(sample_num):
            if base_predictions[i] == train_label[i]:
                error_index[i] = 0
        weighted_error = float(np.dot(w.T, error_index))
        return clf, weighted_error, base_predictions

    '''
    Function: updateAlpha
    Description: update alpha, the weight of the current weak classifier:
                 alpha = 1/2 * ln((1 - error) / error)
    Input:  error       dataType: float  description: weighted error
    Output: new_alpha   dataType: float  description: new alpha
    '''
    def updateAlpha(self, error):
        # clip the error away from zero to avoid division by zero
        temp = (1.0 - error) / max(error, 1e-6)
        new_alpha = 0.5 * math.log(temp)
        return new_alpha
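
    # For instance, a weighted error of 0.1 gives alpha = 0.5 * ln(9) ≈ 1.10,
    # while an error of 0.5 (no better than chance) gives alpha = 0, so more
    # accurate weak classifiers receive larger votes in the final ensemble.
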
    '''
    Function: train
    Description: train the model
    Input:  train_data      dataType: ndarray  description: features
            train_label     dataType: ndarray  description: labels
    Output: clf_set         dataType: list     description: list of (alpha, classifier) pairs
    '''
    def train(self, train_data, train_label):
        if self.norm_type == "Standardization":
            train_data = preProcess.Standardization(train_data)
        else:
            train_data = preProcess.Normalization(train_data)

        train_label = np.expand_dims(train_label, axis=1)
        sample_num = len(train_data)
        weak_classifier = []

        # initialize sample weights uniformly
        w = np.ones([sample_num, 1])
        w = w / sample_num

        # aggregated prediction of the ensemble built so far
        agg_predicts = np.zeros([sample_num, 1])

        # start training
        for i in range(self.iterations):
            base_clf, error, base_prediction = self.baseClassifier(train_data, train_label, w)
            alpha = self.updateAlpha(error)
            weak_classifier.append((alpha, base_clf))

            # update the sample weights, Eq. (8.4) on page 139 of the reference
            expon = np.multiply(-1 * alpha * train_label, base_prediction)
            w = np.multiply(w, np.exp(expon))
            w = w / w.sum()

            # calculate the training error rate of the current ensemble
            agg_predicts += alpha * base_prediction
            error_rate = np.multiply(np.sign(agg_predicts) != train_label, np.ones([sample_num, 1]))
            error_rate = error_rate.sum() / sample_num
            # stop early once the ensemble fits the training set perfectly
            if error_rate == 0:
                break

        self.classifier_set = weak_classifier
        return weak_classifier

    '''
    Function: predict
    Description: predict the testing set
    Input:  test_data   dataType: ndarray  description: features
            prob        dataType: bool     description: if True, return the real-valued
                                                        ensemble score instead of the sign
    Output: prediction  dataType: ndarray  description: the prediction results for the testing set
    '''
    def predict(self, test_data, prob=False):
        # normalization
        if self.norm_type == "Standardization":
            test_data = preProcess.Standardization(test_data)
        else:
            test_data = preProcess.Normalization(test_data)

        test_num = test_data.shape[0]
        probability = np.zeros([test_num, 1])

        # weighted vote over all weak classifiers
        for alpha, clf in self.classifier_set:
            probability += alpha * clf.predict(test_data).reshape([-1, 1])

        self.probability = probability
        self.prediction = np.sign(probability)

        if prob:
            return probability
        else:
            return self.prediction

    '''
    Function: accuracy
    Description: show detection result
    Input:  test_label  dataType: ndarray  description: labels of the test data
    Output: accuracy    dataType: float    description: detection accuracy
    '''
    def accuracy(self, test_label):
        test_label = np.expand_dims(test_label, axis=1)
        prediction = self.prediction
        accuracy = np.sum(prediction == test_label) / len(test_label)
        return accuracy

    '''
    Function: save
    Description: save the model as pkl
    Input:  filename    dataType: str  description: the path to save the model
    '''
    def save(self, filename):
        # pickle requires binary mode
        with open(filename, 'wb') as f:
            pickle.dump(self.classifier_set, f)

    '''
    Function: load
    Description: load the model
    Input:  filename    dataType: str  description: the path of the saved model
    Output: self        dataType: obj  description: the trained model
    '''
    def load(self, filename):
        with open(filename, 'rb') as f:
            self.classifier_set = pickle.load(f)
        return self
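

# A minimal usage sketch, not part of the original file: it assumes the repo's
# preProcess and DecisionTree modules are importable and that labels take
# values in {-1, +1}, which the sign-based voting above expects. The tiny
# hand-made dataset below is purely illustrative.
if __name__ == "__main__":
    train_data = np.array([[1.0, 2.1],
                           [2.0, 1.1],
                           [1.3, 1.0],
                           [1.0, 1.0],
                           [2.0, 1.0]])
    train_label = np.array([1, 1, -1, -1, 1])

    model = Adaboost(iterations=5, base_classifier="DecisionTree")
    model.train(train_data, train_label)

    # predict() caches its result, so accuracy() can be called right after it
    prediction = model.predict(train_data)
    print("training accuracy:", model.accuracy(train_label))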