-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathAndroid_malware_classification.py
109 lines (78 loc) · 3.37 KB
/
Android_malware_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import tensorflow as tf
from keras.layers import Dense, Dropout, LSTM, Embedding, Activation
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.optimizers import SGD, Adam, Nadam
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score
from keras.utils.vis_utils import plot_model
from sklearn.metrics import confusion_matrix
from keras.layers.normalization import BatchNormalization
import keras
import keras_metrics
from keras import backend as K
import time
input_file = 'input_5000.csv'


def load_data(test_split=0.2):
    """Load the CSV dataset, shuffle it and split it into train/test arrays.

    Reads ``input_file`` with pandas. Each value of the ``sequence`` column
    is a whitespace-separated string of integers and is parsed into a list
    of ints; the ``target`` column supplies the labels.

    Args:
        test_split: Fraction of the rows placed in the test set; the
            remaining ``1 - test_split`` rows form the training set.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``. ``X_train`` and
        ``X_test`` are the outputs of ``pad_sequences`` and share the same
        number of columns (the length of the longest sequence in the whole
        file); ``y_train`` and ``y_test`` are NumPy arrays of targets.
    """
    print('Loading data...')
    df = pd.read_csv(input_file)
    df['sequence'] = df['sequence'].apply(lambda x: [int(e) for e in x.split()])
    # Shuffle the rows so the split below is random on every run.
    df = df.reindex(np.random.permutation(df.index))

    train_size = int(len(df) * (1 - test_split))
    sequences = df['sequence'].values
    targets = np.array(df['target'].values)

    # Pad both splits to ONE common width. Padding each split on its own
    # uses that split's longest sequence, so train and test could end up
    # with different numbers of columns and the test set would no longer
    # fit a model sized from the training set.
    maxlen = max((len(seq) for seq in sequences), default=None)

    X_train = pad_sequences(sequences[:train_size], maxlen=maxlen)
    X_test = pad_sequences(sequences[train_size:], maxlen=maxlen)
    y_train = targets[:train_size]
    y_test = targets[train_size:]
    return X_train, y_train, X_test, y_test
def create_model(input_length):
    """Build and compile the dense binary classifier.

    The network is Dense(64) -> BatchNorm -> ReLU -> Dropout(0.5) ->
    Dense(32) -> tanh -> BatchNorm -> Dropout(0.5) -> Dense(1) -> sigmoid,
    compiled with binary cross-entropy and an Adam optimizer.

    Args:
        input_length: Number of input features per sample; used as the
            input dimension of the first Dense layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    print('Creating model...')
    model = Sequential()

    # Size the input layer from the caller's data rather than a fixed 132,
    # so the model matches whatever width the padded sequences have.
    model.add(Dense(64, input_dim=input_length, kernel_initializer='uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(32, kernel_initializer='uniform'))
    model.add(Activation('tanh'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Single sigmoid unit: the output is used with binary cross-entropy.
    model.add(Dense(1, kernel_initializer='uniform'))
    model.add(Activation('sigmoid'))

    print('Compiling...')
    # The metric order matters: callers unpack model.evaluate() as
    # (loss, accuracy, precision, recall, false_positive, false_negative).
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None,
                       decay=0.0, amsgrad=False),
        metrics=[
            'accuracy',
            keras_metrics.precision(),
            keras_metrics.recall(),
            keras_metrics.false_positive(),
            keras_metrics.false_negative(),
        ],
    )
    return model
# --- Driver: load the data, train the model, evaluate on the held-out set ---
X_train, y_train, X_test, y_test = load_data()
print(X_train.shape)

# The timer covers model construction as well as training.
start_time = time.time()
model = create_model(len(X_train[0]))

print('Fitting model...')
# `epochs` replaces the deprecated Keras 1 keyword `nb_epoch`.
history = model.fit(X_train, y_train, batch_size=10, epochs=1000,
                    validation_split=0.1, verbose=1)
print("---------------%s seconds----------" % (time.time() - start_time))

# evaluate() returns the loss followed by the metrics in the order they were
# passed to compile() inside create_model.
score, acc, prec, rec, false_positive, false_negative = model.evaluate(
    X_test, y_test, batch_size=50)
# print('Test score:', score)
# print("----------------------------------------------------------")
print("----------------------------------------------------------")
print("----------------------------------------------------------")
print('Test accuracy:', acc)
print("----------------------------------------------------------")
print("----------------------------------------------------------")

# Disabled confusion-matrix plot, kept for reference.
# NOTE(review): as written it passes `score` (the first value returned by
# evaluate()) where predictions are expected, and it uses `plt`, which is not
# imported in the visible part of this file - fix both before re-enabling.
# labels = ['target']
# cm = confusion_matrix(y_test, score, labels)
# print(cm)
# fig = plt.figure()
# ax = fig.add_subplot(111)
# cax = ax.matshow(cm)
# plt.title('Confusion matrix of the classifier')
# fig.colorbar(cax)
# ax.set_xticklabels([''] + labels)
# ax.set_yticklabels([''] + labels)
# plt.xlabel('Predicted')
# plt.ylabel('True')
# plt.show()
# print(false_negative)
model.summary()