-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy pathAPT_5_process_flowdroid_outputs.py
207 lines (154 loc) · 7 KB
/
APT_5_process_flowdroid_outputs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import re
import os
import csv
import sys
import argparse
from tqdm import tqdm
from termcolor import colored
from argparse import RawTextHelpFormatter
# Category mapping file consumed by load_categories(); each non-empty line
# holds two fields separated by ' % '. Path is relative to the CWD at run time.
PRM_DOMAINS = 'info/prmDomains.txt'
def print_message(message, with_color, color):
    """Print *message* to stdout, optionally colorized via termcolor.

    :param message: text to print
    :param with_color: when truthy, wrap the message with termcolor.colored
    :param color: termcolor color name (e.g. 'green'); ignored when
        with_color is falsy
    """
    # Function-call form of print with a single argument behaves identically
    # on Python 2, and makes the block forward-compatible with Python 3
    # (the old statement form is a syntax error there).
    if with_color:
        print(colored(message, color))
    else:
        print(message)
def main():
    """Command-line entry point.

    Parses the CLI arguments and delegates to process_flowdroid_outputs.
    """
    parser = argparse.ArgumentParser(
        description="- Script designed for processing the outputs of Flowdroid\n"
        "- For each flowdroid log, a matrix counting all flows is obtained."
        "- For a set of flowdroid logs, a matrix counting all flows for"
        " all apps is also obtained."
        "- Outputs are generated in CSV files. \n\n",
        formatter_class=RawTextHelpFormatter)
    parser.add_argument('-s', '--source', help='Source directory for FlowDroid logs', required=True)
    parser.add_argument('-o', '--output_folder', help='Output directory for individual processed apks', required=False)
    parser.add_argument('-og', '--output_global_csv', help='Output directory for global matrix', required=False)
    # With no arguments at all, show the full help instead of a terse error.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    # BUG FIX: argparse stores '-s/--source' under the attribute 'source';
    # the previous 'args.source_directory' raised AttributeError on every run.
    process_flowdroid_outputs(flowdroid_analyses_folder=args.source,
                              output_folder_individual_csv=args.output_folder,
                              output_csv_file=args.output_global_csv)
def find_index(lis, stri):
    """Return the index of the first element of *lis* that contains the
    substring *stri*, or -1 when no element matches."""
    hits = (pos for pos, item in enumerate(lis) if stri in item)
    return next(hits, -1)
def load_categories(f):
    """Load the source/sink category mapping from the prmDomains file.

    Each non-empty line has the form ``<left field> % <right field>``.

    :param f: path of the mapping file
    :returns: tuple ``(cat_dic, cat_lis)`` where ``cat_dic`` maps each
        right-hand field to the list of left-hand fields seen with it, and
        ``cat_lis`` is the sorted list of distinct left-hand fields plus the
        sentinel 'NOT_EXISTING' used for calls absent from the mapping.
    """
    with open(f, 'r') as fp:
        lines = [x.strip() for x in fp.readlines()]
    cat_dic = {}
    cat_set = set()
    for line in lines:
        if line == "":
            continue
        # Split once instead of re-splitting the same line for every access.
        parts = line.split(' % ')
        left, right = parts[0].strip(), parts[1].strip()
        # setdefault replaces the duplicated present/absent branches.
        cat_dic.setdefault(right, []).append(left)
        cat_set.add(left)
    cat_set.add('NOT_EXISTING')
    # Sorted so the matrix row/column order is stable across runs; plain set
    # iteration order is not deterministic between interpreter invocations.
    return cat_dic, sorted(cat_set)
def fill_matrix_flows(mat, cat_list, cat_map, flows):
    """Accumulate flow counts into the square matrix *mat* (mutated in place).

    :param mat: 2-D count matrix indexed by positions in *cat_list*
    :param cat_list: ordered category names defining the matrix indexes
    :param cat_map: mapping passed through to get_category
    :param flows: dict mapping one flow-endpoint signature to the list of
        signatures linked with it (as extracted from a FlowDroid log)
    :returns: the same *mat* object, for convenience
    """
    # .items() iterates identically to the Python-2-only .iteritems().
    for sig, linked_sigs in flows.items():
        row_indexes = [cat_list.index(c) for c in get_category(sig, cat_map)]
        col_indexes = []
        for linked in linked_sigs:
            col_indexes.extend(cat_list.index(c) for c in get_category(linked, cat_map))
        # One count for every (row category, column category) combination.
        for r in row_indexes:
            for c in col_indexes:
                mat[r][c] += 1
    return mat
def combine_matrices(matrices, categories_list, output_combined_file_name):
    """
    Generates a csv file with all samples as rows and columns as the combination of all possible categories
    :param matrices: a dictionary with keys as apk names (hashes) and values as the 2d matrix
    :param categories_list: list of categories from prmDomains
    :param output_combined_file_name: name of the file to save the combined csv
    """
    # Header: Cartesian product "source-sink" of all category pairs, in the
    # same row-major order used to flatten each per-app matrix below.
    header = ["apk"] + [src + "-" + dst
                        for src in categories_list
                        for dst in categories_list]
    # One row per apk: its key followed by its row-major flattened matrix.
    # (The previous enumerate() index was never used and has been dropped.)
    rows = [[apk] + [cell for matrix_row in matrix for cell in matrix_row]
            for apk, matrix in matrices.items()]
    # 'w' instead of the Python-2-only 'wb' so the csv module also works on
    # Python 3; the csv writer emits its own line terminators either way.
    with open(output_combined_file_name, 'w') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)
        writer.writerows(rows)
def get_category(k, c_map):
    """Look up the category list for call *k* in *c_map*; calls absent from
    the mapping (custom calls) resolve to the sentinel ['NOT_EXISTING']."""
    return c_map.get(k, ['NOT_EXISTING'])
def save_as_csv(path, dic, headers):
    """Write one source/sink count matrix to *path* as CSV.

    :param path: destination csv file
    :param dic: square matrix (list of rows), one row per entry in *headers*
    :param headers: category names; used both as the column header row and
        as the leading label of each data row
    """
    # 'w' instead of the Python-2-only 'wb' so the csv module also works on
    # Python 3; the csv writer emits its own line terminators either way.
    with open(path, 'w') as csvfile:
        writer = csv.writer(csvfile)
        # Escaped backslash: the old '\S' only worked as an unrecognized
        # escape sequence (a DeprecationWarning on modern interpreters);
        # the written string is byte-identical.
        writer.writerow(['Sources\\Sinks'] + headers)
        for ind, header in enumerate(headers):
            # Prepend the row label without mutating the caller's matrix.
            writer.writerow([header] + dic[ind])
def process_flowdroid_outputs(flowdroid_analyses_folder, output_folder_individual_csv, output_csv_file, with_color=True):
    """Process every FlowDroid log found under *flowdroid_analyses_folder*.

    For each log a per-app source/sink category count matrix is written as a
    CSV into *output_folder_individual_csv*; a combined matrix covering all
    apps is written to *output_csv_file*.

    :param flowdroid_analyses_folder: directory tree containing the logs
    :param output_folder_individual_csv: directory for per-app CSV matrices
        (created if missing)
    :param output_csv_file: path of the combined (all apps) CSV matrix
    :param with_color: colorize the final status messages
    """
    if not os.path.exists(output_folder_individual_csv):
        os.makedirs(output_folder_individual_csv)
    categories_map, categories_list = load_categories(PRM_DOMAINS)
    # Collect every file in the tree; each one is assumed to be a log.
    flowdroid_analysis_files = []
    for path, subdirs, files in os.walk(flowdroid_analyses_folder):
        for name in files:
            flowdroid_analysis_files.append(os.path.join(path, name))
    matrices = {}
    # Compiled once instead of re-compiling on every key/value lookup below.
    signature_pattern = re.compile('(<.*?>)')
    for flow_file in tqdm(flowdroid_analysis_files):
        output_individual_name = os.path.join(output_folder_individual_csv,
                                              os.path.basename(flow_file).replace(".json", "") + ".csv")
        # Skip logs already processed by a previous run (resume support).
        if os.path.isfile(output_individual_name):
            continue
        with open(flow_file, 'r') as fp:
            content = fp.readlines()
        # Keep the region from the first flow marker up to the trailer lines.
        ind = find_index(content, 'Found a flow to sink')
        flow_content = [x.strip() for x in content[ind:-2]]
        # Chop the log into {marker line: [following lines...]} chunks
        # delimited by successive 'Found a flow to sink' markers.
        dic = {}
        while len(flow_content) > 0:
            ind1 = find_index(flow_content, 'Found a flow to sink')
            tmp_key = flow_content[0]
            flow_content.pop(ind1)
            ind2 = find_index(flow_content, 'Found a flow to sink')
            if ind2 == -1:
                tmp_value = flow_content[ind1:]
                flow_content = []
            else:
                tmp_value = flow_content[ind1:ind2]
                flow_content = flow_content[ind2:]
            dic[tmp_key] = tmp_value
        # Reduce every line to its '<...>' method signature. setdefault
        # replaces the previous duplicated present/absent branches, and
        # .items() iterates identically to the Python-2-only .iteritems().
        dic_new = {}
        for key, value_list in dic.items():
            key_sig = signature_pattern.search(key).group(1)
            bucket = dic_new.setdefault(key_sig, [])
            for value in value_list:
                bucket.append(signature_pattern.search(value).group(1))
        size = len(categories_list)
        matrix = [[0 for _ in range(size)] for _ in range(size)]
        matrix = fill_matrix_flows(matrix, categories_list, categories_map, dic_new)
        save_as_csv(output_individual_name, matrix, categories_list)
        matrices[flow_file] = matrix
    combine_matrices(matrices, categories_list, output_csv_file)
    print_message('Success!!', with_color, 'green')
    print_message('Output folder: ' + str(output_folder_individual_csv), with_color, 'blue')
# Script entry point: delegate to main() for argument parsing and processing.
if __name__ == '__main__':
    main()