-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path[Extract]API_process.py
63 lines (53 loc) · 1.58 KB
/
[Extract]API_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import numpy as np
import pandas as pd
import subprocess
import os, sys
import time
import pefile
import dill
import matplotlib.pyplot as plt
from collections import defaultdict
from collections import Counter
# Directory holding the PE samples to scan.
data_dir = "./TrainSet"  # Trainset dir
# Text file listing the API names to track, one name per line.
api_txt = "./api_list.txt"

# Load the tracked API names. The context manager guarantees the file handle
# is closed; the previous open()/readline() loop never closed it.
with open(api_txt, 'r') as api:
    # Only newline characters are stripped, so any other whitespace in a name
    # is kept exactly as written in the file. A blank line yields an empty
    # string entry, as before.
    api_list = [line.strip("\n") for line in api]
def _collect_tracked_imports(pe_path, tracked):
    """Return a presence map of the tracked APIs imported by one PE file.

    Args:
        pe_path: Path of the file to parse with ``pefile``.
        tracked: Container of API names (str) to look for; a set keeps the
            per-import membership test cheap.

    Returns:
        A ``defaultdict`` keyed by API name. On success it holds ``1`` for
        every tracked API found in the import table and defaults to ``0``.
        If the file cannot be parsed, or has no import directory, an empty
        ``defaultdict`` defaulting to ``-1`` is returned instead. In both
        cases the default is only produced when a missing key is looked up;
        it is not stored as a key.
    """
    try:
        pe = pefile.PE(pe_path)
    except Exception:
        # Best effort: anything that is not a loadable PE is marked as failed
        # rather than aborting the whole scan. Catching Exception (not a bare
        # except) still lets Ctrl-C / SystemExit stop a long run.
        return defaultdict(lambda: -1)

    try:
        pe.parse_data_directories()
        # record every file's API list
        found = defaultdict(lambda: 0)
        # A PE without imports has no DIRECTORY_ENTRY_IMPORT attribute; the
        # resulting AttributeError is handled below as a failed file.
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            for imp in entry.imports:
                try:
                    api_name = imp.name.decode("utf-8")
                except (AttributeError, UnicodeDecodeError):
                    # Skip imports whose name is missing (presumably
                    # ordinal-only imports, where name is None) or is not
                    # valid UTF-8.
                    continue
                if api_name in tracked:
                    found[api_name] = 1
        return found
    except Exception:
        # Any failure while walking the import table discards the partial
        # result and marks the file as failed.
        return defaultdict(lambda: -1)
    finally:
        # Release the file mapping held by pefile before the next sample.
        pe.close()


filelist = os.listdir(data_dir)
# Build the lookup set once; testing against the list was a linear scan for
# every imported symbol of every file.
tracked_apis = set(api_list)
# Maps file name -> per-file API presence map. A plain dict is used because
# the old default factory returned the defaultdict class itself, not a
# mapping, for unknown file names.
get_api = {}
start = time.time()
for index, file_name in enumerate(filelist):
    print(index, file_name)
    get_api[file_name] = _collect_tracked_imports(
        os.path.join(data_dir, file_name), tracked_apis
    )
end = time.time()
print("Get API processing {}s".format(end-start))
# "filename" becomes the first CSV column, ahead of the API columns.
api_list.insert(0, "filename")
print("Dataframe processing")

# Tag every per-file mapping with the name of the file it describes.
for file_name in get_api:
    get_api[file_name]["filename"] = file_name

# Build the frame in a single call, one row per scanned file, in scan order.
# DataFrame.append, used here previously, copied the whole frame for every
# row and was removed in pandas 2.0. Each mapping is copied to a plain dict so
# only the keys actually stored in it contribute values.
api_df = pd.DataFrame(
    [dict(per_file) for per_file in get_api.values()],
    columns=api_list,
)

# APIs that were not recorded for a file are absent from its mapping and
# surface as NaN; they are written out as 0.
# NOTE(review): files that failed to parse carry an empty mapping whose
# default is -1, but that default is never stored as a key, so those rows
# also end up as all zeros in the CSV. The output is kept as it was; confirm
# whether -1 was meant to appear for such files.
api_df = api_df.fillna(0)
api_df.to_csv("API_information.csv", index=False)