-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathanalyzer.py
128 lines (105 loc) · 4.41 KB
/
analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import plotly.express as px
from pathlib import Path
import pandas as pd
from config import engine
from helper import log, isTableExists
# Color stops shared by every graph, listed from dark green to red.
# Callers reverse the list when they need the opposite direction.
COLOR_CONTINUOUS_SCALE = [
    "#024E1B",
    "#006B3E",
    "#FFE733",
    "#FFAA1C",
    "#FF8C01",
    "#ED2938",
]

# Directory (relative to the working directory) that receives the html graphs.
OUTPUT_PATH = Path("Output")
def load_data():
    """
    Query the DB for the per-WAF detection rates and return them as a DataFrame.

    The query reads the waf_comparison table, ignores rows whose
    response_status_code is 0, and computes for every WAF:
      * the true negative rate - percentage of 'Legitimate' rows with isBlocked = 0
      * the true positive rate - percentage of 'Malicious' rows with isBlocked = 1
    The false positive rate, false negative rate and balanced accuracy are
    derived from those two values; every figure is rounded to 3 decimals.
    Only WAFs present in both groups are returned (inner join), ordered by
    balanced accuracy in descending order.

    Returns:
        A DataFrame whose columns are renamed to their display names.
    """
    query = """
WITH TNR AS (
SELECT "WAF_Name",
SUM(CASE WHEN "isBlocked" = 0 THEN 1.0 ELSE 0.0 END) / count(*) * 100 AS true_negative_rate
FROM waf_comparison
WHERE response_status_code != 0 and "DataSetType" = 'Legitimate'
GROUP BY "WAF_Name"
),
TPR AS (
SELECT "WAF_Name",
SUM(CASE WHEN "isBlocked" = 1 THEN 1.0 ELSE 0.0 END) / count(*) * 100 AS true_positive_rate
FROM waf_comparison
WHERE response_status_code != 0 and "DataSetType" = 'Malicious'
GROUP BY "WAF_Name"
)
SELECT TPR."WAF_Name",
ROUND(100-TNR.true_negative_rate, 3) AS false_positive_rate,
ROUND(100-TPR.true_positive_rate, 3) AS false_negative_rate,
ROUND(TPR.true_positive_rate, 3) AS true_positive_rate,
ROUND(TNR.true_negative_rate, 3) AS true_negative_rate,
ROUND((TPR.true_positive_rate + TNR.true_negative_rate)/2, 3) AS balanced_accuracy
FROM TPR
JOIN TNR on TPR."WAF_Name" = TNR."WAF_Name"
ORDER BY balanced_accuracy DESC
"""

    # DB column name -> display name used as the DataFrame column.
    display_names = {
        "WAF_Name": "WAF Name",
        "false_positive_rate": "False Positive Rate",
        "false_negative_rate": "False Negative rate",
        "true_positive_rate": "True Positive Rate",
        "true_negative_rate": "True Negative Rate",
        "balanced_accuracy": "Balanced Accuracy",
    }

    raw_results = pd.read_sql_query(query, engine)
    return raw_results.rename(columns=display_names).copy()
def create_graph(_df, metric, is_ascending):
    """
    Creates a plotly html bar graph for a single metric and saves it in the Output directory
    while also printing the ranking to the console.

    Args:
        _df: DataFrame holding a "WAF Name" column and the `metric` column.
        metric: Name of the column to plot. It is also used in the chart title
            and as the name of the html file.
        is_ascending: Sort direction handed to `sort_values`. When true, the
            color scale is reversed.
    """
    # sort_values already returns a new frame, so the caller's _df is never modified.
    _df_sorted = _df.sort_values(metric, ascending=is_ascending)
    color_scale = COLOR_CONTINUOUS_SCALE[::-1] if is_ascending else COLOR_CONTINUOUS_SCALE

    fig = px.bar(
        _df_sorted,
        x=metric,
        y="WAF Name",
        color=metric,
        title=metric + " chart",
        text=metric,
        color_continuous_scale=color_scale,
        template='plotly',
        orientation='h',
    ).update_layout(title_x=0.5, font=dict(size=18))

    # Plotly sort visualization is opposite to pandas sort, so the frame is
    # reversed for the console output.
    # The explicit copy matters: adding a column to a bare slice of _df_sorted
    # triggers pandas' SettingWithCopyWarning.
    ranking = _df_sorted.iloc[::-1].copy()
    ranking['Position'] = range(1, len(ranking) + 1)

    print(f'\n\n{metric}:\n')
    print(ranking[['Position', 'WAF Name', metric]].to_string(index=False))

    fig.write_html(OUTPUT_PATH / f"{metric}.html")
def create_2d_graph(_df):
    """
    Creates a 2d plotly scatter graph plotting the True Negative Rate (x axis)
    against the True Positive Rate (y axis).

    Every point is labelled with its WAF name and colored by its Balanced
    Accuracy. The graph is saved as an html file in the Output directory.
    """
    axis_labels = {
        "True Negative Rate": "Detection Quality (True Negative Rate)",
        "True Positive Rate": "Security Quality (True Positive Rate)"
    }

    scatter = px.scatter(
        _df,
        x='True Negative Rate',
        y='True Positive Rate',
        labels=axis_labels,
        color='Balanced Accuracy',
        title="WAF Comparison Project - Security & Detection Quality",
        text='WAF Name',
        template='plotly',
        color_continuous_scale=COLOR_CONTINUOUS_SCALE[::-1],
    )
    scatter = scatter.update_layout(title_x=0.5, font={"size": 16})

    # Put each WAF name underneath its point.
    scatter.update_traces(textposition="bottom center")

    target_file = OUTPUT_PATH / "2d Graph True Negative Rate & True Positive Rate.html"
    scatter.write_html(target_file)
def analyze_results():
    """
    Loads the results and generates every graph into the Output directory.

    When the waf_comparison table is missing, two warnings are logged and the
    function returns without creating anything.
    """
    # Nothing to analyze until the table exists.
    if not isTableExists('waf_comparison'):
        missing_table_warnings = (
            "Table waf_comparison doesn't exists in the DB, The analyzer was called before the runner.",
            "Please fill WAFS_DICT configuration in the config.py file and run the script again.",
        )
        for message in missing_table_warnings:
            log.warning(message)
        return

    # Make sure the output directory is there before any graph is written.
    OUTPUT_PATH.mkdir(exist_ok=True)

    results = load_data()

    # (metric column, sort direction) for each bar graph, in generation order.
    bar_graphs = (
        ('False Positive Rate', False),
        ('False Negative rate', False),
        ('True Positive Rate', True),
        ('True Negative Rate', True),
        ('Balanced Accuracy', True),
    )
    for metric_name, ascending in bar_graphs:
        create_graph(results, metric=metric_name, is_ascending=ascending)

    create_2d_graph(results)

    log.info("Graph visualization saved into Output directory.")
# Generate the graphs when this file is executed directly as a script.
if __name__ == "__main__":
    analyze_results()