-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculate.py
142 lines (102 loc) · 4.39 KB
/
calculate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
from typing import Any
from typing import Dict

import igraph as ig
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse import identity as sparse_identity
from scipy.sparse.linalg import spsolve
# Root directory that the os.walk loop at the bottom of this file scans for
# .binetflow captures.
# NOTE(review): hard-coded, machine-specific Windows path -- consider making it
# configurable (CLI argument or environment variable).
dataset_dir = r'C:\Users\MAX\Downloads\ctu-dataset'
def calculate_alpha_centrality(g: ig.Graph, alpha: float = 0.1) -> Dict[str, float]:
    """
    Calculate alpha centrality of the graph's nodes.

    The score vector ``c`` is the solution of the linear system
    ``(I - alpha * A) c = d`` where ``A`` is the adjacency matrix of ``g`` and
    ``d`` is the vector of vertex degrees (``g.degree()``).

    The system is assembled and solved in sparse form so that memory scales
    with the number of edges rather than with the square of the number of
    vertices.

    :param g: igraph.Graph object
        The graph for which to calculate the alpha centrality. Every vertex
        must carry a ``name`` attribute.
    :param alpha: float, default 0.1
        The alpha parameter, which controls the weight of the adjacency matrix.
        Should be between 0 and 1.
    :return: Dictionary of node alpha centrality scores, where the key is the
        node name and the value is the alpha centrality score. Empty for a
        graph without vertices.
    :raises numpy.linalg.LinAlgError: if ``I - alpha * A`` is singular, so no
        finite solution exists for this ``alpha``.
    :raises KeyError: if the vertices have no ``name`` attribute.
    """
    n = g.vcount()
    if n == 0:
        # Nothing to solve; the matrix-based path below cannot handle 0 x 0.
        return {}

    if g.is_directed():
        # Build A straight from the edge list. Duplicate (src, dst) pairs are
        # summed by the COO -> CSR conversion, so parallel edges are counted
        # exactly as in the dense adjacency matrix.
        edge_array = np.asarray(g.get_edgelist(), dtype=np.int64).reshape(-1, 2)
        adjacency = csr_matrix(
            (np.ones(len(edge_array)), (edge_array[:, 0], edge_array[:, 1])),
            shape=(n, n),
        )
    else:
        # Undirected graphs keep the original dense route so that igraph's own
        # symmetric / self-loop conventions are preserved unchanged.
        adjacency = csr_matrix(g.get_adjacency().data)

    # (I - alpha * A) * c = degree_vector
    system = sparse_identity(n, format="csc") - alpha * adjacency.tocsc()
    degree_vector = np.asarray(g.degree(), dtype=float)
    centrality = spsolve(system, degree_vector)

    # spsolve only warns on a singular matrix and returns non-finite values;
    # keep the exception the dense solver used to raise.
    if not np.all(np.isfinite(centrality)):
        raise np.linalg.LinAlgError(
            "I - alpha * A is singular; alpha centrality is undefined for this alpha"
        )

    return dict(zip(g.vs['name'], centrality))
def create_graph_from_binetflow(binetflow_file: str) -> ig.Graph:
    """
    Create a directed graph from a .binetflow file efficiently.

    Every distinct value found in the ``SrcAddr`` / ``DstAddr`` columns becomes
    a vertex whose ``name`` is that address; every flow whose source differs
    from its destination becomes a directed edge ``SrcAddr -> DstAddr``
    (repeated flows yield parallel edges). Addresses that only occur in
    self-directed flows are still added as isolated vertices.

    Only the two address columns are loaded, and rows with a missing source or
    destination address are skipped.

    :param binetflow_file: str
        Path to the .binetflow file (comma-separated, with a header row that
        contains ``SrcAddr`` and ``DstAddr``).
    :return: igraph.Graph object
        The graph created from the binetflow data.
    :raises ValueError: if the file lacks a ``SrcAddr`` or ``DstAddr`` column.
    """
    print("Reading the .binetflow file...")
    flows = pd.read_csv(
        binetflow_file,
        sep=',',
        header=0,
        # The other flow columns are never used; skipping them keeps memory
        # proportional to what the graph actually needs.
        usecols=['SrcAddr', 'DstAddr'],
        # Addresses are vertex names; never let pandas infer a numeric type.
        dtype=str,
    ).dropna()  # a missing address cannot be turned into a vertex or an edge

    print("Creating the graph...")
    g = ig.Graph(directed=True)

    # np.unique also sorts, so vertex ids are assigned in sorted-address order.
    ip_addresses = np.unique(flows[['SrcAddr', 'DstAddr']].to_numpy().ravel())
    print("Adding vertices...")
    g.add_vertices(ip_addresses.tolist())

    print("Filtering edges...")
    non_loops = flows[flows['SrcAddr'] != flows['DstAddr']]
    edges = list(zip(non_loops['SrcAddr'], non_loops['DstAddr']))

    print("Adding edges...")
    g.add_edges(edges)
    return g
def _scale_by_max(values: list) -> list:
    """Divide every value by the largest one; an all-zero or empty input is divided by 1."""
    # `or 1` covers both the empty list and the "vertices but no edges" case,
    # where the maximum degree is 0 and a plain division would fail.
    peak = max(values, default=0) or 1
    return [value / peak for value in values]


def calculate_graph_attributes(g: ig.Graph) -> Dict[str, Any]:
    """
    Calculate various graph attributes.

    The returned dictionary has these keys, each holding the per-vertex result
    of the corresponding igraph call:

    * ``in_degree``   -- ``g.indegree()`` divided by the maximum in-degree
    * ``out_degree``  -- ``g.outdegree()`` divided by the maximum out-degree
    * ``closeness``   -- ``g.closeness()``
    * ``eigenvector`` -- ``g.eigenvector_centrality()``
    * ``pagerank``    -- ``g.pagerank()``

    When no vertex has any incoming (or outgoing) edge, the corresponding
    normalised degrees are all 0 instead of raising ``ZeroDivisionError``.

    :param g: igraph.Graph object
        The graph to calculate the attributes for.
    :return: Dictionary containing graph attributes.
    """
    print("Calculating graph parameters...")
    attributes: Dict[str, Any] = {}

    print("Calculating in-degree...")
    attributes['in_degree'] = _scale_by_max(g.indegree())

    print("Calculating out-degree...")
    attributes['out_degree'] = _scale_by_max(g.outdegree())

    print("Calculating closeness...")
    attributes['closeness'] = g.closeness()

    print("Calculating eigenvector centrality...")
    attributes['eigenvector'] = g.eigenvector_centrality()

    print("Calculating pagerank...")
    attributes['pagerank'] = g.pagerank()

    # NOTE: alpha centrality is intentionally not included. The helper
    # calculate_alpha_centrality returns a name-keyed dict rather than a
    # per-vertex list, so its result would have to be reordered into vertex
    # order before it could be added as another column here.
    return attributes
def save_results_to_csv(file_name: str, attributes: Dict[str, Any]) -> None:
    """
    Save the calculated graph attributes to a CSV file.

    Each key of ``attributes`` becomes a column and each position in the
    value sequences becomes a row; no index column is written.

    :param file_name: str
        The name of the CSV file to save the results to.
    :param attributes: dict
        The graph attributes to save, as column name -> sequence of values.
    """
    print("Saving results to CSV...")
    pd.DataFrame(attributes).to_csv(file_name, index=False)
    print(f"Results saved to {file_name}")
# Batch driver: walk the dataset tree and, for every .binetflow capture inside
# a directory named "11", build the flow graph, compute its attributes and
# write them next to the capture as "<capture name>_attributes.csv".
target_dir_name = "11"

for root, dirs, files in os.walk(dataset_dir):
    print(f"Processing directory: {root}")
    # Compare the last path component instead of a literal "\\11" suffix so
    # the filter works with either path separator.
    if os.path.basename(root) != target_dir_name:
        continue

    for file in files:
        if not file.endswith(".binetflow"):
            continue

        binetflow_file = os.path.join(root, file)
        print(f"Processing {binetflow_file}...")
        try:
            g = create_graph_from_binetflow(binetflow_file)
            attributes = calculate_graph_attributes(g)
            result_file = os.path.join(root, f"{file}_attributes.csv")
            save_results_to_csv(result_file, attributes)
        except Exception as e:
            # Best-effort batch run: report the failure and carry on with the
            # remaining captures rather than aborting the whole walk.
            print(f"Error processing {binetflow_file}: {e}")