# detection_model.py
# forked from arunbaruah/Anomaly_Detection_Transformer
import sys
sys.path.append('../')

import pandas as pd
import numpy as np

from Parsers import Drain
from Parsers import Spell
def parse(log_source, log_file, algorithm, start_date='', start_time='', end_date='', end_time=''):
    """
    Parses a log file into structured events and event templates.

    Args:
        log_source: The source of the logs (e.g. HDFS, Openstack, Linux).
        log_file: The name of the log file.
        algorithm: Parsing algorithm: 'Spell' or 'Drain'.
        start_date, start_time, end_date, end_time: Optional time-window
            bounds; currently unused.
    """
    if log_source == 'HDFS':
        input_dir = 'Dataset/' + log_source
        log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'
        # Regexes that mask variable fields before template extraction.
        regex = [
            r'blk_(|-)[0-9]+',  # block id
            r'(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)(:|)',  # IP address (optionally with port)
            r'(?<=[^A-Za-z0-9])(\-?\+?\d+)(?=[^A-Za-z0-9])|[0-9]+$',  # numbers
        ]
    elif log_source == 'Linux':
        raise NotImplementedError("Parsing of Linux logs is not implemented yet")
    elif log_source == 'Openstack':
        raise NotImplementedError("Parsing of Openstack logs is not implemented yet")
    else:
        raise ValueError("Unknown log source: " + log_source)

    if algorithm == 'Spell':
        output_dir = 'Spell_result/'  # output directory for parsing results
        tau = 0.6  # message type threshold (default: 0.5)
        parser = Spell.LogParser(indir=input_dir, outdir=output_dir, log_format=log_format, tau=tau, rex=regex)
        parser.parse(log_file)
    elif algorithm == 'Drain':
        output_dir = 'Drain_result/'  # output directory for parsing results
        st = 0.5  # similarity threshold
        depth = 4  # depth of all leaf nodes
        parser = Drain.LogParser(log_format, indir=input_dir, outdir=output_dir, depth=depth, st=st, rex=regex)
        parser.parse(log_file)
    else:
        raise ValueError("Unknown parsing algorithm: " + algorithm)
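
# Example usage -- a minimal sketch. The log file name below is hypothetical;
# it assumes a raw HDFS log exists under Dataset/HDFS/:
#
#   parse('HDFS', 'HDFS.log', 'Drain')
#
# This writes structured-log and template CSVs into Drain_result/ (the exact
# output file names depend on the Parsers.Drain implementation).
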
def backtrace(pred, log_source, algorithm):
    """
    Looks up the log templates for a sequence of predicted log keys.

    Args:
        pred: The sequence of log keys.
        log_source: The source of the log keys.
        algorithm: Parsing algorithm: 'Spell' or 'Drain'.
    """
    log_template = pd.read_csv(algorithm + "_result/" + log_source + "_templates.csv")
    y = np.squeeze(pred.tolist())
    for log in y:
        if log == -1:  # -1 is treated as a sentinel key (e.g. padding) and skipped
            continue
        # Log keys are 1-based; the templates table is 0-indexed.
        print(log, log_template['EventTemplate'][log - 1])
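
# Example usage -- a minimal sketch; the key values below are made up:
#
#   pred = np.array([[3, 7, -1, 12]])  # e.g. predicted log keys from the model
#   backtrace(pred, 'HDFS', 'Drain')
#
# Each printed line pairs a log key with its recovered event template from
# Drain_result/HDFS_templates.csv.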