-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path4_ede_clustering_predict_y2.yaml
127 lines (117 loc) · 3.69 KB
/
4_ede_clustering_predict_y2.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Connection settings: metrics endpoint, Dask scheduler, Kafka target and the
# metrics query with its polling parameters.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# in particular confirm that the Dask sub-mapping ends after EnforceCheck.
Connector:
  PREndpoint: 194.102.62.155  # hal720m.sage.ieat.ro
  Dask:
    SchedulerEndpoint: local
    Scale: 3
    SchedulerPort: 8787
    EnforceCheck: False
  MPort: 9200  # Monitoring port
  KafkaEndpoint: 10.9.8.136
  KafkaPort: 9092
  KafkaTopic: edetopic
  # Query: { "query": 'node_disk_written_bytes_total[5m]'}
  Query: {"query": '{__name__=~"node.+"}[1m]'}
  MetricsInterval: "1m"  # Metrics datapoint interval definition
  QSize: 0
  Index: time
  QDelay: 10s  # Polling period for metrics fetching
  # Local: /Users/Gabriel/Documents/workspaces/Event-Detection-Engine/data/demo_data.csv # Define the path to the local file for training
# Run-mode switches. Only Detect is enabled in this configuration.
# Nested under Mode so these flags do not collide with the top-level
# Training and Detect sections of the same name.
Mode:
  Training: False
  Validate: False
  Detect: True
# Data filtering options applied to the fetched metrics.
# NOTE(review): nesting (including that of the commented-out examples) was
# reconstructed from a flattened copy of this file; confirm against the
# consumer's schema.
Filter:
  # Columns:  # Which columns remain
  #   - "col1"
  #   - "col2"
  #   - "col4"
  # Rows:
  #   ld: 145607979
  #   gd: 145607979
  # DColumns:  # Which columns to delete
  #   - node_boot_time_seconds_10.211.55.101:9100
  #   - node_boot_time_seconds_10.211.55.102:9100
  #   - node_boot_time_seconds_10.211.55.103:9100
  Fillna: True  # fill none values with 0
  Dropna: True  # delete columns with none values
  # LowVariance: True
  DWild:
    # NOTE(review): if this is evaluated as a regex, the unescaped '.' matches
    # any character, not only a literal dot - confirm this is intended.
    Regex: '10.251.0.114'  # filter based on wildcard (regex)
    Keep: True
  DColumns:
    Dlist: /Users/Gabriel/Documents/workspaces/Event-Detection-Engine/data/low_variance.yaml
# Feature augmentation: the scaler to apply, plus (commented-out) examples of
# derived-column operations and categorical handling.
# NOTE(review): nesting was reconstructed from a flattened copy of this file.
# The active Scaler mapping is unambiguous; the indentation of the commented
# examples below is a best guess and must be checked before uncommenting.
Augmentation:
  Scaler:  # if not used set to false
    StandardScaler:  # All scalers from scikitlearn
      copy: True
      with_mean: True
      with_std: True
  # Operations:
  #   STD:
  #     - cpu_load1:
  #         - node_load1_10.211.55.101:9100
  #         - node_load1_10.211.55.102:9100
  #         - node_load1_10.211.55.103:9100
  #     - memory:
  #         - node_memory_Active_anon_bytes_10.211.55.101:9100
  #         - node_memory_Active_anon_bytes_10.211.55.101:9100
  #         - node_memory_Active_anon_bytes_10.211.55.101:9100
  #   Mean:
  #     - network_flags:
  #         - node_network_flags_10.211.55.101:9100
  #         - node_network_flags_10.211.55.102:9100
  #         - node_network_flags_10.211.55.103:9100
  #     - network_out:
  #         - node_network_mtu_bytes_10.211.55.101:9100
  #         - node_network_mtu_bytes_10.211.55.102:9100
  #         - node_network_mtu_bytes_10.211.55.103:9100
  #   Median:
  #     - memory_file:
  #         - node_memory_Active_file_bytes_10.211.55.101:9100
  #         - node_memory_Active_file_bytes_10.211.55.102:9100
  #         - node_memory_Active_file_bytes_10.211.55.103:9100
  #     - memory_buffered:
  #         - node_memory_Buffers_bytes_10.211.55.101:9100
  #         - node_memory_Buffers_bytes_10.211.55.102:9100
  #         - node_memory_Buffers_bytes_10.211.55.103:9100
  #   RemoveFiltered: True
  #
  #   Method: !!python/object/apply:edeuser.user_methods.wrapper_add_columns  # user defined operation
  #     kwds:
  #       columns: !!python/tuple [node_load15_10.211.55.101:9100, node_load15_10.211.55.102:9100]
  #       column_name: sum_load15
  # Categorical:
  #   - col1
  #   - col2
  # OH: True
# Clustering example
# Training definition: model type, method, export name and method parameters.
# Mode.Training is False in this file, so presumably this section is not
# exercised in a detect-only run - confirm against the consumer.
Training:
  Type: clustering
  Method: isoforest
  Export: clustering_1
  # The key names match scikit-learn IsolationForest constructor arguments;
  # presumably forwarded to the estimator unchanged - TODO confirm.
  MethodSettings:
    n_estimators: 10
    max_samples: 10
    contamination: 0.1
    verbose: True
    bootstrap: True
# Detect:  # Method loading has the form <Method>_<Load>.pkl
#   Method: IsolationForest
#   Type: clustering
#   Load: clustering_2
#   Scaler: StandardScaler  # Same as for training
# Active detection settings: which saved model to load and which scaler to use.
# NOTE(review): nesting was reconstructed from a flattened copy of this file.
Detect:
  Method: IForest
  Type: clustering
  Load: cluster_y2_v3
  Scaler: StandardScaler
  # Analysis: True  # Start Shapley value based analysis
  # If plotting of heatmap, summary and feature importance is required, keep
  # the mapping below; if not, set Analysis to False or use the previous example.
  Analysis:
    Plot: True
# Miscellaneous runtime options.
# NOTE(review): units are taken from the value suffixes only (m, s); the exact
# meaning of each key is defined by the consuming application - confirm there.
Misc:
  heap: 512m
  checkpoint: True
  delay: 10s
  interval: 30m
  resetindex: False
  point: False