# 4_ede_clustering_predict_y2.yaml

# Connection settings: metrics endpoint, Dask scheduler, Kafka endpoint and
# the query/polling parameters used to fetch metrics.
Connector:
  PREndpoint: 194.102.62.155  # hal720m.sage.ieat.ro
  Dask:
    SchedulerEndpoint: local
    Scale: 3
    SchedulerPort: 8787
    EnforceCheck: false
  MPort: 9200  # Monitoring port
  KafkaEndpoint: 10.9.8.136
  KafkaPort: 9092
  KafkaTopic: edetopic
#  Query: { "query": 'node_disk_written_bytes_total[5m]'}
  # Selector: every metric whose __name__ matches the regex "node.+",
  # over a [1m] range.
  Query:
    query: '{__name__=~"node.+"}[1m]'
  MetricsInterval: "1m"  # Metrics datapoint interval definition
  QSize: 0
  Index: time
  QDelay: "10s"  # Polling period for metrics fetching
#  Local: /Users/Gabriel/Documents/workspaces/Event-Detection-Engine/data/demo_data.csv # Define the path to the local file for training

# Run mode flags. In this configuration only detection is enabled;
# training and validation are switched off.
Mode:
  Training: false
  Validate: false
  Detect: true

# Filtering options: which columns/rows to keep or drop and how missing
# values are handled.
Filter:
#  Columns:   # Which columns remain
#    - "col1"
#    - "col2"
#    - "col4"
#  Rows:
#    ld: 145607979
#    gd: 145607979
#  DColumns:  # Which columns to delete
#    - node_boot_time_seconds_10.211.55.101:9100
#    - node_boot_time_seconds_10.211.55.102:9100
#    - node_boot_time_seconds_10.211.55.103:9100
  Fillna: true  # fill none values with 0
  Dropna: true  # delete columns with none values
#  LowVariance: True
  DWild:
    # NOTE(review): the dots are unescaped, so when treated as a regex they
    # match any character, not only a literal "." - confirm this is intended.
    Regex: '10.251.0.114'  # filter based on wildcard (regex)
    Keep: true
  DColumns:
    # NOTE(review): absolute, machine-specific path - presumably has to exist
    # on the host that runs this configuration; verify before deploying.
    Dlist: /Users/Gabriel/Documents/workspaces/Event-Detection-Engine/data/low_variance.yaml


# Data augmentation: scaling of the data plus optional (currently commented
# out) derived-column operations and categorical handling.
Augmentation:
  Scaler:  # if not used set to false
    StandardScaler:  # All scalers from scikit-learn
      copy: true
      with_mean: true
      with_std: true
#  Operations:
#    STD:
#      - cpu_load1:
#          - node_load1_10.211.55.101:9100
#          - node_load1_10.211.55.102:9100
#          - node_load1_10.211.55.103:9100
#      - memory:
#          - node_memory_Active_anon_bytes_10.211.55.101:9100
#          - node_memory_Active_anon_bytes_10.211.55.102:9100
#          - node_memory_Active_anon_bytes_10.211.55.103:9100
#    Mean:
#      - network_flags:
#          - node_network_flags_10.211.55.101:9100
#          - node_network_flags_10.211.55.102:9100
#          - node_network_flags_10.211.55.103:9100
#      - network_out:
#          - node_network_mtu_bytes_10.211.55.101:9100
#          - node_network_mtu_bytes_10.211.55.102:9100
#          - node_network_mtu_bytes_10.211.55.103:9100
#    Median:
#      - memory_file:
#          - node_memory_Active_file_bytes_10.211.55.101:9100
#          - node_memory_Active_file_bytes_10.211.55.102:9100
#          - node_memory_Active_file_bytes_10.211.55.103:9100
#      - memory_buffered:
#          - node_memory_Buffers_bytes_10.211.55.101:9100
#          - node_memory_Buffers_bytes_10.211.55.102:9100
#          - node_memory_Buffers_bytes_10.211.55.103:9100
#    RemoveFiltered: True
#
#    Method: !!python/object/apply:edeuser.user_methods.wrapper_add_columns # user defined operation
#      kwds:
#        columns: !!python/tuple [node_load15_10.211.55.101:9100, node_load15_10.211.55.102:9100]
#        column_name: sum_load15
#  Categorical:
#    - col1
#    - col2
#    OH: True

# Clustering example
# NOTE(review): Mode.Training is disabled in this file, so this section is
# presumably not used for this run - confirm. The Export name here also
# differs from the model name loaded in the Detect section.
Training:
  Type: clustering
  Method: isoforest
  Export: clustering_1
  MethodSettings:
    n_estimators: 10
    max_samples: 10
    contamination: 0.1
    verbose: true
    bootstrap: true

#Detect:   # Method loading has the form <Method>_<Load>.pkl
#  Method: IsolationForest
#  Type: clustering
#  Load: clustering_2
#  Scaler: StandardScaler  # Same as for training

# Detection settings: which exported model to load and which scaler to apply.
Detect:
  Method: IForest
  Type: clustering
  Load: cluster_y2_v3
  Scaler: StandardScaler
  # Analysis: True # Start Shapley value based analysis
  # If plotting of the heatmap, summary and feature importance is required,
  # use the mapping below; if not, set it to False or use the previous example.
  Analysis:
    Plot: true


# Miscellaneous runtime settings. Size and duration values are quoted so
# they are explicitly strings, matching Connector.MetricsInterval.
Misc:
  heap: "512m"
  checkpoint: true
  delay: "10s"
  interval: "30m"
  resetindex: false
  point: false