# 2_ede_clustering_y2.yaml

# Data-source settings: Dask scheduler, Kafka endpoint, metrics query and
# the local training file.
Connector:
  #  PREndpoint: 194.102.62.155
  Dask:
    SchedulerEndpoint: local
    Scale: 3
    SchedulerPort: 8787
    EnforceCheck: false
  # Monitoring port
  MPort: 9200
  KafkaEndpoint: '10.9.8.136'
  KafkaPort: 9092
  KafkaTopic: edetopic
  # Disabled alternative query:
  #  Query: { "query": 'node_disk_written_bytes_total[5m]'}
  # Active query (looks like a PromQL selector for metrics whose name matches
  # node.+ over a 1m range -- confirm against the consumer).
  Query:
    query: '{__name__=~"node.+"}[1m]'
  # Metrics datapoint interval definition
  MetricsInterval: '1m'
  QSize: 0
  Index: time
  # Polling period for metrics fetching
  QDelay: '10s'
  # Path to the local file used for training; quoted because it contains spaces.
  Local: '/Users/Gabriel/Dropbox/Research/ASPIDE/Datasets/ECI Chaos/Distributed Phase 1/finalized/single_node/training/df_anomaly.csv'

# Run-mode switches; only Training is enabled in this file.
Mode:
  Training: true
  Validate: false
  Detect: false

# Column filtering and missing-value handling.
Filter:
  # Which columns to delete
  DColumns:
    - target
  # Fill none values with 0
  Fillna: true
  # Delete columns with none values
  Dropna: true

Augmentation:
  # If not used, set to false
  Scaler:
    # All scalers from scikit-learn
    StandardScaler:
      copy: true
      # with_mean: True
      # with_std: True

# Clustering example
Training:
  Type: clustering
  Method: isoforest
  Export: clustering_1
  # Settings for the selected method (presumably forwarded to the estimator
  # as keyword arguments -- confirm against the consumer).
  MethodSettings:
    n_estimators: 10
    max_samples: 10
    contamination: 0.1
    verbose: true
    bootstrap: true

# Detection settings; Load carries the same name as Training.Export.
Detect:
  Type: clustering
  Method: isoforest
  Load: clustering_1
  # Same as for training
  Scaler: StandardScaler


# Miscellaneous runtime options.
Misc:
  heap: '512m'
  checkpoint: true
  delay: '10s'
  interval: '30m'
  resetindex: false
  point: false