# 7_ede_hpo_y2.yaml
Connector:
  # PREndpoint: 194.102.62.155 #hal720m.sage.ieat.ro
  Dask:
    SchedulerEndpoint: local # if not local, set the Dask scheduler endpoint here
    Scale: 3 # Number of workers if local, otherwise ignored
    SchedulerPort: 8787 # This is the default port
    EnforceCheck: False # Irrelevant for local
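    # Illustrative remote setup (sketch; the endpoint value below is an assumption, not from the original file):
    # SchedulerEndpoint: 192.0.2.10 # address of an already running Dask scheduler
    # SchedulerPort: 8787
    # EnforceCheck: True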
  MPort: 9200 # Monitoring port
  KafkaEndpoint: 10.9.8.136
  KafkaPort: 9092
  KafkaTopic: edetopic
  # Query: { "query": 'node_disk_written_bytes_total[5m]'} # Query for specific metrics
  Query: {"query": '{__name__=~"node.+"}[1m]'}
  MetricsInterval: "1m" # Metrics datapoint interval definition
  QSize: 0
  Index: time
  QDelay: "10s" # Polling period for metrics fetching
  Local: /Users/Gabriel/Dropbox/Research/ASPIDE/Datasets/ECI Chaos/Distributed Phase 1/finalized/single_node/training/df_anomaly.csv # Path to the local CSV file used for training
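  # Note (assumption, inferred from the commented-out PREndpoint above): with Local set, training
  # data is read from the CSV file; to query a Prometheus endpoint instead, set PREndpoint and
  # comment out Local.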

Mode:
  Training: True
  Validate: False
  Detect: False

Filter:
  Fillna: True # fill missing (None) values with 0
  Dropna: True # drop columns with missing (None) values

Augmentation:
  Scaler: # if not used, set to False
    StandardScaler: # any scaler from scikit-learn can be used
      copy: True
      # with_mean: True
      # with_std: True
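    # Illustrative alternative (a sketch, assuming the nested keys are passed as the scaler's
    # constructor arguments, as with StandardScaler above):
    # MinMaxScaler:
    #   copy: True
    #   clip: False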

# For HPO methods — alternative example (Random search):
# Training:
#   Type: hpo
#   HPOMethod: Random # random, grid, bayesian, tpot, evol
#   HPOParam:
#     n_iter: 2
#     n_jobs: -1
#     refit: F1_weighted # if multi-metric scoring is used, refit must name one metric (mandatory)
#     verbose: True
#   Method: randomforest
#   ParamDistribution:
#     n_estimators:
#       - 10
#       - 100
#     max_depth:
#       - 2
#       - 3
#   Target: target
#   Export: hpo_1
#   CV:
#     Type: StratifiedKFold # user defined; any CV iterator from sklearn
#     Params:
#       n_splits: 5
#       shuffle: True
#       random_state: 5
#   Scorers:
#     Scorer_list:
#       - Scorer:
#           Scorer_name: F1_weighted
#           skScorer: f1_weighted
#       - Scorer:
#           Scorer_name: Jaccard_Index
#           skScorer: jaccard_weighted # scoring names changed in sklearn; for multiclass add the suffix micro, weighted or samples
#       - Scorer:
#           Scorer_name: AUC
#           skScorer: roc_auc_ovr_weighted
#     User_scorer1: balanced_accuracy_score

Training:
  Type: hpo
  HPOMethod: Evol # Random, Grid, Bayesian, tpot, Evol
  HPOParam:
    n_jobs: 1 # must be a number; -1 (all cores) is not supported
    scoring: f1_weighted
    gene_mutation_prob: 0.20
    gene_crossover_prob: 0.5
    tournament_size: 4
    generations_number: 30
    population_size: 40
    verbose: 4
  Method: randomforest
  ParamDistribution:
    n_estimators:
      - 10
      - 100
    max_depth:
      - 2
      - 3
  Target: target
  Export: hpo_1_y2
  CV:
    Type: StratifiedKFold # user defined; any CV iterator from sklearn
    Params:
      n_splits: 5
      shuffle: True
      random_state: 5
  Scorers:
    Scorer_list:
      - Scorer:
          Scorer_name: F1_weighted
          skScorer: f1_weighted
      - Scorer:
          Scorer_name: Jaccard_Index
          skScorer: jaccard_weighted # scoring names changed in sklearn; for multiclass add the suffix micro, weighted or samples
      - Scorer:
          Scorer_name: AUC
          skScorer: roc_auc_ovr_weighted
    User_scorer1: balanced_accuracy_score

Detect:
  Method: RandomForest
  Type: classification
  Load: hpo_1 # name of a previously trained/exported model to load
  Scaler: StandardScaler # Same as for training

Misc:
  heap: 512m
  checkpoint: True
  delay: 15s
  interval: 30m
  resetindex: False
  point: False