modified notebooks

se4ai2324-uniba · Dec 28, 2023 · 716f519
1 parent 9326a82
commit 716f519
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 116 deletions.
diff --git a/performance_monitoring/outliers_detection.py → performance_monitoring/drift_detection.py b/performance_monitoring/outliers_detection.py → performance_monitoring/drift_detection.py
@@ -21,42 +21,13 @@
 def outlier_detection_retrain():
     data = pd.read_csv(DATA_PATH + "\\urls_with_features_selected.csv")
     api_data = pd.read_csv(DATA_PATH + "\\api_urls.csv")
+
     X = data.loc[:, data.columns != 'type']
     y = data['type']
+
     X_api_data = api_data.loc[:, data.columns != 'type']
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
-    detector_name = 'IForest'
-
-    od = IForest(threshold=0.5, n_estimators=100)
-
-    # train
-    od.fit(X_train)
-
-
-    od.infer_threshold(
-        X,
-        threshold_perc=95
-    )
-
-    preds = od.predict(
-        X_api_data,
-        return_instance_score=True
-    )
-
-    y_outlier = api_data['type'].values
-
-    labels = ["normal", "outlier"]
-
-    y_pred = preds['data']['is_outlier']
-
-    f1 = f1 = f1_score(y_outlier, y_pred)
-
-    print('F1 score: {:.4f}'.format(f1))
-
-    cm = confusion_matrix(y_outlier, y_pred)
-    df_cm = pd.DataFrame(cm, index=labels, columns=labels)
-
     """
         Data drift
     """

diff --git a/...rmance_monitoring/outliers_detector.ipynb → ...mance_monitoring/outliers_detection.ipynb b/...rmance_monitoring/outliers_detector.ipynb → ...mance_monitoring/outliers_detection.ipynb
@@ -26,7 +26,6 @@
     "\n",
     "from alibi_detect.od import IForest\n",
     "from alibi_detect.utils.visualize import plot_instance_score, plot_roc\n",
-    "from alibi_detect.cd import KSDrift\n",
     "\n",
     "from sklearn.model_selection import train_test_split"
    ]
@@ -165,96 +164,13 @@
     "plot_instance_score(preds, y_outlier, labels, od.threshold)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cd = KSDrift(X_train.values, p_val=0.05)\n",
-    "\n",
-    "preds = cd.predict(X_api_data.values, drift_type='batch', return_p_val=True, return_distance=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'data': {'is_drift': 1,\n",
-       "  'distance': array([0.26394078, 0.10457566, 0.53439146, 0.60707897, 0.27317435,\n",
-       "         0.15185855, 0.09701316, 0.35929605, 0.35704604, 0.28070396,\n",
-       "         0.29539803, 0.066625  , 0.7242994 , 0.6914309 , 0.32504606],\n",
-       "        dtype=float32),\n",
-       "  'p_val': array([1.17257595e-01, 9.71364021e-01, 1.34772617e-05, 3.03037723e-07,\n",
-       "         9.60995555e-02, 7.18030095e-01, 9.86106217e-01, 1.05687575e-02,\n",
-       "         1.12897130e-02, 8.12749192e-02, 5.78064322e-02, 9.99959171e-01,\n",
-       "         1.17449175e-10, 1.37775014e-09, 2.74743363e-02], dtype=float32),\n",
-       "  'threshold': 0.0033333333333333335},\n",
-       " 'meta': {'name': 'KSDrift',\n",
-       "  'online': False,\n",
-       "  'data_type': None,\n",
-       "  'version': '0.11.4',\n",
-       "  'detector_type': 'drift'}}"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "preds"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "alert_drift = preds['data']['is_drift']"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Re-training of the models if there is drift"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "if alert_drift == 1:\n",
-    "    import sys\n",
-    "\n",
-    "    # Vai alla directory genitore\n",
-    "    parent_directory = os.path.abspath(os.path.join('..'))\n",
-    "    sys.path.append(parent_directory)\n",
-    "\n",
-    "    from src.features.build_features import split\n",
-    "    from src.models.train_base_model import train_base_rf\n",
-    "    from src.models.train_tuned_model import train_tuned_rf\n",
-    "    \n",
-    "    train_base_rf()\n",
-    "    \n",
-    "    train_tuned_rf()"
+    "## Creation of report file"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,