Skip to content

Commit

Permalink
modified notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
simonegramegna committed Dec 28, 2023
1 parent 9326a82 commit 716f519
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 116 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,42 +21,13 @@
def outlier_detection_retrain():
data = pd.read_csv(DATA_PATH + "\\urls_with_features_selected.csv")
api_data = pd.read_csv(DATA_PATH + "\\api_urls.csv")

X = data.loc[:, data.columns != 'type']
y = data['type']

X_api_data = api_data.loc[:, data.columns != 'type']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

detector_name = 'IForest'

od = IForest(threshold=0.5, n_estimators=100)

# train
od.fit(X_train)


od.infer_threshold(
X,
threshold_perc=95
)

preds = od.predict(
X_api_data,
return_instance_score=True
)

y_outlier = api_data['type'].values

labels = ["normal", "outlier"]

y_pred = preds['data']['is_outlier']

f1 = f1 = f1_score(y_outlier, y_pred)

print('F1 score: {:.4f}'.format(f1))

cm = confusion_matrix(y_outlier, y_pred)
df_cm = pd.DataFrame(cm, index=labels, columns=labels)

"""
Data drift
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
"\n",
"from alibi_detect.od import IForest\n",
"from alibi_detect.utils.visualize import plot_instance_score, plot_roc\n",
"from alibi_detect.cd import KSDrift\n",
"\n",
"from sklearn.model_selection import train_test_split"
]
Expand Down Expand Up @@ -165,96 +164,13 @@
"plot_instance_score(preds, y_outlier, labels, od.threshold)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"cd = KSDrift(X_train.values, p_val=0.05)\n",
"\n",
"preds = cd.predict(X_api_data.values, drift_type='batch', return_p_val=True, return_distance=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'data': {'is_drift': 1,\n",
" 'distance': array([0.26394078, 0.10457566, 0.53439146, 0.60707897, 0.27317435,\n",
" 0.15185855, 0.09701316, 0.35929605, 0.35704604, 0.28070396,\n",
" 0.29539803, 0.066625 , 0.7242994 , 0.6914309 , 0.32504606],\n",
" dtype=float32),\n",
" 'p_val': array([1.17257595e-01, 9.71364021e-01, 1.34772617e-05, 3.03037723e-07,\n",
" 9.60995555e-02, 7.18030095e-01, 9.86106217e-01, 1.05687575e-02,\n",
" 1.12897130e-02, 8.12749192e-02, 5.78064322e-02, 9.99959171e-01,\n",
" 1.17449175e-10, 1.37775014e-09, 2.74743363e-02], dtype=float32),\n",
" 'threshold': 0.0033333333333333335},\n",
" 'meta': {'name': 'KSDrift',\n",
" 'online': False,\n",
" 'data_type': None,\n",
" 'version': '0.11.4',\n",
" 'detector_type': 'drift'}}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preds"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"alert_drift = preds['data']['is_drift']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Re-training of the models if there is drift"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if alert_drift == 1:\n",
" import sys\n",
"\n",
" # Go to the parent directory\n",
" parent_directory = os.path.abspath(os.path.join('..'))\n",
" sys.path.append(parent_directory)\n",
"\n",
" from src.features.build_features import split\n",
" from src.models.train_base_model import train_base_rf\n",
" from src.models.train_tuned_model import train_tuned_rf\n",
" \n",
" train_base_rf()\n",
" \n",
" train_tuned_rf()"
"## Creation of report file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit 716f519

Please sign in to comment.