Skip to content

Commit

Permalink
Adding class type into the precision-recall (PR) and F1 scores (#7)
Browse files Browse the repository at this point in the history
* added dir for nbconverted notebooks

* added labeling to evaluation

* saving shuffled model
  • Loading branch information
axiomcura authored Mar 27, 2024
1 parent cb8cdaf commit 62c6f92
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 83 deletions.
111 changes: 63 additions & 48 deletions notebooks/2.modeling/2.modeling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@
{
"data": {
"text/plain": [
"['/home/erikserrano/Desktop/Cytotoxic-Nuisance-Metadata-Analysis/results/2.modeling/multi_class_model.joblib']"
"['/home/erikserrano/Development/Cytotoxic-Nuisance-Metadata-Analysis/results/2.modeling/multi_class_model.joblib']"
]
},
"execution_count": 5,
Expand All @@ -399,12 +399,37 @@
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"bin_labels = label_binarize(\n",
" np.unique(y_labels.values), classes=[*range(n_classes)]\n",
").tolist()\n",
"labeled_bin = {str(bin_label): idx for idx, bin_label in enumerate(bin_labels)}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"test_precision_recall_df, test_f1_score_df = evaluate(\n",
" model=best_model, X=X_test, y=y_test, dataset=\"test\", shuffled=False, seed=seed\n",
" model=best_model,\n",
" X=X_test,\n",
" y=y_test,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"test\",\n",
" shuffled=False,\n",
" seed=seed,\n",
")\n",
"\n",
"train_precision_recall_df, train_f1_score_df = evaluate(\n",
" model=best_model, X=X_train, y=y_train, dataset=\"train\", shuffled=False, seed=seed\n",
" model=best_model,\n",
" X=X_train,\n",
" y=y_train,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"train\",\n",
" shuffled=False,\n",
" seed=seed,\n",
")"
]
},
Expand All @@ -417,7 +442,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -427,28 +452,47 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"['/home/erikserrano/Development/Cytotoxic-Nuisance-Metadata-Analysis/results/2.modeling/shuffled_multi_class_model.joblib']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"shuffled_best_model = train_multiclass(\n",
" shuffled_X_train, y_train, param_grid=param_grid, seed=seed\n",
")"
")\n",
"joblib.dump(shuffled_best_model, modeling_dir / \"shuffled_multi_class_model.joblib\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"shuffle_test_precision_recall_df, shuffle_test_f1_score_df = evaluate(\n",
" model=best_model, X=X_test, y=y_test, dataset=\"test\", shuffled=True, seed=seed\n",
" model=best_model,\n",
" X=X_test,\n",
" y=y_test,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"test\",\n",
" shuffled=True,\n",
" seed=seed,\n",
")\n",
"shuffle_train_precision_recall_df, shuffle_train_f1_score_df = evaluate(\n",
" model=best_model,\n",
" X=shuffled_X_train,\n",
" y=y_train,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"train\",\n",
" shuffled=True,\n",
" seed=seed,\n",
Expand All @@ -464,7 +508,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -506,7 +550,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -515,6 +559,7 @@
" model=best_model,\n",
" X=X_plate_holdout,\n",
" y=y_plate_holout,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"plate_holdout\",\n",
" shuffled=False,\n",
" seed=seed,\n",
Expand All @@ -523,6 +568,7 @@
" model=shuffled_best_model,\n",
" X=X_plate_holdout,\n",
" y=y_plate_holout,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"plate_holdout\",\n",
" shuffled=True,\n",
" seed=seed,\n",
Expand All @@ -533,6 +579,7 @@
" model=best_model,\n",
" X=X_treatment_holdout,\n",
" y=y_treatment_holout,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"treatment_holdout\",\n",
" shuffled=False,\n",
" seed=seed,\n",
Expand All @@ -542,6 +589,7 @@
" model=shuffled_best_model,\n",
" X=X_treatment_holdout,\n",
" y=y_treatment_holout,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"treatment_holdout\",\n",
" shuffled=True,\n",
" seed=seed,\n",
Expand All @@ -553,6 +601,7 @@
" model=best_model,\n",
" X=X_well_holdout,\n",
" y=y_well_holout,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"well_holdout\",\n",
" shuffled=False,\n",
" seed=seed,\n",
Expand All @@ -561,6 +610,7 @@
" model=shuffled_best_model,\n",
" X=X_well_holdout,\n",
" y=y_well_holout,\n",
" mapped_classes=labeled_bin,\n",
" dataset=\"well_holdout\",\n",
" shuffled=True,\n",
" seed=seed,\n",
Expand All @@ -569,7 +619,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -597,7 +647,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -622,41 +672,6 @@
" modeling_dir / \"precision_recall_scores.csv.gz\", index=False, compression=\"gzip\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit 62c6f92

Please sign in to comment.