Skip to content

Commit

Permalink
added updated models
Browse files Browse the repository at this point in the history
  • Loading branch information
simonegramegna committed Jan 5, 2024
1 parent 76a5ecd commit f1cf8a2
Show file tree
Hide file tree
Showing 34 changed files with 127 additions and 56 deletions.
48 changes: 19 additions & 29 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ stages:
size: 3165487
- path: src/data/make_dataset.py
hash: md5
md5: 60206638c903696890c2776593751058
size: 1109
md5: da6d3d579738ff659f03e8220c3f0246
size: 1377
outs:
- path: data/urls_with_features_selected.csv
hash: md5
Expand All @@ -25,8 +25,8 @@ stages:
size: 730854
- path: src/features/build_features.py
hash: md5
md5: 143cec654560aca16bb5ac766c580a59
size: 1714
md5: a7874e52c6739bd5b9d230fc9df0a4a0
size: 2069
outs:
- path: data/test.csv
hash: md5
Expand All @@ -46,17 +46,12 @@ stages:
size: 2565327
- path: src/models/train_base_model.py
hash: md5
md5: 9663eeef47f9cd136bd79cb03054fe49
size: 1471
md5: be582060012939ccfac0735a6dd732ae
size: 1686
- path: src/models/utils.py
hash: md5
md5: 86c9055fb54ac2dc222ef4c4512d8b0e
size: 1718
outs:
- path: models/base_rf_model.pkl
hash: md5
md5: b9d39c597fcea9393ec7777fd017daf9
size: 34199068
md5: f391ab9e1d87bd85759f26698cc86dbd
size: 1929
train_tuned_model:
cmd:
- python src/models/train_tuned_model.py
Expand All @@ -67,17 +62,12 @@ stages:
size: 2565327
- path: src/models/train_tuned_model.py
hash: md5
md5: 0e2dd488e85a330227cd3e8c1eb7a939
size: 1797
md5: 3cf8bbaf1de889a0680f33b47f13bf81
size: 1993
- path: src/models/utils.py
hash: md5
md5: 86c9055fb54ac2dc222ef4c4512d8b0e
size: 1718
outs:
- path: models/tuned_rf_model.pkl
hash: md5
md5: 08d9d5e9e4e10d84f1eabdf1916300e6
size: 781192562
md5: f391ab9e1d87bd85759f26698cc86dbd
size: 1929
predict:
cmd:
- python src/models/predict.py
Expand All @@ -92,30 +82,30 @@ stages:
size: 34199068
- path: models/tuned_rf_model.pkl
hash: md5
md5: 08d9d5e9e4e10d84f1eabdf1916300e6
size: 781192562
md5: 6d240e274219304618af4a238eb03b39
size: 781507762
- path: src/models/predict.py
hash: md5
md5: 422b5f22679fcc6ce1e308524a508457
size: 4037
- path: src/models/utils.py
hash: md5
md5: 86c9055fb54ac2dc222ef4c4512d8b0e
size: 1718
md5: f391ab9e1d87bd85759f26698cc86dbd
size: 1929
outs:
- path: reports/classification_report_base_rf.json
hash: md5
md5: 163d82e336564ec3b0a2a4d5a4b5abf4
size: 687
- path: reports/classification_report_tuned_rf.json
hash: md5
md5: ce564e7b231a3df65ff427eb5dba187b
size: 689
md5: 658c8edb7ab4641ffdcc31a336a487fe
size: 693
- path: reports/confusion_matrix_base_rf.json
hash: md5
md5: 5bd49e3e36e33ae371bc1b67744c363c
size: 155
- path: reports/confusion_matrix_tuned_rf.json
hash: md5
md5: c7d479cd4f59d4c4af79637c0daea80a
md5: 18712a8ab178d7b32f84b6694733b69d
size: 155
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
artifact_uri: file:///C:\Users\Lenovo\Desktop\SW_engineering\MalURLs\mlruns/463846434585356198/33e684f6d4c54ba092055110e171c45a/artifacts
end_time: 1704457577656
entry_point_name: ''
experiment_id: '463846434585356198'
lifecycle_stage: active
run_id: 33e684f6d4c54ba092055110e171c45a
run_name: receptive-sow-39
run_uuid: 33e684f6d4c54ba092055110e171c45a
source_name: ''
source_type: 4
source_version: ''
start_time: 1704457566868
status: 3
tags: []
user_id: baf89bde9935867d5202c57720161daa936b62ed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457567262 0.8929922686914129 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457567262 0.8931136098332417 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457567262 0.893 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457567262 0.893 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
receptive-sow-39
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
76a5ecd23ad0d66fc7d1e1fe4b864f30ed39cadf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/models/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
LOCAL
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
baf89bde9935867d5202c57720161daa936b62ed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
random_forest_base_model
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
artifact_uri: file:///C:\Users\Lenovo\Desktop\SW_engineering\MalURLs\mlruns/463846434585356198/58d8ef119ef3423f811f1379850a7633/artifacts
end_time: 1704457577650
entry_point_name: ''
experiment_id: '463846434585356198'
lifecycle_stage: active
run_id: 58d8ef119ef3423f811f1379850a7633
run_name: auspicious-gnu-79
run_uuid: 58d8ef119ef3423f811f1379850a7633
source_name: ''
source_type: 4
source_version: ''
start_time: 1704457567398
status: 3
tags: []
user_id: baf89bde9935867d5202c57720161daa936b62ed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457577639 0.8962411221310123 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457577639 0.8963856629931594 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457577639 0.89625 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1704457577639 0.89625 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
33e684f6d4c54ba092055110e171c45a
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
auspicious-gnu-79
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
76a5ecd23ad0d66fc7d1e1fe4b864f30ed39cadf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/models/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
LOCAL
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
baf89bde9935867d5202c57720161daa936b62ed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
random_forest_tuned_model
2 changes: 1 addition & 1 deletion mlruns/models/base_rf_model/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
aliases: {}
creation_timestamp: 1702748644144
description: null
last_updated_timestamp: 1704390891144
last_updated_timestamp: 1704457567335
name: base_rf_model
14 changes: 14 additions & 0 deletions mlruns/models/base_rf_model/version-8/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
aliases: []
creation_timestamp: 1704457567335
current_stage: None
description: null
last_updated_timestamp: 1704457567335
name: base_rf_model
run_id: 33e684f6d4c54ba092055110e171c45a
run_link: null
source: file:///C:\Users\Lenovo\Desktop\SW_engineering\MalURLs\mlruns/463846434585356198/33e684f6d4c54ba092055110e171c45a/artifacts/models
status: READY
status_message: null
storage_location: file:///C:\Users\Lenovo\Desktop\SW_engineering\MalURLs\mlruns/463846434585356198/33e684f6d4c54ba092055110e171c45a/artifacts/models
user_id: null
version: 8
2 changes: 1 addition & 1 deletion mlruns/models/tuned_rf_model/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
aliases: {}
creation_timestamp: 1702748651445
description: null
last_updated_timestamp: 1704390896921
last_updated_timestamp: 1704457577593
name: tuned_rf_model
14 changes: 14 additions & 0 deletions mlruns/models/tuned_rf_model/version-8/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
aliases: []
creation_timestamp: 1704457577593
current_stage: None
description: null
last_updated_timestamp: 1704457577593
name: tuned_rf_model
run_id: 58d8ef119ef3423f811f1379850a7633
run_link: null
source: file:///C:\Users\Lenovo\Desktop\SW_engineering\MalURLs\mlruns/463846434585356198/58d8ef119ef3423f811f1379850a7633/artifacts/models
status: READY
status_message: null
storage_location: file:///C:\Users\Lenovo\Desktop\SW_engineering\MalURLs\mlruns/463846434585356198/58d8ef119ef3423f811f1379850a7633/artifacts/models
user_id: null
version: 8
4 changes: 2 additions & 2 deletions models/base_rf_model.pkl.dvc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
outs:
- md5: d141c79da310be2e3ded5ab8236d42d6
size: 30782445
- md5: b9d39c597fcea9393ec7777fd017daf9
size: 34199068
hash: md5
path: base_rf_model.pkl
4 changes: 2 additions & 2 deletions models/tuned_rf_model.pkl.dvc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
outs:
- md5: c6fcb006c533c3dcda73a66ee7c310f3
size: 703178515
- md5: 6d240e274219304618af4a238eb03b39
size: 781507762
hash: md5
path: tuned_rf_model.pkl
26 changes: 13 additions & 13 deletions reports/classification_report_tuned_rf.json
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
{
"safe_URL": {
"precision": 0.8896551724137931,
"recall": 0.903,
"f1-score": 0.8962779156327544,
"precision": 0.8890525282277859,
"recall": 0.9055,
"f1-score": 0.8972008917513005,
"support": 2000.0
},
"unsafe_URL": {
"precision": 0.9015228426395939,
"recall": 0.888,
"f1-score": 0.8947103274559194,
"precision": 0.9037187977585328,
"recall": 0.887,
"f1-score": 0.8952813525107242,
"support": 2000.0
},
"accuracy": 0.8955,
"accuracy": 0.89625,
"macro avg": {
"precision": 0.8955890075266935,
"recall": 0.8955,
"f1-score": 0.8954941215443368,
"precision": 0.8963856629931594,
"recall": 0.89625,
"f1-score": 0.8962411221310123,
"support": 4000.0
},
"weighted avg": {
"precision": 0.8955890075266936,
"recall": 0.8955,
"f1-score": 0.8954941215443368,
"precision": 0.8963856629931594,
"recall": 0.89625,
"f1-score": 0.8962411221310123,
"support": 4000.0
}
}
8 changes: 4 additions & 4 deletions reports/confusion_matrix_tuned_rf.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"confusion_matrix": [
[
1806,
194
1811,
189
],
[
224,
1776
226,
1774
]
]
}
6 changes: 3 additions & 3 deletions src/data/make_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@

PROJECT_PATH = str(Path(Path(__file__).resolve().parents[2]))
DATA_PATH = PROJECT_PATH + "\\data"
data_file = PROJECT_PATH + "\\data\\urls_with_features.csv"
data_output_file = DATA_PATH + "\\urls_with_features_selected.csv"


# The function returns the processed feature dataset X.
def make_dataset():

data_file = PROJECT_PATH + "\\data\\urls_with_features.csv"
data_output_file = DATA_PATH + "\\urls_with_features_selected.csv"

if os.name == 'posix':
data_file = PROJECT_PATH + "/data/urls_with_features.csv"
data_output_file = PROJECT_PATH + "/data/urls_with_features_selected.csv"
Expand Down
4 changes: 3 additions & 1 deletion src/features/build_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

PROJECT_PATH = str(Path(Path(__file__).resolve().parents[2]))
DATA_PATH = PROJECT_PATH + "\\data"
urls_file = PROJECT_PATH + "\\data\\urls_with_features_selected.csv"



'''
Expand All @@ -20,6 +20,8 @@


def split():
urls_file = PROJECT_PATH + "\\data\\urls_with_features_selected.csv"

if os.name == 'posix':
urls_file = PROJECT_PATH + "/data/urls_with_features_selected.csv"

Expand Down

0 comments on commit f1cf8a2

Please sign in to comment.