From 663491b91bc7d0888ba27e76ebd0466b1bf3615d Mon Sep 17 00:00:00 2001 From: Tom Hepworth Date: Mon, 22 Jan 2024 17:10:25 +0000 Subject: [PATCH 1/3] convert accuracy metrics to float --- splink/accuracy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/splink/accuracy.py b/splink/accuracy.py index d8a92a0491..500b552429 100644 --- a/splink/accuracy.py +++ b/splink/accuracy.py @@ -132,10 +132,10 @@ def truth_space_table_from_labels_with_predictions_sqls( cast(TN as float)/N as specificity, case when TN+FN=0 then 1 else cast(TN as float)/(TN+FN) end as npv, cast(TP+TN as float)/(P+N) as accuracy, - 2.0*TP/(2*TP + FN + FP) as f1, - 5.0*TP/(5*TP + 4*FN + FP) as f2, - 1.25*TP/(1.25*TP + 0.25*FN + FP) as f0_5, - 4.0*TP*TN/((4.0*TP*TN) + ((TP + TN)*(FP + FN))) as p4, + cast(2.0*TP/(2*TP + FN + FP) as float) as f1, + cast(5.0*TP/(5*TP + 4*FN + FP) as float) as f2, + cast(1.25*TP/(1.25*TP + 0.25*FN + FP) as float) as f0_5, + cast(4.0*TP*TN/((4.0*TP*TN) + ((TP + TN)*(FP + FN))) as float) as p4, case when TN+FN=0 or TP+FP=0 or P=0 or N=0 then 0 else cast((TP*TN)-(FP*FN) as float)/sqrt((TP+FP)*P*N*(TN+FN)) end as phi From b77cf6b01d30c4165a58a3446ef877c0c2667c58 Mon Sep 17 00:00:00 2001 From: Tom Hepworth Date: Mon, 5 Feb 2024 12:54:02 +0000 Subject: [PATCH 2/3] convert float casts to float8 casts --- splink/accuracy.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/splink/accuracy.py b/splink/accuracy.py index 500b552429..2b886ca82f 100644 --- a/splink/accuracy.py +++ b/splink/accuracy.py @@ -123,21 +123,21 @@ def truth_space_table_from_labels_with_predictions_sqls( FN as fn, P/row_count as P_rate, cast(N as float)/row_count as N_rate, - cast(TP as float)/P as tp_rate, - cast(TN as float)/N as tn_rate, - cast(FP as float)/N as fp_rate, - cast(FN as float)/P as fn_rate, - case when TP+FP=0 then 1 else cast(TP as float)/(TP+FP) end as precision, - cast(TP as float)/P as recall, - cast(TN as float)/N as specificity, - case when TN+FN=0 then 1 else cast(TN as float)/(TN+FN) end as npv, - cast(TP+TN as float)/(P+N) as accuracy, - cast(2.0*TP/(2*TP + FN + FP) as float) as f1, - cast(5.0*TP/(5*TP + 4*FN + FP) as float) as f2, - cast(1.25*TP/(1.25*TP + 0.25*FN + FP) as float) as f0_5, - cast(4.0*TP*TN/((4.0*TP*TN) + ((TP + TN)*(FP + FN))) as float) as p4, + cast(TP as float8)/P as tp_rate, + cast(TN as float8)/N as tn_rate, + cast(FP as float8)/N as fp_rate, + cast(FN as float8)/P as fn_rate, + case when TP+FP=0 then 1 else cast(TP as float8)/(TP+FP) end as precision, + cast(TP as float8)/P as recall, + cast(TN as float8)/N as specificity, + case when TN+FN=0 then 1 else cast(TN as float8)/(TN+FN) end as npv, + cast(TP+TN as float8)/(P+N) as accuracy, + cast(2.0*TP/(2*TP + FN + FP) as float8) as f1, + cast(5.0*TP/(5*TP + 4*FN + FP) as float8) as f2, + cast(1.25*TP/(1.25*TP + 0.25*FN + FP) as float8) as f0_5, + cast(4.0*TP*TN/((4.0*TP*TN) + ((TP + TN)*(FP + FN))) as float8) as p4, case when TN+FN=0 or TP+FP=0 or P=0 or N=0 then 0 - else cast((TP*TN)-(FP*FN) as float)/sqrt((TP+FP)*P*N*(TN+FN)) end as phi + else cast((TP*TN)-(FP*FN) as float8)/sqrt((TP+FP)*P*N*(TN+FN)) end as phi from __splink__labels_with_pos_neg_grouped_with_truth_stats """ From 2192ba6d6b72c2ecc08565ddbd47fdca26dacd53 Mon Sep 17 00:00:00 2001 From: Tom Hepworth Date: Mon, 5 Feb 2024 14:12:48 +0000 Subject: [PATCH 3/3] Add additional casts to confusion matrix values --- splink/accuracy.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/splink/accuracy.py b/splink/accuracy.py index 2b886ca82f..cfb8aa59e1 100644 --- a/splink/accuracy.py +++ b/splink/accuracy.py @@ -115,13 +115,13 @@ def truth_space_table_from_labels_with_predictions_sqls( power(2, truth_threshold) / (1 + power(2, truth_threshold)) as match_probability, row_count, - P as p, - N as n, - TP as tp, - TN as tn, - FP as fp, - FN as fn, - P/row_count as P_rate, + cast(P as float8) as p, + cast(N as float8) as n, + cast(TP as float8) as tp, + cast(TN as float8) as tn, + cast(FP as float8) as fp, + cast(FN as float8) as fn, + cast(P/row_count as float8) as P_rate, cast(N as float)/row_count as N_rate, cast(TP as float8)/P as tp_rate, cast(TN as float8)/N as tn_rate,